diff --git a/.gitignore b/.gitignore
index 7ec99fd844..f7f3d1c8e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 build*
+docker_build*
+hip_build*
 .vscode*
 opencv-3.4.0/*
 openvx_test_results*
diff --git a/amd_openvx_extensions/amd_rpp/CMakeLists.txt b/amd_openvx_extensions/amd_rpp/CMakeLists.txt
index 61de34470d..31c1116b43 100644
--- a/amd_openvx_extensions/amd_rpp/CMakeLists.txt
+++ b/amd_openvx_extensions/amd_rpp/CMakeLists.txt
@@ -49,276 +49,86 @@ include_directories(../../amd_openvx/openvx/include/
 )
 list(APPEND SOURCES
-    source/AbsoluteDifference.cpp
     source/AbsoluteDifferencebatchPD.cpp
-    source/AbsoluteDifferencebatchPDROID.cpp
-    source/AbsoluteDifferencebatchPS.cpp
-    source/Accumulate.cpp
     source/AccumulatebatchPD.cpp
-    source/AccumulatebatchPDROID.cpp
-    source/AccumulatebatchPS.cpp
-    source/AccumulateSquared.cpp
     source/AccumulateSquaredbatchPD.cpp
-    source/AccumulateSquaredbatchPDROID.cpp
-    source/AccumulateSquaredbatchPS.cpp
-    source/AccumulateWeighted.cpp
     source/AccumulateWeightedbatchPD.cpp
-    source/AccumulateWeightedbatchPDROID.cpp
-    source/AccumulateWeightedbatchPS.cpp
-    source/Add.cpp
     source/AddbatchPD.cpp
-    source/AddbatchPDROID.cpp
-    source/AddbatchPS.cpp
-    source/BilateralFilter.cpp
     source/BilateralFilterbatchPD.cpp
-    source/BilateralFilterbatchPDROID.cpp
-    source/BilateralFilterbatchPS.cpp
-    source/BitwiseAND.cpp
     source/BitwiseANDbatchPD.cpp
-    source/BitwiseANDbatchPDROID.cpp
-    source/BitwiseANDbatchPS.cpp
-    source/BitwiseNOT.cpp
     source/BitwiseNOTbatchPD.cpp
-    source/BitwiseNOTbatchPDROID.cpp
-    source/BitwiseNOTbatchPS.cpp
-    source/Blend.cpp
     source/BlendbatchPD.cpp
-    source/BlendbatchPS.cpp
-    source/BlendbatchPDROID.cpp
-    source/Blur.cpp
     source/BlurbatchPD.cpp
-    source/BlurbatchPDROID.cpp
-    source/BlurbatchPS.cpp
-    source/BoxFilter.cpp
     source/BoxFilterbatchPD.cpp
-    source/BoxFilterbatchPDROID.cpp
-    source/BoxFilterbatchPS.cpp
-    source/Brightness.cpp
     source/BrightnessbatchPD.cpp
-    source/BrightnessbatchPDROID.cpp
-    source/BrightnessbatchPS.cpp
     source/CannyEdgeDetector.cpp
-    source/CannyEdgeDetector.cpp
-    source/ChannelCombine.cpp
     source/ChannelCombinebatchPD.cpp
-    source/ChannelCombinebatchPS.cpp
-    source/ChannelExtract.cpp
     source/ChannelExtractbatchPD.cpp
-    source/ChannelExtractbatchPS.cpp
-    source/ColorTemperature.cpp
     source/ColorTemperaturebatchPD.cpp
-    source/ColorTemperaturebatchPDROID.cpp
-    source/ColorTemperaturebatchPS.cpp
-    source/ColorTwist.cpp
     source/ColorTwistbatchPD.cpp
-    source/Contrast.cpp
     source/ContrastbatchPD.cpp
-    source/ContrastbatchPDROID.cpp
-    source/ContrastbatchPS.cpp
-    source/ControlFlow.cpp
-    source/ControlFlowbatchPD.cpp
-    source/ControlFlowbatchPDROID.cpp
-    source/ControlFlowbatchPS.cpp
     source/copy.cpp
     source/CropMirrorNormalizePD.cpp
     source/CropPD.cpp
-    source/CustomConvolution.cpp
     source/CustomConvolutionbatchPD.cpp
-    source/CustomConvolutionbatchPDROID.cpp
-    source/CustomConvolutionbatchPS.cpp
-    source/DataObjectCopy.cpp
     source/DataObjectCopybatchPD.cpp
-    source/DataObjectCopybatchPDROID.cpp
-    source/DataObjectCopybatchPS.cpp
-    source/Dilate.cpp
     source/DilatebatchPD.cpp
-    source/DilatebatchPDROID.cpp
-    source/DilatebatchPS.cpp
-    source/Erode.cpp
     source/ErodebatchPD.cpp
-    source/ErodebatchPDROID.cpp
-    source/ErodebatchPS.cpp
-    source/ExclusiveOR.cpp
     source/ExclusiveORbatchPD.cpp
-    source/ExclusiveORbatchPDROID.cpp
-    source/ExclusiveORbatchPS.cpp
-    source/Exposure.cpp
     source/ExposurebatchPD.cpp
-    source/ExposurebatchPDROID.cpp
-    source/ExposurebatchPS.cpp
     source/FastCornerDetector.cpp
-    source/Fisheye.cpp
     source/FisheyebatchPD.cpp
-    source/FisheyebatchPDROID.cpp
-    source/FisheyebatchPS.cpp
-    source/Flip.cpp
     source/FlipbatchPD.cpp
-    source/FlipbatchPDROID.cpp
-    source/FlipbatchPS.cpp
-    source/Fog.cpp
     source/FogbatchPD.cpp
-    source/FogbatchPDROID.cpp
-    source/FogbatchPS.cpp
-    source/GammaCorrection.cpp
     source/GammaCorrectionbatchPD.cpp
-    source/GammaCorrectionbatchPDROID.cpp
-    source/GammaCorrectionbatchPS.cpp
-    source/GaussianFilter.cpp
     source/GaussianFilterbatchPD.cpp
-    source/GaussianFilterbatchPDROID.cpp
-    source/GaussianFilterbatchPS.cpp
-    source/GaussianImagePyramid.cpp
     source/GaussianImagePyramidbatchPD.cpp
-    source/GaussianImagePyramidbatchPS.cpp
     source/HarrisCornerDetector.cpp
     source/Histogram.cpp
-    source/HistogramBalance.cpp
     source/HistogramBalancebatchPD.cpp
-    source/HistogramBalancebatchPDROID.cpp
-    source/HistogramBalancebatchPS.cpp
-    source/HistogramEqualize.cpp
     source/HistogramEqualizebatchPD.cpp
-    source/HistogramEqualizebatchPDROID.cpp
-    source/HistogramEqualizebatchPS.cpp
-    source/Hue.cpp
     source/HuebatchPD.cpp
-    source/HuebatchPDROID.cpp
-    source/HuebatchPS.cpp
-    source/InclusiveOR.cpp
     source/InclusiveORbatchPD.cpp
-    source/InclusiveORbatchPDROID.cpp
-    source/InclusiveORbatchPS.cpp
-    source/Jitter.cpp
     source/JitterbatchPD.cpp
-    source/JitterbatchPDROID.cpp
-    source/JitterbatchPS.cpp
     source/LaplacianImagePyramid.cpp
-    source/LensCorrection.cpp
     source/LensCorrectionbatchPD.cpp
-    source/LensCorrectionbatchPDROID.cpp
-    source/LensCorrectionbatchPS.cpp
-    source/LocalBinaryPattern.cpp
     source/LocalBinaryPatternbatchPD.cpp
-    source/LocalBinaryPatternbatchPDROID.cpp
-    source/LocalBinaryPatternbatchPS.cpp
-    source/LookUpTable.cpp
     source/LookUpTablebatchPD.cpp
-    source/LookUpTablebatchPDROID.cpp
-    source/LookUpTablebatchPS.cpp
-    source/Magnitude.cpp
     source/MagnitudebatchPD.cpp
-    source/MagnitudebatchPDROID.cpp
-    source/MagnitudebatchPS.cpp
-    source/Max.cpp
     source/MaxbatchPD.cpp
-    source/MaxbatchPDROID.cpp
-    source/MaxbatchPS.cpp
     source/MeanStddev.cpp
-    source/MedianFilter.cpp
     source/MedianFilterbatchPD.cpp
-    source/MedianFilterbatchPDROID.cpp
-    source/MedianFilterbatchPS.cpp
-    source/Min.cpp
     source/MinbatchPD.cpp
-    source/MinbatchPDROID.cpp
-    source/MinbatchPS.cpp
     source/MinMaxLoc.cpp
-    source/Multiply.cpp
     source/MultiplybatchPD.cpp
-    source/MultiplybatchPDROID.cpp
-    source/MultiplybatchPS.cpp
-    source/Noise.cpp
     source/NoisebatchPD.cpp
-    source/NoisebatchPDROID.cpp
-    source/NoisebatchPS.cpp
-    source/NonLinearFilter.cpp
     source/NonLinearFilterbatchPD.cpp
-    source/NonLinearFilterbatchPDROID.cpp
-    source/NonLinearFilterbatchPS.cpp
-    source/NonMaxSupression.cpp
     source/NonMaxSupressionbatchPD.cpp
-    source/NonMaxSupressionbatchPDROID.cpp
-    source/NonMaxSupressionbatchPS.cpp
     source/nop.cpp
-    source/Occlusion.cpp
-    source/OcclusionbatchPD.cpp
-    source/OcclusionbatchPDROID.cpp
-    source/OcclusionbatchPS.cpp
-    source/Phase.cpp
     source/PhasebatchPD.cpp
-    source/PhasebatchPDROID.cpp
-    source/PhasebatchPS.cpp
-    source/Pixelate.cpp
     source/PixelatebatchPD.cpp
-    source/PixelatebatchPDROID.cpp
-    source/PixelatebatchPS.cpp
-    source/Rain.cpp
     source/RainbatchPD.cpp
-    source/RainbatchPDROID.cpp
-    source/RainbatchPS.cpp
-    source/RandomCropLetterBox.cpp
     source/RandomCropLetterBoxbatchPD.cpp
-    source/RandomCropLetterBoxbatchPDROID.cpp
-    source/RandomCropLetterBoxbatchPS.cpp
-    source/RandomShadow.cpp
     source/RandomShadowbatchPD.cpp
-    source/RandomShadowbatchPDROID.cpp
-    source/RandomShadowbatchPS.cpp
     source/Remap.cpp
-    source/Resize.cpp
     source/ResizebatchPD.cpp
-    source/ResizebatchPDROID.cpp
-    source/ResizebatchPS.cpp
-    source/ResizeCrop.cpp
     source/ResizeCropbatchPD.cpp
-    source/ResizeCropbatchPDROID.cpp
-    source/ResizeCropbatchPS.cpp
     source/ResizeCropMirrorPD.cpp
-    source/Rotate.cpp
     source/RotatebatchPD.cpp
-    source/RotatebatchPDROID.cpp
-    source/RotatebatchPS.cpp
-    source/Saturation.cpp
     source/SaturationbatchPD.cpp
-    source/SaturationbatchPDROID.cpp
-    source/SaturationbatchPS.cpp
-    source/Scale.cpp
     source/ScalebatchPD.cpp
-    source/ScalebatchPDROID.cpp
-    source/ScalebatchPS.cpp
-    source/Snow.cpp
     source/SnowbatchPD.cpp
-    source/SnowbatchPDROID.cpp
-    source/SnowbatchPS.cpp
-    source/Sobel.cpp
     source/SobelbatchPD.cpp
-    source/SobelbatchPDROID.cpp
-    source/SobelbatchPS.cpp
-    source/Subtract.cpp
     source/SubtractbatchPD.cpp
-    source/SubtractbatchPDROID.cpp
-    source/SubtractbatchPS.cpp
     source/TensorAdd.cpp
     source/TensorLookup.cpp
     source/TensorMatrixMultiply.cpp
     source/TensorMultiply.cpp
     source/TensorSubtract.cpp
-    source/Thresholding.cpp
     source/ThresholdingbatchPD.cpp
-    source/ThresholdingbatchPDROID.cpp
-    source/ThresholdingbatchPS.cpp
-    source/Vignette.cpp
     source/VignettebatchPD.cpp
-    source/VignettebatchPDROID.cpp
-    source/VignettebatchPS.cpp
-    source/WarpAffine.cpp
     source/WarpAffinebatchPD.cpp
-    source/WarpAffinebatchPDROID.cpp
-    source/WarpAffinebatchPS.cpp
-    source/WarpPerspective.cpp
     source/WarpPerspectivebatchPD.cpp
-    source/WarpPerspectivebatchPDROID.cpp
-    source/WarpPerspectivebatchPS.cpp
     source/kernel_rpp.cpp
     source/internal_publishKernels.cpp
 )
diff --git a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
index 43b7ed4067..11aea318d9 100644
--- a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
+++ b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
@@ -35,548 +35,167 @@ extern "C" SHARED_PUBLIC vx_status VX_API_CALL vxPublishKernels(vx_context conte
 vx_status ADD_KERENEL(std::function);
 vx_status get_kernels_to_publish();
-vx_status AbsoluteDifference_Register(vx_context);
 vx_status AbsoluteDifferencebatchPD_Register(vx_context);
-vx_status AbsoluteDifferencebatchPDROID_Register(vx_context);
-vx_status AbsoluteDifferencebatchPS_Register(vx_context);
-vx_status Accumulate_Register(vx_context);
 vx_status AccumulatebatchPD_Register(vx_context);
-vx_status AccumulatebatchPDROID_Register(vx_context);
-vx_status AccumulatebatchPS_Register(vx_context);
-vx_status AccumulateSquared_Register(vx_context);
 vx_status AccumulateSquaredbatchPD_Register(vx_context);
-vx_status AccumulateSquaredbatchPDROID_Register(vx_context);
-vx_status AccumulateSquaredbatchPS_Register(vx_context);
-vx_status AccumulateWeighted_Register(vx_context);
 vx_status AccumulateWeightedbatchPD_Register(vx_context);
-vx_status AccumulateWeightedbatchPDROID_Register(vx_context);
-vx_status AccumulateWeightedbatchPS_Register(vx_context);
-vx_status Add_Register(vx_context);
 vx_status AddbatchPD_Register(vx_context);
-vx_status AddbatchPDROID_Register(vx_context);
-vx_status AddbatchPS_Register(vx_context);
-vx_status BilateralFilter_Register(vx_context);
 vx_status BilateralFilterbatchPD_Register(vx_context);
-vx_status BilateralFilterbatchPDROID_Register(vx_context);
-vx_status BilateralFilterbatchPS_Register(vx_context);
-vx_status BitwiseAND_Register(vx_context);
 vx_status BitwiseANDbatchPD_Register(vx_context);
-vx_status BitwiseANDbatchPDROID_Register(vx_context);
-vx_status BitwiseANDbatchPS_Register(vx_context);
-vx_status BitwiseNOT_Register(vx_context);
 vx_status BitwiseNOTbatchPD_Register(vx_context);
-vx_status BitwiseNOTbatchPDROID_Register(vx_context);
-vx_status BitwiseNOTbatchPS_Register(vx_context);
-vx_status Blend_Register(vx_context);
 vx_status BlendbatchPD_Register(vx_context);
-vx_status BlendbatchPDROID_Register(vx_context);
-vx_status BlendbatchPS_Register(vx_context);
-vx_status Blur_Register(vx_context);
 vx_status BlurbatchPD_Register(vx_context);
-vx_status BlurbatchPDROID_Register(vx_context);
-vx_status BlurbatchPS_Register(vx_context);
-vx_status BoxFilter_Register(vx_context);
 vx_status BoxFilterbatchPD_Register(vx_context);
-vx_status BoxFilterbatchPDROID_Register(vx_context);
-vx_status BoxFilterbatchPS_Register(vx_context);
-vx_status Brightness_Register(vx_context);
 vx_status BrightnessbatchPD_Register(vx_context);
-vx_status BrightnessbatchPDROID_Register(vx_context);
-vx_status BrightnessbatchPS_Register(vx_context);
 vx_status CannyEdgeDetector_Register(vx_context);
-vx_status ChannelCombine_Register(vx_context);
 vx_status ChannelCombinebatchPD_Register(vx_context);
-vx_status ChannelCombinebatchPS_Register(vx_context);
-vx_status ChannelExtract_Register(vx_context);
 vx_status ChannelExtractbatchPD_Register(vx_context);
-vx_status ChannelExtractbatchPS_Register(vx_context);
-vx_status ColorTemperature_Register(vx_context);
 vx_status ColorTemperaturebatchPD_Register(vx_context);
-vx_status ColorTemperaturebatchPDROID_Register(vx_context);
-vx_status ColorTemperaturebatchPS_Register(vx_context);
-vx_status ColorTwist_Register(vx_context);
 vx_status ColorTwistbatchPD_Register(vx_context);
-vx_status Contrast_Register(vx_context);
 vx_status ContrastbatchPD_Register(vx_context);
-vx_status ContrastbatchPDROID_Register(vx_context);
-vx_status ContrastbatchPS_Register(vx_context);
-vx_status ControlFlow_Register(vx_context);
-vx_status ControlFlowbatchPD_Register(vx_context);
-vx_status ControlFlowbatchPDROID_Register(vx_context);
-vx_status ControlFlowbatchPS_Register(vx_context);
 vx_status Copy_Register(vx_context);
 vx_status CropMirrorNormalizePD_Register(vx_context);
 vx_status CropPD_Register(vx_context);
-vx_status CustomConvolution_Register(vx_context);
 vx_status CustomConvolutionbatchPD_Register(vx_context);
-vx_status CustomConvolutionbatchPDROID_Register(vx_context);
-vx_status CustomConvolutionbatchPS_Register(vx_context);
-vx_status DataObjectCopy_Register(vx_context);
 vx_status DataObjectCopybatchPD_Register(vx_context);
-vx_status DataObjectCopybatchPDROID_Register(vx_context);
-vx_status DataObjectCopybatchPS_Register(vx_context);
-vx_status Dilate_Register(vx_context);
 vx_status DilatebatchPD_Register(vx_context);
-vx_status DilatebatchPDROID_Register(vx_context);
-vx_status DilatebatchPS_Register(vx_context);
-vx_status Erode_Register(vx_context);
 vx_status ErodebatchPD_Register(vx_context);
-vx_status ErodebatchPDROID_Register(vx_context);
-vx_status ErodebatchPS_Register(vx_context);
-vx_status ExclusiveOR_Register(vx_context);
 vx_status ExclusiveORbatchPD_Register(vx_context);
-vx_status ExclusiveORbatchPDROID_Register(vx_context);
-vx_status ExclusiveORbatchPS_Register(vx_context);
-vx_status Exposure_Register(vx_context);
 vx_status ExposurebatchPD_Register(vx_context);
-vx_status ExposurebatchPDROID_Register(vx_context);
-vx_status ExposurebatchPS_Register(vx_context);
 vx_status FastCornerDetector_Register(vx_context);
-vx_status Fisheye_Register(vx_context);
 vx_status FisheyebatchPD_Register(vx_context);
-vx_status FisheyebatchPDROID_Register(vx_context);
-vx_status FisheyebatchPS_Register(vx_context);
-vx_status Flip_Register(vx_context);
 vx_status FlipbatchPD_Register(vx_context);
-vx_status FlipbatchPDROID_Register(vx_context);
-vx_status FlipbatchPS_Register(vx_context);
-vx_status Fog_Register(vx_context);
 vx_status FogbatchPD_Register(vx_context);
-vx_status FogbatchPDROID_Register(vx_context);
-vx_status FogbatchPS_Register(vx_context);
-vx_status GammaCorrection_Register(vx_context);
 vx_status GammaCorrectionbatchPD_Register(vx_context);
-vx_status GammaCorrectionbatchPDROID_Register(vx_context);
-vx_status GammaCorrectionbatchPS_Register(vx_context);
-vx_status GaussianFilter_Register(vx_context);
 vx_status GaussianFilterbatchPD_Register(vx_context);
-vx_status GaussianFilterbatchPDROID_Register(vx_context);
-vx_status GaussianFilterbatchPS_Register(vx_context);
-vx_status GaussianImagePyramid_Register(vx_context);
 vx_status GaussianImagePyramidbatchPD_Register(vx_context);
-vx_status GaussianImagePyramidbatchPS_Register(vx_context);
 vx_status HarrisCornerDetector_Register(vx_context);
 vx_status Histogram_Register(vx_context);
-vx_status HistogramBalance_Register(vx_context);
 vx_status HistogramBalancebatchPD_Register(vx_context);
-vx_status HistogramBalancebatchPDROID_Register(vx_context);
-vx_status HistogramBalancebatchPS_Register(vx_context);
-vx_status HistogramEqualize_Register(vx_context);
 vx_status HistogramEqualizebatchPD_Register(vx_context);
-vx_status HistogramEqualizebatchPDROID_Register(vx_context);
-vx_status HistogramEqualizebatchPS_Register(vx_context);
-vx_status Hue_Register(vx_context);
 vx_status HuebatchPD_Register(vx_context);
-vx_status HuebatchPDROID_Register(vx_context);
-vx_status HuebatchPS_Register(vx_context);
-vx_status InclusiveOR_Register(vx_context);
 vx_status InclusiveORbatchPD_Register(vx_context);
-vx_status InclusiveORbatchPDROID_Register(vx_context);
-vx_status InclusiveORbatchPS_Register(vx_context);
-vx_status Jitter_Register(vx_context);
 vx_status JitterbatchPD_Register(vx_context);
-vx_status JitterbatchPDROID_Register(vx_context);
-vx_status JitterbatchPS_Register(vx_context);
 vx_status LaplacianImagePyramid_Register(vx_context);
-vx_status LensCorrection_Register(vx_context);
 vx_status LensCorrectionbatchPD_Register(vx_context);
-vx_status LensCorrectionbatchPDROID_Register(vx_context);
-vx_status LensCorrectionbatchPS_Register(vx_context);
-vx_status LocalBinaryPattern_Register(vx_context);
 vx_status LocalBinaryPatternbatchPD_Register(vx_context);
-vx_status LocalBinaryPatternbatchPDROID_Register(vx_context);
-vx_status LocalBinaryPatternbatchPS_Register(vx_context);
-vx_status LookUpTable_Register(vx_context);
 vx_status LookUpTablebatchPD_Register(vx_context);
-vx_status LookUpTablebatchPDROID_Register(vx_context);
-vx_status LookUpTablebatchPS_Register(vx_context);
-vx_status Magnitude_Register(vx_context);
 vx_status MagnitudebatchPD_Register(vx_context);
-vx_status MagnitudebatchPDROID_Register(vx_context);
-vx_status MagnitudebatchPS_Register(vx_context);
-vx_status Max_Register(vx_context);
 vx_status MaxbatchPD_Register(vx_context);
-vx_status MaxbatchPDROID_Register(vx_context);
-vx_status MaxbatchPS_Register(vx_context);
 vx_status MeanStddev_Register(vx_context);
-vx_status MedianFilter_Register(vx_context);
 vx_status MedianFilterbatchPD_Register(vx_context);
-vx_status MedianFilterbatchPDROID_Register(vx_context);
-vx_status MedianFilterbatchPS_Register(vx_context);
-vx_status Min_Register(vx_context);
 vx_status MinbatchPD_Register(vx_context);
-vx_status MinbatchPDROID_Register(vx_context);
-vx_status MinbatchPS_Register(vx_context);
 vx_status MinMaxLoc_Register(vx_context);
-vx_status Multiply_Register(vx_context);
 vx_status MultiplybatchPD_Register(vx_context);
-vx_status MultiplybatchPDROID_Register(vx_context);
-vx_status MultiplybatchPS_Register(vx_context);
-vx_status Noise_Register(vx_context);
 vx_status NoisebatchPD_Register(vx_context);
-vx_status NoisebatchPDROID_Register(vx_context);
-vx_status NoisebatchPS_Register(vx_context);
-vx_status NonLinearFilter_Register(vx_context);
 vx_status NonLinearFilterbatchPD_Register(vx_context);
-vx_status NonLinearFilterbatchPDROID_Register(vx_context);
-vx_status NonLinearFilterbatchPS_Register(vx_context);
-vx_status NonMaxSupression_Register(vx_context);
 vx_status NonMaxSupressionbatchPD_Register(vx_context);
-vx_status NonMaxSupressionbatchPDROID_Register(vx_context);
-vx_status NonMaxSupressionbatchPS_Register(vx_context);
 vx_status Nop_Register(vx_context);
-vx_status Occlusion_Register(vx_context);
-vx_status OcclusionbatchPD_Register(vx_context);
-vx_status OcclusionbatchPDROID_Register(vx_context);
-vx_status OcclusionbatchPS_Register(vx_context);
-vx_status Phase_Register(vx_context);
 vx_status PhasebatchPD_Register(vx_context);
-vx_status PhasebatchPDROID_Register(vx_context);
-vx_status PhasebatchPS_Register(vx_context);
-vx_status Pixelate_Register(vx_context);
 vx_status PixelatebatchPD_Register(vx_context);
-vx_status PixelatebatchPDROID_Register(vx_context);
-vx_status PixelatebatchPS_Register(vx_context);
-vx_status Rain_Register(vx_context);
 vx_status RainbatchPD_Register(vx_context);
-vx_status RainbatchPDROID_Register(vx_context);
-vx_status RainbatchPS_Register(vx_context);
-vx_status RandomCropLetterBox_Register(vx_context);
 vx_status RandomCropLetterBoxbatchPD_Register(vx_context);
-vx_status RandomCropLetterBoxbatchPDROID_Register(vx_context);
-vx_status RandomCropLetterBoxbatchPS_Register(vx_context);
-vx_status RandomShadow_Register(vx_context);
 vx_status RandomShadowbatchPD_Register(vx_context);
-vx_status RandomShadowbatchPDROID_Register(vx_context);
-vx_status RandomShadowbatchPS_Register(vx_context);
 vx_status remap_Register(vx_context);
-vx_status Resize_Register(vx_context);
 vx_status ResizebatchPD_Register(vx_context);
-vx_status ResizebatchPDROID_Register(vx_context);
-vx_status ResizebatchPS_Register(vx_context);
-vx_status ResizeCrop_Register(vx_context);
 vx_status ResizeCropbatchPD_Register(vx_context);
-vx_status ResizeCropbatchPDROID_Register(vx_context);
-vx_status ResizeCropbatchPS_Register(vx_context);
 vx_status ResizeCropMirrorPD_Register(vx_context);
-vx_status ResizeCropMirrorPD(vx_context);
-vx_status Rotate_Register(vx_context);
 vx_status RotatebatchPD_Register(vx_context);
-vx_status RotatebatchPDROID_Register(vx_context);
-vx_status RotatebatchPS_Register(vx_context);
-vx_status Saturation_Register(vx_context);
 vx_status SaturationbatchPD_Register(vx_context);
-vx_status SaturationbatchPDROID_Register(vx_context);
-vx_status SaturationbatchPS_Register(vx_context);
-vx_status Scale_Register(vx_context);
 vx_status ScalebatchPD_Register(vx_context);
-vx_status ScalebatchPDROID_Register(vx_context);
-vx_status ScalebatchPS_Register(vx_context);
-vx_status Snow_Register(vx_context);
 vx_status SnowbatchPD_Register(vx_context);
-vx_status SnowbatchPDROID_Register(vx_context);
-vx_status SnowbatchPS_Register(vx_context);
-vx_status Sobel_Register(vx_context);
 vx_status SobelbatchPD_Register(vx_context);
-vx_status SobelbatchPDROID_Register(vx_context);
-vx_status SobelbatchPS_Register(vx_context);
-vx_status Subtract_Register(vx_context);
 vx_status SubtractbatchPD_Register(vx_context);
-vx_status SubtractbatchPDROID_Register(vx_context);
-vx_status SubtractbatchPS_Register(vx_context);
 vx_status TensorAdd_Register(vx_context);
 vx_status TensorLookup_Register(vx_context);
 vx_status TensorMatrixMultiply_Register(vx_context);
 vx_status TensorMultiply_Register(vx_context);
 vx_status TensorSubtract_Register(vx_context);
-vx_status Thresholding_Register(vx_context);
 vx_status ThresholdingbatchPD_Register(vx_context);
-vx_status ThresholdingbatchPDROID_Register(vx_context);
-vx_status ThresholdingbatchPS_Register(vx_context);
-vx_status Vignette_Register(vx_context);
 vx_status VignettebatchPD_Register(vx_context);
-vx_status VignettebatchPDROID_Register(vx_context);
-vx_status VignettebatchPS_Register(vx_context);
-vx_status WarpAffine_Register(vx_context);
 vx_status WarpAffinebatchPD_Register(vx_context);
-vx_status WarpAffinebatchPDROID_Register(vx_context);
-vx_status WarpAffinebatchPS_Register(vx_context);
-vx_status WarpPerspective_Register(vx_context);
 vx_status WarpPerspectivebatchPD_Register(vx_context);
-vx_status WarpPerspectivebatchPDROID_Register(vx_context);
-vx_status WarpPerspectivebatchPS_Register(vx_context);
 // kernel names
 #define VX_KERNEL_RPP_NOP_NAME "org.rpp.Nop"
 #define VX_KERNEL_RPP_COPY_NAME "org.rpp.Copy"
-#define VX_KERNEL_RPP_BRIGHTNESS_NAME "org.rpp.Brightness"
-#define VX_KERNEL_RPP_BRIGHTNESSBATCHPS_NAME "org.rpp.BrightnessbatchPS"
 #define VX_KERNEL_RPP_BRIGHTNESSBATCHPD_NAME "org.rpp.BrightnessbatchPD"
-#define VX_KERNEL_RPP_BRIGHTNESSBATCHPDROID_NAME "org.rpp.BrightnessbatchPDROID"
-#define VX_KERNEL_RPP_GAMMACORRECTION_NAME "org.rpp.GammaCorrection"
-#define VX_KERNEL_RPP_GAMMACORRECTIONBATCHPS_NAME "org.rpp.GammaCorrectionbatchPS"
 #define VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD_NAME "org.rpp.GammaCorrectionbatchPD"
-#define VX_KERNEL_RPP_GAMMACORRECTIONBATCHPDROID_NAME "org.rpp.GammaCorrectionbatchPDROID"
-#define VX_KERNEL_RPP_BLEND_NAME "org.rpp.Blend"
-#define VX_KERNEL_RPP_BLENDBATCHPS_NAME "org.rpp.BlendbatchPS"
 #define VX_KERNEL_RPP_BLENDBATCHPD_NAME "org.rpp.BlendbatchPD"
-#define VX_KERNEL_RPP_BLENDBATCHPDROID_NAME "org.rpp.BlendbatchPDROID"
-#define VX_KERNEL_RPP_BLUR_NAME "org.rpp.Blur"
-#define VX_KERNEL_RPP_BLURBATCHPS_NAME "org.rpp.BlurbatchPS"
 #define VX_KERNEL_RPP_BLURBATCHPD_NAME "org.rpp.BlurbatchPD"
-#define VX_KERNEL_RPP_BLURBATCHPDROID_NAME "org.rpp.BlurbatchPDROID"
-#define VX_KERNEL_RPP_CONTRAST_NAME "org.rpp.Contrast"
-#define VX_KERNEL_RPP_CONTRASTBATCHPS_NAME "org.rpp.ContrastbatchPS"
 #define VX_KERNEL_RPP_CONTRASTBATCHPD_NAME "org.rpp.ContrastbatchPD"
-#define VX_KERNEL_RPP_CONTRASTBATCHPDROID_NAME "org.rpp.ContrastbatchPDROID"
-#define VX_KERNEL_RPP_PIXELATE_NAME "org.rpp.Pixelate"
-#define VX_KERNEL_RPP_PIXELATEBATCHPS_NAME "org.rpp.PixelatebatchPS"
 #define VX_KERNEL_RPP_PIXELATEBATCHPD_NAME "org.rpp.PixelatebatchPD"
-#define VX_KERNEL_RPP_PIXELATEBATCHPDROID_NAME "org.rpp.PixelatebatchPDROID"
-#define VX_KERNEL_RPP_JITTER_NAME "org.rpp.Jitter"
-#define VX_KERNEL_RPP_JITTERBATCHPS_NAME "org.rpp.JitterbatchPS"
 #define VX_KERNEL_RPP_JITTERBATCHPD_NAME "org.rpp.JitterbatchPD"
-#define VX_KERNEL_RPP_JITTERBATCHPDROID_NAME "org.rpp.JitterbatchPDROID"
-#define VX_KERNEL_RPP_OCCLUSION_NAME "org.rpp.Occlusion"
-#define VX_KERNEL_RPP_OCCLUSIONBATCHPS_NAME "org.rpp.OcclusionbatchPS"
-#define VX_KERNEL_RPP_OCCLUSIONBATCHPD_NAME "org.rpp.OcclusionbatchPD"
-#define VX_KERNEL_RPP_OCCLUSIONBATCHPDROID_NAME "org.rpp.OcclusionbatchPDROID"
-#define VX_KERNEL_RPP_SNOW_NAME "org.rpp.Snow"
-#define VX_KERNEL_RPP_SNOWBATCHPS_NAME "org.rpp.SnowbatchPS"
 #define VX_KERNEL_RPP_SNOWBATCHPD_NAME "org.rpp.SnowbatchPD"
-#define VX_KERNEL_RPP_SNOWBATCHPDROID_NAME "org.rpp.SnowbatchPDROID"
-#define VX_KERNEL_RPP_NOISE_NAME "org.rpp.Noise"
-#define VX_KERNEL_RPP_NOISEBATCHPS_NAME "org.rpp.NoisebatchPS"
 #define VX_KERNEL_RPP_NOISEBATCHPD_NAME "org.rpp.NoisebatchPD"
-#define VX_KERNEL_RPP_NOISEBATCHPDROID_NAME "org.rpp.NoisebatchPDROID"
-#define VX_KERNEL_RPP_RANDOMSHADOW_NAME "org.rpp.RandomShadow"
-#define VX_KERNEL_RPP_RANDOMSHADOWBATCHPS_NAME "org.rpp.RandomShadowbatchPS"
 #define VX_KERNEL_RPP_RANDOMSHADOWBATCHPD_NAME "org.rpp.RandomShadowbatchPD"
-#define VX_KERNEL_RPP_RANDOMSHADOWBATCHPDROID_NAME "org.rpp.RandomShadowbatchPDROID"
-#define VX_KERNEL_RPP_FOG_NAME "org.rpp.Fog"
-#define VX_KERNEL_RPP_FOGBATCHPS_NAME "org.rpp.FogbatchPS"
 #define VX_KERNEL_RPP_FOGBATCHPD_NAME "org.rpp.FogbatchPD"
-#define VX_KERNEL_RPP_FOGBATCHPDROID_NAME "org.rpp.FogbatchPDROID"
-#define VX_KERNEL_RPP_RAIN_NAME "org.rpp.Rain"
-#define VX_KERNEL_RPP_RAINBATCHPS_NAME "org.rpp.RainbatchPS"
 #define VX_KERNEL_RPP_RAINBATCHPD_NAME "org.rpp.RainbatchPD"
-#define VX_KERNEL_RPP_RAINBATCHPDROID_NAME "org.rpp.RainbatchPDROID"
-#define VX_KERNEL_RPP_RANDOMCROPLETTERBOX_NAME "org.rpp.RandomCropLetterBox"
-#define VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPS_NAME "org.rpp.RandomCropLetterBoxbatchPS"
 #define VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD_NAME "org.rpp.RandomCropLetterBoxbatchPD"
-#define VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPDROID_NAME "org.rpp.RandomCropLetterBoxbatchPDROID"
-#define VX_KERNEL_RPP_EXPOSURE_NAME "org.rpp.Exposure"
-#define VX_KERNEL_RPP_EXPOSUREBATCHPS_NAME "org.rpp.ExposurebatchPS"
 #define VX_KERNEL_RPP_EXPOSUREBATCHPD_NAME "org.rpp.ExposurebatchPD"
-#define VX_KERNEL_RPP_EXPOSUREBATCHPDROID_NAME "org.rpp.ExposurebatchPDROID"
-#define VX_KERNEL_RPP_HISTOGRAMBALANCE_NAME "org.rpp.HistogramBalance"
-#define VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPS_NAME "org.rpp.HistogramBalancebatchPS"
 #define VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD_NAME "org.rpp.HistogramBalancebatchPD"
-#define VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPDROID_NAME "org.rpp.HistogramBalancebatchPDROID"
-#define VX_KERNEL_RPP_ABSOLUTEDIFFERENCE_NAME "org.rpp.AbsoluteDifference"
-#define VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPS_NAME "org.rpp.AbsoluteDifferencebatchPS"
 #define VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD_NAME "org.rpp.AbsoluteDifferencebatchPD"
-#define VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPDROID_NAME "org.rpp.AbsoluteDifferencebatchPDROID"
-#define VX_KERNEL_RPP_ACCUMULATEWEIGHTED_NAME "org.rpp.AccumulateWeighted"
-#define VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPS_NAME "org.rpp.AccumulateWeightedbatchPS"
 #define VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD_NAME "org.rpp.AccumulateWeightedbatchPD"
-#define VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPDROID_NAME "org.rpp.AccumulateWeightedbatchPDROID"
-#define VX_KERNEL_RPP_ACCUMULATE_NAME "org.rpp.Accumulate"
-#define VX_KERNEL_RPP_ACCUMULATEBATCHPS_NAME "org.rpp.AccumulatebatchPS"
 #define VX_KERNEL_RPP_ACCUMULATEBATCHPD_NAME "org.rpp.AccumulatebatchPD"
-#define VX_KERNEL_RPP_ACCUMULATEBATCHPDROID_NAME "org.rpp.AccumulatebatchPDROID"
-#define VX_KERNEL_RPP_ADD_NAME "org.rpp.Add"
-#define VX_KERNEL_RPP_ADDBATCHPS_NAME "org.rpp.AddbatchPS"
 #define VX_KERNEL_RPP_ADDBATCHPD_NAME "org.rpp.AddbatchPD"
-#define VX_KERNEL_RPP_ADDBATCHPDROID_NAME "org.rpp.AddbatchPDROID"
-#define VX_KERNEL_RPP_SUBTRACT_NAME "org.rpp.Subtract"
-#define VX_KERNEL_RPP_SUBTRACTBATCHPS_NAME "org.rpp.SubtractbatchPS"
 #define VX_KERNEL_RPP_SUBTRACTBATCHPD_NAME "org.rpp.SubtractbatchPD"
-#define VX_KERNEL_RPP_SUBTRACTBATCHPDROID_NAME "org.rpp.SubtractbatchPDROID"
-#define VX_KERNEL_RPP_MAGNITUDE_NAME "org.rpp.Magnitude"
-#define VX_KERNEL_RPP_MAGNITUDEBATCHPS_NAME "org.rpp.MagnitudebatchPS"
 #define VX_KERNEL_RPP_MAGNITUDEBATCHPD_NAME "org.rpp.MagnitudebatchPD"
-#define VX_KERNEL_RPP_MAGNITUDEBATCHPDROID_NAME "org.rpp.MagnitudebatchPDROID"
-#define VX_KERNEL_RPP_MULTIPLY_NAME "org.rpp.Multiply"
-#define VX_KERNEL_RPP_MULTIPLYBATCHPS_NAME "org.rpp.MultiplybatchPS"
 #define VX_KERNEL_RPP_MULTIPLYBATCHPD_NAME "org.rpp.MultiplybatchPD"
-#define VX_KERNEL_RPP_MULTIPLYBATCHPDROID_NAME "org.rpp.MultiplybatchPDROID"
-#define VX_KERNEL_RPP_PHASE_NAME "org.rpp.Phase"
-#define VX_KERNEL_RPP_PHASEBATCHPS_NAME "org.rpp.PhasebatchPS"
 #define VX_KERNEL_RPP_PHASEBATCHPD_NAME "org.rpp.PhasebatchPD"
-#define VX_KERNEL_RPP_PHASEBATCHPDROID_NAME "org.rpp.PhasebatchPDROID"
-#define VX_KERNEL_RPP_ACCUMULATESQUARED_NAME "org.rpp.AccumulateSquared"
-#define VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPS_NAME "org.rpp.AccumulateSquaredbatchPS"
 #define VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD_NAME "org.rpp.AccumulateSquaredbatchPD"
-#define VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPDROID_NAME "org.rpp.AccumulateSquaredbatchPDROID"
-#define VX_KERNEL_RPP_BITWISEAND_NAME "org.rpp.BitwiseAND"
-#define VX_KERNEL_RPP_BITWISEANDBATCHPS_NAME "org.rpp.BitwiseANDbatchPS"
 #define VX_KERNEL_RPP_BITWISEANDBATCHPD_NAME "org.rpp.BitwiseANDbatchPD"
-#define VX_KERNEL_RPP_BITWISEANDBATCHPDROID_NAME "org.rpp.BitwiseANDbatchPDROID"
-#define VX_KERNEL_RPP_BITWISENOT_NAME "org.rpp.BitwiseNOT"
-#define VX_KERNEL_RPP_BITWISENOTBATCHPS_NAME "org.rpp.BitwiseNOTbatchPS"
 #define VX_KERNEL_RPP_BITWISENOTBATCHPD_NAME "org.rpp.BitwiseNOTbatchPD"
-#define VX_KERNEL_RPP_BITWISENOTBATCHPDROID_NAME "org.rpp.BitwiseNOTbatchPDROID"
-#define VX_KERNEL_RPP_EXCLUSIVEOR_NAME "org.rpp.ExclusiveOR"
-#define VX_KERNEL_RPP_EXCLUSIVEORBATCHPS_NAME "org.rpp.ExclusiveORbatchPS"
 #define VX_KERNEL_RPP_EXCLUSIVEORBATCHPD_NAME "org.rpp.ExclusiveORbatchPD"
-#define VX_KERNEL_RPP_EXCLUSIVEORBATCHPDROID_NAME "org.rpp.ExclusiveORbatchPDROID"
-#define VX_KERNEL_RPP_INCLUSIVEOR_NAME "org.rpp.InclusiveOR"
-#define VX_KERNEL_RPP_INCLUSIVEORBATCHPS_NAME "org.rpp.InclusiveORbatchPS"
 #define VX_KERNEL_RPP_INCLUSIVEORBATCHPD_NAME "org.rpp.InclusiveORbatchPD"
-#define VX_KERNEL_RPP_INCLUSIVEORBATCHPDROID_NAME "org.rpp.InclusiveORbatchPDROID"
 #define VX_KERNEL_RPP_HISTOGRAM_NAME "org.rpp.Histogram"
-#define VX_KERNEL_RPP_THRESHOLDING_NAME "org.rpp.Thresholding"
-#define VX_KERNEL_RPP_THRESHOLDINGBATCHPS_NAME "org.rpp.ThresholdingbatchPS"
 #define VX_KERNEL_RPP_THRESHOLDINGBATCHPD_NAME "org.rpp.ThresholdingbatchPD"
-#define VX_KERNEL_RPP_THRESHOLDINGBATCHPDROID_NAME "org.rpp.ThresholdingbatchPDROID"
-#define VX_KERNEL_RPP_MAX_NAME "org.rpp.Max"
-#define VX_KERNEL_RPP_MAXBATCHPS_NAME "org.rpp.MaxbatchPS"
 #define VX_KERNEL_RPP_MAXBATCHPD_NAME "org.rpp.MaxbatchPD"
-#define VX_KERNEL_RPP_MAXBATCHPDROID_NAME "org.rpp.MaxbatchPDROID"
-#define VX_KERNEL_RPP_MIN_NAME "org.rpp.Min"
-#define VX_KERNEL_RPP_MINBATCHPS_NAME "org.rpp.MinbatchPS"
 #define VX_KERNEL_RPP_MINBATCHPD_NAME "org.rpp.MinbatchPD"
-#define VX_KERNEL_RPP_MINBATCHPDROID_NAME "org.rpp.MinbatchPDROID"
 #define VX_KERNEL_RPP_MINMAXLOC_NAME "org.rpp.MinMaxLoc"
-#define VX_KERNEL_RPP_HISTOGRAMEQUALIZE_NAME "org.rpp.HistogramEqualize"
-#define VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPS_NAME "org.rpp.HistogramEqualizebatchPS"
 #define VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD_NAME "org.rpp.HistogramEqualizebatchPD"
-#define VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPDROID_NAME "org.rpp.HistogramEqualizebatchPDROID"
 #define VX_KERNEL_RPP_MEANSTDDEV_NAME "org.rpp.MeanStddev"
-#define VX_KERNEL_RPP_FLIP_NAME "org.rpp.Flip"
-#define VX_KERNEL_RPP_FLIPBATCHPS_NAME "org.rpp.FlipbatchPS"
 #define VX_KERNEL_RPP_FLIPBATCHPD_NAME "org.rpp.FlipbatchPD"
-#define VX_KERNEL_RPP_FLIPBATCHPDROID_NAME "org.rpp.FlipbatchPDROID"
-#define VX_KERNEL_RPP_RESIZE_NAME "org.rpp.Resize"
-#define VX_KERNEL_RPP_RESIZEBATCHPS_NAME "org.rpp.ResizebatchPS"
 #define VX_KERNEL_RPP_RESIZEBATCHPD_NAME "org.rpp.ResizebatchPD"
-#define VX_KERNEL_RPP_RESIZEBATCHPDROID_NAME "org.rpp.ResizebatchPDROID"
-#define VX_KERNEL_RPP_RESIZECROP_NAME "org.rpp.ResizeCrop"
-#define VX_KERNEL_RPP_RESIZECROPBATCHPS_NAME "org.rpp.ResizeCropbatchPS"
 #define VX_KERNEL_RPP_RESIZECROPBATCHPD_NAME "org.rpp.ResizeCropbatchPD"
-#define VX_KERNEL_RPP_RESIZECROPBATCHPDROID_NAME "org.rpp.ResizeCropbatchPDROID"
-#define VX_KERNEL_RPP_ROTATE_NAME "org.rpp.Rotate"
-#define VX_KERNEL_RPP_ROTATEBATCHPS_NAME "org.rpp.RotatebatchPS"
 #define VX_KERNEL_RPP_ROTATEBATCHPD_NAME "org.rpp.RotatebatchPD"
-#define VX_KERNEL_RPP_ROTATEBATCHPDROID_NAME "org.rpp.RotatebatchPDROID"
-#define VX_KERNEL_RPP_WARPAFFINE_NAME "org.rpp.WarpAffine"
-#define VX_KERNEL_RPP_WARPAFFINEBATCHPS_NAME "org.rpp.WarpAffinebatchPS"
 #define VX_KERNEL_RPP_WARPAFFINEBATCHPD_NAME "org.rpp.WarpAffinebatchPD"
-#define VX_KERNEL_RPP_WARPAFFINEBATCHPDROID_NAME "org.rpp.WarpAffinebatchPDROID"
-#define VX_KERNEL_RPP_FISHEYE_NAME "org.rpp.Fisheye"
-#define VX_KERNEL_RPP_FISHEYEBATCHPS_NAME "org.rpp.FisheyebatchPS"
 #define VX_KERNEL_RPP_FISHEYEBATCHPD_NAME "org.rpp.FisheyebatchPD"
-#define VX_KERNEL_RPP_FISHEYEBATCHPDROID_NAME "org.rpp.FisheyebatchPDROID"
-#define VX_KERNEL_RPP_LENSCORRECTION_NAME "org.rpp.LensCorrection"
-#define VX_KERNEL_RPP_LENSCORRECTIONBATCHPS_NAME "org.rpp.LensCorrectionbatchPS"
 #define VX_KERNEL_RPP_LENSCORRECTIONBATCHPD_NAME "org.rpp.LensCorrectionbatchPD"
-#define VX_KERNEL_RPP_LENSCORRECTIONBATCHPDROID_NAME "org.rpp.LensCorrectionbatchPDROID"
-#define VX_KERNEL_RPP_SCALE_NAME "org.rpp.Scale"
-#define VX_KERNEL_RPP_SCALEBATCHPS_NAME "org.rpp.ScalebatchPS"
 #define VX_KERNEL_RPP_SCALEBATCHPD_NAME "org.rpp.ScalebatchPD"
-#define VX_KERNEL_RPP_SCALEBATCHPDROID_NAME "org.rpp.ScalebatchPDROID"
-#define VX_KERNEL_RPP_WARPPERSPECTIVE_NAME "org.rpp.WarpPerspective"
-#define VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPS_NAME "org.rpp.WarpPerspectivebatchPS"
 #define VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD_NAME "org.rpp.WarpPerspectivebatchPD"
-#define VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPDROID_NAME "org.rpp.WarpPerspectivebatchPDROID"
-#define VX_KERNEL_RPP_DILATE_NAME "org.rpp.Dilate"
-#define VX_KERNEL_RPP_DILATEBATCHPS_NAME "org.rpp.DilatebatchPS"
 #define VX_KERNEL_RPP_DILATEBATCHPD_NAME "org.rpp.DilatebatchPD"
-#define VX_KERNEL_RPP_DILATEBATCHPDROID_NAME "org.rpp.DilatebatchPDROID"
-#define VX_KERNEL_RPP_ERODE_NAME "org.rpp.Erode"
-#define VX_KERNEL_RPP_ERODEBATCHPS_NAME "org.rpp.ErodebatchPS"
 #define VX_KERNEL_RPP_ERODEBATCHPD_NAME "org.rpp.ErodebatchPD"
-#define VX_KERNEL_RPP_ERODEBATCHPDROID_NAME "org.rpp.ErodebatchPDROID"
-#define VX_KERNEL_RPP_HUE_NAME "org.rpp.Hue"
-#define VX_KERNEL_RPP_HUEBATCHPS_NAME "org.rpp.HuebatchPS"
 #define VX_KERNEL_RPP_HUEBATCHPD_NAME "org.rpp.HuebatchPD"
-#define VX_KERNEL_RPP_HUEBATCHPDROID_NAME "org.rpp.HuebatchPDROID"
-#define VX_KERNEL_RPP_SATURATION_NAME "org.rpp.Saturation"
-#define VX_KERNEL_RPP_SATURATIONBATCHPS_NAME "org.rpp.SaturationbatchPS"
 #define VX_KERNEL_RPP_SATURATIONBATCHPD_NAME "org.rpp.SaturationbatchPD"
-#define VX_KERNEL_RPP_SATURATIONBATCHPDROID_NAME "org.rpp.SaturationbatchPDROID"
-#define VX_KERNEL_RPP_COLORTEMPERATURE_NAME "org.rpp.ColorTemperature"
-#define VX_KERNEL_RPP_COLORTEMPERATUREBATCHPS_NAME "org.rpp.ColorTemperaturebatchPS"
 #define VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD_NAME "org.rpp.ColorTemperaturebatchPD"
-#define VX_KERNEL_RPP_COLORTEMPERATUREBATCHPDROID_NAME "org.rpp.ColorTemperaturebatchPDROID"
-#define VX_KERNEL_RPP_VIGNETTE_NAME "org.rpp.Vignette"
-#define VX_KERNEL_RPP_VIGNETTEBATCHPS_NAME "org.rpp.VignettebatchPS"
 #define VX_KERNEL_RPP_VIGNETTEBATCHPD_NAME "org.rpp.VignettebatchPD"
-#define VX_KERNEL_RPP_VIGNETTEBATCHPDROID_NAME "org.rpp.VignettebatchPDROID"
-#define VX_KERNEL_RPP_CHANNELEXTRACT_NAME "org.rpp.ChannelExtract"
-#define VX_KERNEL_RPP_CHANNELEXTRACTBATCHPS_NAME "org.rpp.ChannelExtractbatchPS"
 #define VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD_NAME "org.rpp.ChannelExtractbatchPD"
-#define VX_KERNEL_RPP_CHANNELCOMBINE_NAME "org.rpp.ChannelCombine"
-#define VX_KERNEL_RPP_CHANNELCOMBINEBATCHPS_NAME "org.rpp.ChannelCombinebatchPS"
 #define VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD_NAME "org.rpp.ChannelCombinebatchPD"
-#define VX_KERNEL_RPP_LOOKUPTABLE_NAME "org.rpp.LookUpTable"
-#define VX_KERNEL_RPP_LOOKUPTABLEBATCHPS_NAME "org.rpp.LookUpTablebatchPS"
 #define VX_KERNEL_RPP_LOOKUPTABLEBATCHPD_NAME "org.rpp.LookUpTablebatchPD"
-#define VX_KERNEL_RPP_LOOKUPTABLEBATCHPDROID_NAME "org.rpp.LookUpTablebatchPDROID"
-#define VX_KERNEL_RPP_BILATERALFILTER_NAME "org.rpp.BilateralFilter"
-#define VX_KERNEL_RPP_BILATERALFILTERBATCHPS_NAME "org.rpp.BilateralFilterbatchPS"
 #define VX_KERNEL_RPP_BILATERALFILTERBATCHPD_NAME "org.rpp.BilateralFilterbatchPD"
-#define VX_KERNEL_RPP_BILATERALFILTERBATCHPDROID_NAME "org.rpp.BilateralFilterbatchPDROID"
-#define VX_KERNEL_RPP_BOXFILTER_NAME "org.rpp.BoxFilter"
-#define VX_KERNEL_RPP_BOXFILTERBATCHPS_NAME "org.rpp.BoxFilterbatchPS"
 #define VX_KERNEL_RPP_BOXFILTERBATCHPD_NAME "org.rpp.BoxFilterbatchPD"
-#define VX_KERNEL_RPP_BOXFILTERBATCHPDROID_NAME "org.rpp.BoxFilterbatchPDROID"
-#define VX_KERNEL_RPP_SOBEL_NAME "org.rpp.Sobel"
-#define VX_KERNEL_RPP_SOBELBATCHPS_NAME "org.rpp.SobelbatchPS"
 #define VX_KERNEL_RPP_SOBELBATCHPD_NAME "org.rpp.SobelbatchPD"
-#define VX_KERNEL_RPP_SOBELBATCHPDROID_NAME "org.rpp.SobelbatchPDROID"
-#define VX_KERNEL_RPP_MEDIANFILTER_NAME "org.rpp.MedianFilter"
-#define VX_KERNEL_RPP_MEDIANFILTERBATCHPS_NAME "org.rpp.MedianFilterbatchPS"
 #define VX_KERNEL_RPP_MEDIANFILTERBATCHPD_NAME "org.rpp.MedianFilterbatchPD"
-#define VX_KERNEL_RPP_MEDIANFILTERBATCHPDROID_NAME "org.rpp.MedianFilterbatchPDROID"
-#define VX_KERNEL_RPP_CUSTOMCONVOLUTION_NAME "org.rpp.CustomConvolution"
-#define VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPS_NAME "org.rpp.CustomConvolutionbatchPS"
 #define VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD_NAME "org.rpp.CustomConvolutionbatchPD"
-#define VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPDROID_NAME "org.rpp.CustomConvolutionbatchPDROID"
-#define VX_KERNEL_RPP_NONMAXSUPRESSION_NAME "org.rpp.NonMaxSupression"
-#define VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPS_NAME "org.rpp.NonMaxSupressionbatchPS"
 #define VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD_NAME "org.rpp.NonMaxSupressionbatchPD"
-#define VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPDROID_NAME "org.rpp.NonMaxSupressionbatchPDROID"
-#define VX_KERNEL_RPP_GAUSSIANFILTER_NAME "org.rpp.GaussianFilter"
-#define VX_KERNEL_RPP_GAUSSIANFILTERBATCHPS_NAME "org.rpp.GaussianFilterbatchPS"
 #define VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD_NAME "org.rpp.GaussianFilterbatchPD"
-#define VX_KERNEL_RPP_GAUSSIANFILTERBATCHPDROID_NAME "org.rpp.GaussianFilterbatchPDROID"
-#define VX_KERNEL_RPP_NONLINEARFILTER_NAME "org.rpp.NonLinearFilter"
-#define VX_KERNEL_RPP_NONLINEARFILTERBATCHPS_NAME "org.rpp.NonLinearFilterbatchPS"
 #define VX_KERNEL_RPP_NONLINEARFILTERBATCHPD_NAME "org.rpp.NonLinearFilterbatchPD"
-#define VX_KERNEL_RPP_NONLINEARFILTERBATCHPDROID_NAME "org.rpp.NonLinearFilterbatchPDROID"
-#define VX_KERNEL_RPP_LOCALBINARYPATTERN_NAME "org.rpp.LocalBinaryPattern"
-#define VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPS_NAME "org.rpp.LocalBinaryPatternbatchPS"
 #define VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD_NAME "org.rpp.LocalBinaryPatternbatchPD"
-#define VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPDROID_NAME "org.rpp.LocalBinaryPatternbatchPDROID"
-#define VX_KERNEL_RPP_DATAOBJECTCOPY_NAME "org.rpp.DataObjectCopy"
-#define VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPS_NAME "org.rpp.DataObjectCopybatchPS"
 #define VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD_NAME "org.rpp.DataObjectCopybatchPD"
-#define VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPDROID_NAME "org.rpp.DataObjectCopybatchPDROID"
-#define VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMID_NAME "org.rpp.GaussianImagePyramid"
-#define VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPS_NAME "org.rpp.GaussianImagePyramidbatchPS"
 #define VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD_NAME "org.rpp.GaussianImagePyramidbatchPD"
 #define VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID_NAME "org.rpp.LaplacianImagePyramid"
 #define VX_KERNEL_RPP_CANNYEDGEDETECTOR_NAME "org.rpp.CannyEdgeDetector"
 #define VX_KERNEL_RPP_HARRISCORNERDETECTOR_NAME "org.rpp.HarrisCornerDetector"
 #define VX_KERNEL_RPP_FASTCORNERDETECTOR_NAME "org.rpp.FastCornerDetector"
-#define VX_KERNEL_RPP_CONTROLFLOW_NAME "org.rpp.ControlFlow"
-#define VX_KERNEL_RPP_CONTROLFLOWBATCHPS_NAME "org.rpp.ControlFlowbatchPS"
-#define VX_KERNEL_RPP_CONTROLFLOWBATCHPD_NAME "org.rpp.ControlFlowbatchPD"
-#define VX_KERNEL_RPP_CONTROLFLOWBATCHPDROID_NAME "org.rpp.ControlFlowbatchPDROID"
 #define VX_KERNEL_RPP_REMAP_NAME "org.rpp.remap"
 #define VX_KERNEL_RPP_TENSORADD_NAME "org.rpp.TensorAdd"
 #define VX_KERNEL_RPP_TENSORSUBTRACT_NAME "org.rpp.TensorSubtract"
 #define VX_KERNEL_RPP_TENSORMULTIPLY_NAME "org.rpp.TensorMultiply"
 #define VX_KERNEL_RPP_TENSORMATRIXMULTIPLY_NAME "org.rpp.TensorMatrixMultiply"
 #define VX_KERNEL_RPP_TENSORLOOKUP_NAME "org.rpp.TensorLookup"
-#define VX_KERNEL_RPP_COLORTWIST_NAME "org.rpp.ColorTwist"
 #define VX_KERNEL_RPP_COLORTWISTBATCHPD_NAME "org.rpp.ColorTwistbatchPD"
 #define VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD_NAME "org.rpp.CropMirrorNormalizebatchPD"
 #define VX_KERNEL_RPP_CROPPD_NAME "org.rpp.CropPD"
 #define VX_KERNEL_RPP_RESIZECROPMIRRORPD_NAME "org.rpp.ResizeCropMirrorPD"
-// #define VX_KERNEL_RPP_COLORTWISTBATCHPS_NAME "org.rpp.ColorTwistPS"
-// #define VX_KERNEL_RPP_COLORTWISTBATCHPDROID_NAME "org.rpp.ColorTwistPDROID"
 #endif //_AMDVX_EXT__PUBLISH_KERNELS_H_
diff --git a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
index e8dec73849..bc73decdb4 100644
--- a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
+++ b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
@@ -23,286 +23,98 @@ THE SOFTWARE.
 #ifndef _VX_KERNELS_RPP_H_
 #define _VX_KERNELS_RPP_H_
-#ifdef __cplusplus
-extern "C" {
+#ifdef __cplusplus
+extern "C"
+{
 #endif
 #define VX_LIBRARY_RPP 5
-enum vx_kernel_ext_amd_rpp_e
-{
-    VX_KERNEL_RPP_BRIGHTNESS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x0,
-    VX_KERNEL_RPP_BRIGHTNESSBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x1,
-    VX_KERNEL_RPP_BRIGHTNESSBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x2,
-    VX_KERNEL_RPP_BRIGHTNESSBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x3,
-    VX_KERNEL_RPP_GAMMACORRECTION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x4,
-    VX_KERNEL_RPP_GAMMACORRECTIONBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x5,
-    VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x6,
-    VX_KERNEL_RPP_GAMMACORRECTIONBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x7,
-    VX_KERNEL_RPP_BLEND = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x8,
-    VX_KERNEL_RPP_BLENDBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x9,
-    VX_KERNEL_RPP_BLENDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa,
-    VX_KERNEL_RPP_BLENDBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb,
-    VX_KERNEL_RPP_BLUR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc,
-    VX_KERNEL_RPP_BLURBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xd,
-    VX_KERNEL_RPP_BLURBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe,
-    VX_KERNEL_RPP_BLURBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf,
-    VX_KERNEL_RPP_CONTRAST = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x10,
-    VX_KERNEL_RPP_CONTRASTBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x11,
-    VX_KERNEL_RPP_CONTRASTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x12,
-    VX_KERNEL_RPP_CONTRASTBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x13,
-    VX_KERNEL_RPP_PIXELATE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x14,
-    VX_KERNEL_RPP_PIXELATEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x15,
-    VX_KERNEL_RPP_PIXELATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x16,
-    VX_KERNEL_RPP_PIXELATEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x17,
-    VX_KERNEL_RPP_JITTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x18,
-    VX_KERNEL_RPP_JITTERBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x19,
-    VX_KERNEL_RPP_JITTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x1a,
-    VX_KERNEL_RPP_JITTERBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x1b,
-    VX_KERNEL_RPP_OCCLUSION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x1c,
-    VX_KERNEL_RPP_OCCLUSIONBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x1d,
-    VX_KERNEL_RPP_OCCLUSIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x1e,
-    VX_KERNEL_RPP_OCCLUSIONBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x1f,
-    VX_KERNEL_RPP_SNOW = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x20,
-    VX_KERNEL_RPP_SNOWBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x21,
-    VX_KERNEL_RPP_SNOWBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x22,
-    VX_KERNEL_RPP_SNOWBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x23,
-    VX_KERNEL_RPP_NOISE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x24,
-    VX_KERNEL_RPP_NOISEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x25,
-    VX_KERNEL_RPP_NOISEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x26,
-    VX_KERNEL_RPP_NOISEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x27,
-    VX_KERNEL_RPP_RANDOMSHADOW = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x28,
-    VX_KERNEL_RPP_RANDOMSHADOWBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x29,
-    VX_KERNEL_RPP_RANDOMSHADOWBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x2a,
-    VX_KERNEL_RPP_RANDOMSHADOWBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x2b,
-    VX_KERNEL_RPP_FOG = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x2c,
-    VX_KERNEL_RPP_FOGBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x2d,
-    VX_KERNEL_RPP_FOGBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x2e,
-    VX_KERNEL_RPP_FOGBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x2f,
-    VX_KERNEL_RPP_RAIN = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x30,
-    VX_KERNEL_RPP_RAINBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x31,
-    VX_KERNEL_RPP_RAINBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x32,
-    VX_KERNEL_RPP_RAINBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x33,
-    VX_KERNEL_RPP_RANDOMCROPLETTERBOX = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x34,
-    VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x35,
-    VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x36,
-    VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x37,
-    VX_KERNEL_RPP_EXPOSURE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x38,
-    VX_KERNEL_RPP_EXPOSUREBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x39,
-    VX_KERNEL_RPP_EXPOSUREBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x3a,
-    VX_KERNEL_RPP_EXPOSUREBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x3b,
-    VX_KERNEL_RPP_HISTOGRAMBALANCE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x3c,
-    VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x3d,
-    VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x3e,
-    VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x3f,
-    VX_KERNEL_RPP_ABSOLUTEDIFFERENCE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x40,
-    VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x41,
-    VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x42,
-    VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x43,
-    VX_KERNEL_RPP_ACCUMULATEWEIGHTED = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x44,
-    VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x45,
-    VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x46,
-    VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x47,
-    VX_KERNEL_RPP_ACCUMULATE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x48,
-    VX_KERNEL_RPP_ACCUMULATEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x49,
-    VX_KERNEL_RPP_ACCUMULATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x4a,
-    VX_KERNEL_RPP_ACCUMULATEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x4b,
-    VX_KERNEL_RPP_ADD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x4c,
-    VX_KERNEL_RPP_ADDBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x4d,
-    VX_KERNEL_RPP_ADDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x4e,
-    VX_KERNEL_RPP_ADDBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x4f,
-    VX_KERNEL_RPP_SUBTRACT = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x50,
-    VX_KERNEL_RPP_SUBTRACTBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x51,
-    VX_KERNEL_RPP_SUBTRACTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x52,
-    VX_KERNEL_RPP_SUBTRACTBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x53,
-    VX_KERNEL_RPP_MAGNITUDE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x54,
-    VX_KERNEL_RPP_MAGNITUDEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x55,
-    VX_KERNEL_RPP_MAGNITUDEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x56,
-    VX_KERNEL_RPP_MAGNITUDEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x57,
-    VX_KERNEL_RPP_MULTIPLY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x58,
-    VX_KERNEL_RPP_MULTIPLYBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x59,
-    VX_KERNEL_RPP_MULTIPLYBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x5a,
-    VX_KERNEL_RPP_MULTIPLYBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x5b,
-    VX_KERNEL_RPP_PHASE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x5c,
-    VX_KERNEL_RPP_PHASEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x5d,
-    VX_KERNEL_RPP_PHASEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x5e,
-    VX_KERNEL_RPP_PHASEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x5f,
-    VX_KERNEL_RPP_ACCUMULATESQUARED = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x60,
-    VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x61,
-    VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x62,
-    VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x63,
-    VX_KERNEL_RPP_BITWISEAND = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x64,
-    VX_KERNEL_RPP_BITWISEANDBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x65,
-    VX_KERNEL_RPP_BITWISEANDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x66,
-    VX_KERNEL_RPP_BITWISEANDBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x67,
-    VX_KERNEL_RPP_BITWISENOT = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x68,
-    VX_KERNEL_RPP_BITWISENOTBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x69,
-    VX_KERNEL_RPP_BITWISENOTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x6a,
-    VX_KERNEL_RPP_BITWISENOTBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x6b,
-    VX_KERNEL_RPP_EXCLUSIVEOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x6c,
-    VX_KERNEL_RPP_EXCLUSIVEORBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x6d,
-    VX_KERNEL_RPP_EXCLUSIVEORBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x6e,
-    VX_KERNEL_RPP_EXCLUSIVEORBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x6f,
-    VX_KERNEL_RPP_INCLUSIVEOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x70,
-    VX_KERNEL_RPP_INCLUSIVEORBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x71,
-    VX_KERNEL_RPP_INCLUSIVEORBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x72,
-    VX_KERNEL_RPP_INCLUSIVEORBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x73,
-    VX_KERNEL_RPP_HISTOGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x74,
-    VX_KERNEL_RPP_THRESHOLDING = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x78,
-    VX_KERNEL_RPP_THRESHOLDINGBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x79,
-    VX_KERNEL_RPP_THRESHOLDINGBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x7a,
-    VX_KERNEL_RPP_THRESHOLDINGBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x7b,
-    VX_KERNEL_RPP_MAX = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x7c,
-    VX_KERNEL_RPP_MAXBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x7d,
-    VX_KERNEL_RPP_MAXBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x7e,
-    VX_KERNEL_RPP_MAXBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x7f,
-    VX_KERNEL_RPP_MIN = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x80,
-    VX_KERNEL_RPP_MINBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x81,
-    VX_KERNEL_RPP_MINBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x82,
-    VX_KERNEL_RPP_MINBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x83,
-    VX_KERNEL_RPP_MINMAXLOC = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x84,
-    VX_KERNEL_RPP_HISTOGRAMEQUALIZE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x8c,
-    VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x8d,
-    VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x8e,
-    VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x8f,
-    VX_KERNEL_RPP_MEANSTDDEV = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x90,
-    VX_KERNEL_RPP_FLIP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x94,
-    VX_KERNEL_RPP_FLIPBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x95,
-    VX_KERNEL_RPP_FLIPBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x96,
-    VX_KERNEL_RPP_FLIPBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x97,
-    VX_KERNEL_RPP_RESIZE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x98,
-    VX_KERNEL_RPP_RESIZEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x99,
-    VX_KERNEL_RPP_RESIZEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x9a,
-    VX_KERNEL_RPP_RESIZEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x9b,
-    VX_KERNEL_RPP_RESIZECROP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x9c,
-    VX_KERNEL_RPP_RESIZECROPBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x9d,
-    VX_KERNEL_RPP_RESIZECROPBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x9e,
-    VX_KERNEL_RPP_RESIZECROPBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x9f,
-    VX_KERNEL_RPP_ROTATE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa0,
-    VX_KERNEL_RPP_ROTATEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa1,
-    VX_KERNEL_RPP_ROTATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa2,
-    VX_KERNEL_RPP_ROTATEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa3,
-    VX_KERNEL_RPP_WARPAFFINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa4,
-    VX_KERNEL_RPP_WARPAFFINEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa5,
-    VX_KERNEL_RPP_WARPAFFINEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa6,
-    VX_KERNEL_RPP_WARPAFFINEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa7,
-    VX_KERNEL_RPP_FISHEYE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa8,
-    VX_KERNEL_RPP_FISHEYEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xa9,
-    VX_KERNEL_RPP_FISHEYEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xaa,
-    VX_KERNEL_RPP_FISHEYEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xab,
-    VX_KERNEL_RPP_LENSCORRECTION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xac,
-    VX_KERNEL_RPP_LENSCORRECTIONBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xad,
-    VX_KERNEL_RPP_LENSCORRECTIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xae,
-    VX_KERNEL_RPP_LENSCORRECTIONBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xaf,
-    VX_KERNEL_RPP_SCALE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb0,
-    VX_KERNEL_RPP_SCALEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb1,
-    VX_KERNEL_RPP_SCALEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb2,
-    VX_KERNEL_RPP_SCALEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb3,
-    VX_KERNEL_RPP_WARPPERSPECTIVE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb4,
-    VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb5,
-    VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb6,
-    VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb7,
-    VX_KERNEL_RPP_DILATE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb8,
-    VX_KERNEL_RPP_DILATEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xb9,
-    VX_KERNEL_RPP_DILATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xba,
-    VX_KERNEL_RPP_DILATEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xbb,
-    VX_KERNEL_RPP_ERODE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xbc,
-    VX_KERNEL_RPP_ERODEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xbd,
-    VX_KERNEL_RPP_ERODEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xbe,
-    VX_KERNEL_RPP_ERODEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xbf,
-    VX_KERNEL_RPP_HUE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc0,
-    VX_KERNEL_RPP_HUEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc1,
-    VX_KERNEL_RPP_HUEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc2,
-    VX_KERNEL_RPP_HUEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc3,
-    VX_KERNEL_RPP_SATURATION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc4,
-    VX_KERNEL_RPP_SATURATIONBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc5,
-    VX_KERNEL_RPP_SATURATIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc6,
-    VX_KERNEL_RPP_SATURATIONBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc7,
-    VX_KERNEL_RPP_COLORTEMPERATURE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc8,
-    VX_KERNEL_RPP_COLORTEMPERATUREBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xc9,
-    VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xca,
-    VX_KERNEL_RPP_COLORTEMPERATUREBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xcb,
-    VX_KERNEL_RPP_VIGNETTE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xcc,
-    VX_KERNEL_RPP_VIGNETTEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xcd,
-    VX_KERNEL_RPP_VIGNETTEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xce,
-    VX_KERNEL_RPP_VIGNETTEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xcf,
-    VX_KERNEL_RPP_CHANNELEXTRACT = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xd0,
-    VX_KERNEL_RPP_CHANNELEXTRACTBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xd1,
-    VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xd2,
-    VX_KERNEL_RPP_CHANNELCOMBINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xd4,
-    VX_KERNEL_RPP_CHANNELCOMBINEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xd5,
-    VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xd6,
-    VX_KERNEL_RPP_LOOKUPTABLE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xd8,
-    VX_KERNEL_RPP_LOOKUPTABLEBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xd9,
-    VX_KERNEL_RPP_LOOKUPTABLEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xda,
-    VX_KERNEL_RPP_LOOKUPTABLEBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xdb,
-    VX_KERNEL_RPP_BILATERALFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xdc,
-    VX_KERNEL_RPP_BILATERALFILTERBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xdd,
-    VX_KERNEL_RPP_BILATERALFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xde,
-    VX_KERNEL_RPP_BILATERALFILTERBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xdf,
-    VX_KERNEL_RPP_BOXFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe0,
-    VX_KERNEL_RPP_BOXFILTERBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe1,
-    VX_KERNEL_RPP_BOXFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe2,
-    VX_KERNEL_RPP_BOXFILTERBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe3,
-    VX_KERNEL_RPP_SOBEL = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe4,
-    VX_KERNEL_RPP_SOBELBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe5,
-    VX_KERNEL_RPP_SOBELBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe6,
-    VX_KERNEL_RPP_SOBELBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe7,
-    VX_KERNEL_RPP_MEDIANFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe8,
-    VX_KERNEL_RPP_MEDIANFILTERBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xe9,
-    VX_KERNEL_RPP_MEDIANFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xea,
-    VX_KERNEL_RPP_MEDIANFILTERBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xeb,
-    VX_KERNEL_RPP_CUSTOMCONVOLUTION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xec,
-    VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xed,
-    VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xee,
-    VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xef,
-    VX_KERNEL_RPP_NONMAXSUPRESSION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf0,
-    VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf1,
-    VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf2,
-    VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf3,
-    VX_KERNEL_RPP_GAUSSIANFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf4,
-    VX_KERNEL_RPP_GAUSSIANFILTERBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf5,
-    VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf6,
-    VX_KERNEL_RPP_GAUSSIANFILTERBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf7,
-    VX_KERNEL_RPP_NONLINEARFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf8,
-    VX_KERNEL_RPP_NONLINEARFILTERBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xf9,
-    VX_KERNEL_RPP_NONLINEARFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xfa,
-    VX_KERNEL_RPP_NONLINEARFILTERBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xfb,
-    VX_KERNEL_RPP_LOCALBINARYPATTERN = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xfc,
-    VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xfd,
-    VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xfe,
-    VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0xff,
-    VX_KERNEL_RPP_DATAOBJECTCOPY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x100,
-    VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x101,
-    VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x102,
-    VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x103,
-    VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x104,
-    VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x105,
-    VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x106,
-    VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x108,
-    VX_KERNEL_RPP_CANNYEDGEDETECTOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x10c,
-    VX_KERNEL_RPP_HARRISCORNERDETECTOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x110,
-    VX_KERNEL_RPP_FASTCORNERDETECTOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x114,
-    VX_KERNEL_RPP_CONTROLFLOW = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x118,
-    VX_KERNEL_RPP_CONTROLFLOWBATCHPS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x119,
-    VX_KERNEL_RPP_CONTROLFLOWBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x11a,
-    VX_KERNEL_RPP_CONTROLFLOWBATCHPDROID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x11b,
-    VX_KERNEL_RPP_REMAP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x11c,
-    VX_KERNEL_RPP_TENSORADD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x11d,
-    VX_KERNEL_RPP_TENSORSUBTRACT = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x11e,
-    VX_KERNEL_RPP_TENSORMULTIPLY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x11f,
-    VX_KERNEL_RPP_TENSORMATRIXMULTIPLY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x120,
-    VX_KERNEL_RPP_TENSORLOOKUP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x121,
-    VX_KERNEL_RPP_COLORTWIST = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x122,
-    VX_KERNEL_RPP_COLORTWISTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x123,
-    VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x124,
-    VX_KERNEL_RPP_CROPPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x125,
-    VX_KERNEL_RPP_RESIZECROPMIRRORPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) +0x126,
-    VX_KERNEL_RPP_COPY, //= VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x122,
-    VX_KERNEL_RPP_NOP //= VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x123,
-};
+    enum vx_kernel_ext_amd_rpp_e
+    {
+        VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x0,
+        VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1,
+        VX_KERNEL_RPP_ACCUMULATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2,
+        VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3,
+        VX_KERNEL_RPP_ADDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4,
+        VX_KERNEL_RPP_BLENDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x5,
+        VX_KERNEL_RPP_BLURBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x6,
+        VX_KERNEL_RPP_BITWISEANDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x7,
+        VX_KERNEL_RPP_BITWISENOTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x8,
+        VX_KERNEL_RPP_BILATERALFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x9,
+        VX_KERNEL_RPP_BRIGHTNESSBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xa,
+        VX_KERNEL_RPP_BOXFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xb,
+        VX_KERNEL_RPP_CONTRASTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xc,
+        VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xd,
+        VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xe,
+        VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0xf,
+        VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x10,
+        VX_KERNEL_RPP_CANNYEDGEDETECTOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x11,
+        VX_KERNEL_RPP_COLORTWISTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x13,
+        VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x14,
+        VX_KERNEL_RPP_CROPPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x15,
+        VX_KERNEL_RPP_COPY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x16,
+        VX_KERNEL_RPP_DILATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x17,
+ VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x18, + VX_KERNEL_RPP_EXPOSUREBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x19, + VX_KERNEL_RPP_EXCLUSIVEORBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1a, + VX_KERNEL_RPP_ERODEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1b, + VX_KERNEL_RPP_FLIPBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1c, + VX_KERNEL_RPP_FOGBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1d, + VX_KERNEL_RPP_FISHEYEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1e, + VX_KERNEL_RPP_FASTCORNERDETECTOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x1f, + VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x20, + VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x21, + VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x22, + VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x23, + VX_KERNEL_RPP_HISTOGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x24, + VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x25, + VX_KERNEL_RPP_HUEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x26, + VX_KERNEL_RPP_HARRISCORNERDETECTOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x27, + VX_KERNEL_RPP_INCLUSIVEORBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x28, + VX_KERNEL_RPP_JITTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x29, + VX_KERNEL_RPP_LENSCORRECTIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2a, + VX_KERNEL_RPP_LOOKUPTABLEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2b, + VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2c, + VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2d, + VX_KERNEL_RPP_MAGNITUDEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2e, + VX_KERNEL_RPP_MULTIPLYBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x2f, + VX_KERNEL_RPP_MAXBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x30, + VX_KERNEL_RPP_MINBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x31, + VX_KERNEL_RPP_MINMAXLOC = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x32, + VX_KERNEL_RPP_MEANSTDDEV = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x33, + VX_KERNEL_RPP_MEDIANFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x34, + VX_KERNEL_RPP_NOISEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x35, + VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x36, + VX_KERNEL_RPP_NONLINEARFILTERBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x37, + VX_KERNEL_RPP_NOP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x38, + VX_KERNEL_RPP_PIXELATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x39, + VX_KERNEL_RPP_PHASEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3a, + VX_KERNEL_RPP_RANDOMSHADOWBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3b, + VX_KERNEL_RPP_RAINBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3c, + VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3d, + VX_KERNEL_RPP_RESIZEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3e, + VX_KERNEL_RPP_RESIZECROPBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x3f, + VX_KERNEL_RPP_ROTATEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x40, + VX_KERNEL_RPP_REMAP = VX_KERNEL_BASE(VX_ID_AMD, 
VX_LIBRARY_RPP) + 0x41, + VX_KERNEL_RPP_RESIZECROPMIRRORPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x42, + VX_KERNEL_RPP_SNOWBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x43, + VX_KERNEL_RPP_SUBTRACTBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x44, + VX_KERNEL_RPP_SCALEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x45, + VX_KERNEL_RPP_SATURATIONBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x46, + VX_KERNEL_RPP_SOBELBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x47, + VX_KERNEL_RPP_THRESHOLDINGBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x48, + VX_KERNEL_RPP_TENSORADD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x49, + VX_KERNEL_RPP_TENSORSUBTRACT = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4a, + VX_KERNEL_RPP_TENSORMULTIPLY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4b, + VX_KERNEL_RPP_TENSORMATRIXMULTIPLY = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4c, + VX_KERNEL_RPP_TENSORLOOKUP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4d, + VX_KERNEL_RPP_VIGNETTEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4e, + VX_KERNEL_RPP_WARPAFFINEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4f, + VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x50, + }; -#ifdef __cplusplus +#ifdef __cplusplus } #endif diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index 5a285c8eb0..8317d27089 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -46,275 +46,86 @@ extern "C" { RPP VX_API_ENTRY C Function NODE *************************************************************************************************************/ -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AbsoluteDifference(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AbsoluteDifferencebatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AbsoluteDifferencebatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AbsoluteDifferencebatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Accumulate(vx_graph graph,vx_image pSrc1,vx_image pSrc2); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulatebatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulatebatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulatebatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateSquared(vx_graph graph,vx_image pSrc); extern "C" SHARED_PUBLIC vx_node VX_API_CALL 
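[Editor's note: the rewritten enum above drops the single-image and ROI kernel IDs and packs the surviving kernels into a dense range, VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x0 through + 0x50. A minimal sketch of resolving one of these kernels at runtime follows; it is illustrative only, assumes the extension module loads under the name "vx_rpp", and omits error handling.]

#include <VX/vx.h>
#include "vx_ext_amd.h" /* the kernel enums shown above */

vx_kernel get_rpp_kernel(vx_context context)
{
    /* Load the RPP extension, then resolve a kernel by its new dense enum. */
    vxLoadKernels(context, "vx_rpp");
    vx_kernel kernel = vxGetKernelByEnum(context, VX_KERNEL_RPP_BRIGHTNESSBATCHPD);
    return (vxGetStatus((vx_reference)kernel) == VX_SUCCESS) ? kernel : NULL;
}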
diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
index 5a285c8eb0..8317d27089 100644
--- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
+++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
@@ -46,275 +46,86 @@ extern "C" {
 RPP VX_API_ENTRY C Function NODE
 *************************************************************************************************************/
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AbsoluteDifference(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AbsoluteDifferencebatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AbsoluteDifferencebatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AbsoluteDifferencebatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Accumulate(vx_graph graph,vx_image pSrc1,vx_image pSrc2);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulatebatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulatebatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulatebatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateSquared(vx_graph graph,vx_image pSrc);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateSquaredbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateSquaredbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateSquaredbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateWeighted(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_scalar alpha);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateWeightedbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_array alpha,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateWeightedbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_array alpha,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AccumulateWeightedbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_scalar alpha,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Add(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AddbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AddbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_AddbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BilateralFilter(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar kernelSize,vx_scalar sigmaI,vx_scalar sigmaS);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BilateralFilterbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_array sigmaI,vx_array sigmaS,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BilateralFilterbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_array sigmaI,vx_array sigmaS,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BilateralFilterbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar kernelSize,vx_scalar sigmaI,vx_scalar sigmaS,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BitwiseAND(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BitwiseANDbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BitwiseANDbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BitwiseANDbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BitwiseNOT(vx_graph graph,vx_image pSrc,vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BitwiseNOTbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BitwiseNOTbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BitwiseNOTbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Blend(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst,vx_scalar alpha);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BlendbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array alpha,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BlendbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array alpha,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BlendbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar alpha,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Blur(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar kernelSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BlurbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BlurbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BlurbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BoxFilter(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar kernelSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BoxFilterbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BoxFilterbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BoxFilterbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Brightness(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar alpha,vx_scalar beta);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BrightnessbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array alpha,vx_array beta,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BrightnessbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array alpha,vx_array beta,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_BrightnessbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar alpha,vx_scalar beta,vx_uint32 nbatchSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CannyEdgeDetector(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar max,vx_scalar min);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ChannelCombine(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pSrc3,vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ChannelCombinebatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pSrc3,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ChannelCombinebatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pSrc3,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ChannelExtract(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar extractChannelNumber);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ChannelExtractbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array extractChannelNumber,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ChannelExtractbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar extractChannelNumber,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ColorTemperature(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar adjustmentValue);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ColorTemperaturebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array adjustmentValue,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ColorTemperaturebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array adjustmentValue,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ColorTemperaturebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar adjustmentValue,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ColorTwist(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar alpha,vx_scalar beta,vx_scalar hue,vx_scalar sat);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ColorTwistbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array alpha,vx_array beta,vx_array hue, vx_array sat,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Contrast(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar min,vx_scalar max);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ContrastbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array min,vx_array max,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ContrastbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array min,vx_array max,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ContrastbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar min,vx_scalar max,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ControlFlow(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst,vx_scalar type);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ControlFlowbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array type,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ControlFlowbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array type,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ControlFlowbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar type,vx_uint32 nbatchSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Copy(vx_graph graph, vx_image pSrc, vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CropMirrorNormalizebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array x1,vx_array y1,vx_array mean, vx_array std_dev, vx_array flip, vx_scalar chnShift,vx_uint32 nbatchSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CropPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array x1,vx_array y1, vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CustomConvolution(vx_graph graph,vx_image pSrc,vx_image pDst,vx_array kernel,vx_scalar kernelWidth,vx_scalar kernelHeight);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CustomConvolutionbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernel,vx_array kernelWidth,vx_array kernelHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CustomConvolutionbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernel,vx_array kernelWidth,vx_array kernelHeight,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CustomConvolutionbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernel,vx_scalar kernelWidth,vx_scalar kernelHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_DataObjectCopy(vx_graph graph,vx_image pSrc,vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_DataObjectCopybatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_DataObjectCopybatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_DataObjectCopybatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Dilate(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar kernelSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_DilatebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_DilatebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_DilatebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Erode(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar kernelSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ErodebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ErodebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ErodebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ExclusiveOR(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ExclusiveORbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ExclusiveORbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ExclusiveORbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Exposure(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar exposureValue);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ExposurebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array exposureValue,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ExposurebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array exposureValue,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ExposurebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar exposureValue,vx_uint32 nbatchSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FastCornerDetector(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar noOfPixels,vx_scalar threshold,vx_scalar nonMaxKernelSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Fisheye(vx_graph graph,vx_image pSrc,vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FisheyebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FisheyebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FisheyebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Flip(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar flipAxis);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FlipbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array flipAxis,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FlipbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array flipAxis,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FlipbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar flipAxis,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Fog(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar fogValue);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FogbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array fogValue,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FogbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array fogValue,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_FogbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar fogValue,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GammaCorrection(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar gamma);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GammaCorrectionbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array gamma,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GammaCorrectionbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array gamma,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GammaCorrectionbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar gamma,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GaussianFilter(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar stdDev,vx_scalar kernelSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GaussianFilterbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array stdDev,vx_array kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GaussianFilterbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array stdDev,vx_array kernelSize,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GaussianFilterbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar stdDev,vx_scalar kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GaussianImagePyramid(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar stdDev,vx_scalar kernelSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GaussianImagePyramidbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array stdDev,vx_array kernelSize,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_GaussianImagePyramidbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar stdDev,vx_scalar kernelSize,vx_uint32 nbatchSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HarrisCornerDetector(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar gaussianKernelSize,vx_scalar stdDev,vx_scalar kernelSize,vx_scalar kValue,vx_scalar threshold,vx_scalar nonMaxKernelSize);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Histogram(vx_graph graph,vx_image pSrc,vx_array outputHistogram,vx_scalar bins);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HistogramBalance(vx_graph graph,vx_image pSrc,vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HistogramBalancebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HistogramBalancebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HistogramBalancebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HistogramEqualize(vx_graph graph,vx_image pSrc,vx_image pDst);
extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HistogramEqualizebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HistogramEqualizebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize);
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HistogramEqualizebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize);
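[Editor's note: the declarations kept above all follow one batchPD convention: per-image srcImgWidth/srcImgHeight vx_arrays, per-image parameter vx_arrays, and a trailing nbatchSize. A minimal sketch of building one such node with the vxExtrppNode_BrightnessbatchPD signature kept above; the helper name is illustrative, and array filling (vxAddArrayItems) plus error checks are omitted.]

#include <VX/vx.h>
#include "vx_ext_rpp.h"

vx_node add_brightness_batch(vx_context context, vx_graph graph,
                             vx_image src, vx_image dst, vx_uint32 nbatchSize)
{
    /* Per-image metadata and parameters travel as vx_arrays of length nbatchSize. */
    vx_array widths  = vxCreateArray(context, VX_TYPE_UINT32,  nbatchSize);
    vx_array heights = vxCreateArray(context, VX_TYPE_UINT32,  nbatchSize);
    vx_array alpha   = vxCreateArray(context, VX_TYPE_FLOAT32, nbatchSize);
    vx_array beta    = vxCreateArray(context, VX_TYPE_FLOAT32, nbatchSize);
    /* ...fill all four arrays with vxAddArrayItems() before vxVerifyGraph()... */
    return vxExtrppNode_BrightnessbatchPD(graph, src, widths, heights,
                                          dst, alpha, beta, nbatchSize);
}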
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Hue(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar hueShift); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HuebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array hueShift,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HuebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array hueShift,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_HuebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar hueShift,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_InclusiveOR(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_InclusiveORbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_InclusiveORbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_InclusiveORbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Jitter(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar kernelSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_JitterbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_JitterbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_JitterbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar kernelSize,vx_uint32 nbatchSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LaplacianImagePyramid(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar stdDev,vx_scalar kernelSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LensCorrection(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar strength,vx_scalar zoom); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LensCorrectionbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array strength,vx_array zoom,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LensCorrectionbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array strength,vx_array zoom,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LensCorrectionbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar strength,vx_scalar zoom,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LocalBinaryPattern(vx_graph 
graph,vx_image pSrc,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LocalBinaryPatternbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LocalBinaryPatternbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LocalBinaryPatternbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LookUpTable(vx_graph graph,vx_image pSrc,vx_image pDst,vx_array lutPtr); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LookUpTablebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array lutPtr,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LookUpTablebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array lutPtr,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_LookUpTablebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array lutPtr,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Magnitude(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MagnitudebatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MagnitudebatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MagnitudebatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Max(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MaxbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MaxbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MaxbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MeanStddev(vx_graph graph,vx_image pSrc,vx_scalar mean,vx_scalar stdDev); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MedianFilter(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar kernelSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MedianFilterbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_uint32 nbatchSize); 
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MedianFilterbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MedianFilterbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar kernelSize,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Min(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MinbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MinbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MinbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MinMaxLoc(vx_graph graph,vx_image pSrc,vx_scalar min,vx_scalar max,vx_scalar minLoc,vx_scalar maxLoc); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Multiply(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MultiplybatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MultiplybatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_MultiplybatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Noise(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar noiseProbability); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NoisebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array noiseProbability,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NoisebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array noiseProbability,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NoisebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar noiseProbability,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NonLinearFilter(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar kernelSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NonLinearFilterbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NonLinearFilterbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array 
srcImgHeight,vx_image pDst,vx_array kernelSize,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NonLinearFilterbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar kernelSize,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NonMaxSupression(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar kernelSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NonMaxSupressionbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NonMaxSupressionbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array kernelSize,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_NonMaxSupressionbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar kernelSize,vx_uint32 nbatchSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Nop(vx_graph graph, vx_image pSrc, vx_image pDst); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Occlusion(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst,vx_scalar src1x1,vx_scalar src1y1,vx_scalar src1x2,vx_scalar src1y2,vx_scalar src2x1,vx_scalar src2y1,vx_scalar src2x2,vx_scalar src2y2); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_OcclusionbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array src1x1,vx_array src1y1,vx_array src1x2,vx_array src1y2,vx_array src2x1,vx_array src2y1,vx_array src2x2,vx_array src2y2,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_OcclusionbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array src1x1,vx_array src1y1,vx_array src1x2,vx_array src1y2,vx_array src2x1,vx_array src2y1,vx_array src2x2,vx_array src2y2,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_OcclusionbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar src1x1,vx_scalar src1y1,vx_scalar src1x2,vx_scalar src1y2,vx_scalar src2x1,vx_scalar src2y1,vx_scalar src2x2,vx_scalar src2y2,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Phase(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_PhasebatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_PhasebatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_PhasebatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL 
vxExtrppNode_Pixelate(vx_graph graph,vx_image pSrc,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_PixelatebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_PixelatebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_PixelatebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Rain(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar rainValue,vx_scalar rainWidth,vx_scalar rainHeight,vx_scalar rainTransperancy); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RainbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array rainValue,vx_array rainWidth,vx_array rainHeight,vx_array rainTransperancy,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RainbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array rainValue,vx_array rainWidth,vx_array rainHeight,vx_array rainTransperancy,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RainbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar rainValue,vx_scalar rainWidth,vx_scalar rainHeight,vx_scalar rainTransperancy,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RandomCropLetterBox(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar x1,vx_scalar y1,vx_scalar x2,vx_scalar y2); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RandomCropLetterBoxbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array x1,vx_array y1,vx_array x2,vx_array y2,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RandomCropLetterBoxbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array x1,vx_array y1,vx_array x2,vx_array y2,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RandomCropLetterBoxbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar dstImgWidth,vx_scalar dstImgHeight,vx_scalar x1,vx_scalar y1,vx_scalar x2,vx_scalar y2,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RandomShadow(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar x1,vx_scalar y1,vx_scalar x2,vx_scalar y2,vx_scalar numberOfShadows,vx_scalar maxSizeX,vx_scalar maxSizeY); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RandomShadowbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array x1,vx_array y1,vx_array x2,vx_array y2,vx_array numberOfShadows,vx_array maxSizeX,vx_array maxSizeY,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RandomShadowbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image 
pDst,vx_array x1,vx_array y1,vx_array x2,vx_array y2,vx_array numberOfShadows,vx_array maxSizeX,vx_array maxSizeY,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RandomShadowbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar x1,vx_scalar y1,vx_scalar x2,vx_scalar y2,vx_scalar numberOfShadows,vx_scalar maxSizeX,vx_scalar maxSizeY,vx_uint32 nbatchSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_remap(vx_graph graph,vx_image pSrc,vx_image pDst,vx_array rowRemap,vx_array colRemap); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Resize(vx_graph graph,vx_image pSrc,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar dstImgWidth,vx_scalar dstImgHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizeCrop(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar x1,vx_scalar y1,vx_scalar x2,vx_scalar y2); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizeCropbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array x1,vx_array y1,vx_array x2,vx_array y2,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizeCropbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array x1,vx_array y1,vx_array x2,vx_array y2,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizeCropbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar dstImgWidth,vx_scalar dstImgHeight,vx_scalar x1,vx_scalar y1,vx_scalar x2,vx_scalar y2,vx_uint32 nbatchSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ResizeCropMirrorPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array x1,vx_array y1,vx_array x2,vx_array y2, vx_array mirrorFlag, vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Rotate(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar angle); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RotatebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array angle,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RotatebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array angle,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); 
-extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_RotatebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar dstImgWidth,vx_scalar dstImgHeight,vx_scalar angle,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Saturation(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar saturationFactor); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SaturationbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array saturationFactor,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SaturationbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array saturationFactor,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SaturationbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar saturationFactor,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Scale(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar percentage); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ScalebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array percentage,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ScalebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array percentage,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ScalebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar dstImgWidth,vx_scalar dstImgHeight,vx_scalar percentage,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Snow(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar snowValue); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SnowbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array snowValue,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SnowbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array snowValue,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SnowbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar snowValue,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Sobel(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar sobelType); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SobelbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array sobelType,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SobelbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array sobelType,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SobelbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array 
srcImgHeight,vx_image pDst,vx_scalar sobelType,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Subtract(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_image pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SubtractbatchPD(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SubtractbatchPDROID(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SubtractbatchPS(vx_graph graph,vx_image pSrc1,vx_image pSrc2,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_uint32 nbatchSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_TensorAdd(vx_graph graph,vx_array pSrc1,vx_array pSrc2,vx_array pDst,vx_scalar tensorDimensions,vx_array tensorDimensionValues); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_TensorLookup(vx_graph graph,vx_array pSrc,vx_array pDst,vx_array lutPtr,vx_scalar tensorDimensions,vx_array tensorDimensionValues); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_TensorMatrixMultiply(vx_graph graph,vx_array pSrc1,vx_array pSrc2,vx_array pDst,vx_array tensorDimensionValues1,vx_array tensorDimensionValues2); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_TensorMultiply(vx_graph graph,vx_array pSrc1,vx_array pSrc2,vx_array pDst,vx_scalar tensorDimensions,vx_array tensorDimensionValues); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_TensorSubtract(vx_graph graph,vx_array pSrc1,vx_array pSrc2,vx_array pDst,vx_scalar tensorDimensions,vx_array tensorDimensionValues); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Thresholding(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar min,vx_scalar max); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ThresholdingbatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array min,vx_array max,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ThresholdingbatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array min,vx_array max,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ThresholdingbatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar min,vx_scalar max,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Vignette(vx_graph graph,vx_image pSrc,vx_image pDst,vx_scalar stdDev); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_VignettebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array stdDev,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_VignettebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array stdDev,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_VignettebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar stdDev,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL 
vxExtrppNode_WarpAffine(vx_graph graph,vx_image pSrc,vx_image pDst,vx_array affine); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpAffinebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array affine,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpAffinebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array affine,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpAffinebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar dstImgWidth,vx_scalar dstImgHeight,vx_array affine,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpPerspective(vx_graph graph,vx_image pSrc,vx_image pDst,vx_array perspective); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpPerspectivebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array perspective,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpPerspectivebatchPDROID(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array perspective,vx_array roiX,vx_array roiY,vx_array roiWidth,vx_array roiHeight,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpPerspectivebatchPS(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_scalar dstImgWidth,vx_scalar dstImgHeight,vx_array perspective,vx_uint32 nbatchSize); #ifdef __cplusplus } diff --git a/amd_openvx_extensions/amd_rpp/source/AbsoluteDifference.cpp b/amd_openvx_extensions/amd_rpp/source/AbsoluteDifference.cpp deleted file mode 100644 index d47d594d5a..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/AbsoluteDifference.cpp +++ /dev/null @@ -1,208 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
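The batchPD constructors that survive this header cleanup are plain C node factories, so a graph can be assembled with no extension-specific plumbing. A minimal usage sketch, assuming the header installs as vx_ext_rpp.h and eliding error checks (the image sizes and array setup here are illustrative, not from this patch):

    #include <VX/vx.h>
    #include <vx_ext_rpp.h>   /* assumed install name for this header */

    /* batched images are stacked vertically: the vx_image holds
       nbatchSize images padded to the maximum width/height */
    vx_uint32 nbatchSize = 2;
    vx_graph graph = vxCreateGraph(context);
    vx_image src = vxCreateImage(context, 1920, 1080 * nbatchSize, VX_DF_IMAGE_RGB);
    vx_image dst = vxCreateImage(context, 1024, 768 * nbatchSize, VX_DF_IMAGE_RGB);
    vx_array srcW = vxCreateArray(context, VX_TYPE_UINT32, nbatchSize);
    vx_array srcH = vxCreateArray(context, VX_TYPE_UINT32, nbatchSize);
    vx_array dstW = vxCreateArray(context, VX_TYPE_UINT32, nbatchSize);
    vx_array dstH = vxCreateArray(context, VX_TYPE_UINT32, nbatchSize);
    /* ... fill the arrays with per-image dimensions via vxAddArrayItems() ... */
    vx_node node = vxExtrppNode_ResizebatchPD(graph, src, srcW, srcH, dst, dstW, dstH, nbatchSize);
    if (vxVerifyGraph(graph) == VX_SUCCESS)
        vxProcessGraph(graph);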
diff --git a/amd_openvx_extensions/amd_rpp/source/AbsoluteDifference.cpp b/amd_openvx_extensions/amd_rpp/source/AbsoluteDifference.cpp
deleted file mode 100644
index d47d594d5a..0000000000
--- a/amd_openvx_extensions/amd_rpp/source/AbsoluteDifference.cpp
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
-Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "internal_publishKernels.h"
-
-struct AbsoluteDifferenceLocalData {
- RPPCommonHandle handle;
- rppHandle_t rppHandle;
- RppiSize srcDimensions;
- Rpp32u device_type;
- RppPtr_t pSrc1;
- RppPtr_t pSrc2;
- RppPtr_t pDst;
-#if ENABLE_OPENCL
- cl_mem cl_pSrc1;
- cl_mem cl_pSrc2;
- cl_mem cl_pDst;
-#endif
-};
-
-static vx_status VX_CALLBACK refreshAbsoluteDifference(vx_node node, const vx_reference *parameters, vx_uint32 num, AbsoluteDifferenceLocalData *data)
-{
- vx_status status = VX_SUCCESS;
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height)));
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width)));
- if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1)));
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2)));
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst)));
-#endif
- }
- if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8)));
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8)));
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8)));
- }
- return status;
-}
-
-static vx_status VX_CALLBACK validateAbsoluteDifference(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
-{
- vx_status status = VX_SUCCESS;
- vx_enum scalar_type;
- STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
- if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type);
- // Check for input parameters
- vx_parameter input_param;
- vx_image input;
- vx_df_image df_image;
- input_param = vxGetParameterByIndex(node,0);
- STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
- STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
- if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
- {
- return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifference: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
- }
-
- input_param = vxGetParameterByIndex(node,1);
- STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
- STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
- if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
- {
- return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifference: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
- }
-
- // Check for output parameters
- vx_image output;
- vx_df_image format;
- vx_parameter output_param;
- vx_uint32 height, width;
- output_param = vxGetParameterByIndex(node,2);
- STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
- STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
- STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
- vxReleaseImage(&input);
- vxReleaseImage(&output);
- vxReleaseParameter(&output_param);
- vxReleaseParameter(&input_param);
- return status;
-}
-
-static vx_status VX_CALLBACK processAbsoluteDifference(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
- RppStatus rpp_status = RPP_SUCCESS;
- vx_status return_status = VX_SUCCESS;
- AbsoluteDifferenceLocalData * data = NULL;
- STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
- vx_df_image df_image = VX_DF_IMAGE_VIRT;
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
- if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
- cl_command_queue handle = data->handle.cmdq;
- refreshAbsoluteDifference(node, parameters, num, data);
- if (df_image == VX_DF_IMAGE_U8 ){
- rpp_status = rppi_absolute_difference_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle);
- }
- else if(df_image == VX_DF_IMAGE_RGB) {
- rpp_status = rppi_absolute_difference_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle);
- }
- return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-#endif
- }
- if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
- refreshAbsoluteDifference(node, parameters, num, data);
- if (df_image == VX_DF_IMAGE_U8 ){
- rpp_status = rppi_absolute_difference_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle);
- }
- else if(df_image == VX_DF_IMAGE_RGB) {
- rpp_status = rppi_absolute_difference_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle);
- }
- return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
- }
- return return_status;
-}
-
-static vx_status VX_CALLBACK initializeAbsoluteDifference(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
- AbsoluteDifferenceLocalData * data = new AbsoluteDifferenceLocalData;
- memset(data, 0, sizeof(*data));
-#if ENABLE_OPENCL
- STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
-#endif
- STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
- refreshAbsoluteDifference(node, parameters, num, data);
-#if ENABLE_OPENCL
- if(data->device_type == AGO_TARGET_AFFINITY_GPU)
- rppCreateWithStream(&data->rppHandle, data->handle.cmdq);
-#endif
- if(data->device_type == AGO_TARGET_AFFINITY_CPU)
- rppCreateWithBatchSize(&data->rppHandle, 1);
- STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
- return VX_SUCCESS;
-}
-
-static vx_status VX_CALLBACK uninitializeAbsoluteDifference(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
- AbsoluteDifferenceLocalData * data;
- STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-#if ENABLE_OPENCL
- if(data->device_type == AGO_TARGET_AFFINITY_GPU)
- rppDestroyGPU(data->rppHandle);
-#endif
- if(data->device_type == AGO_TARGET_AFFINITY_CPU)
- rppDestroyHost(data->rppHandle);
- delete(data);
- return VX_SUCCESS;
-}
-
-vx_status AbsoluteDifference_Register(vx_context context)
-{
- vx_status status = VX_SUCCESS;
- // Add kernel to the context with callbacks
- vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AbsoluteDifference",
- VX_KERNEL_RPP_ABSOLUTEDIFFERENCE,
- processAbsoluteDifference,
- 4,
- validateAbsoluteDifference,
- initializeAbsoluteDifference,
- uninitializeAbsoluteDifference);
- ERROR_CHECK_OBJECT(kernel);
- AgoTargetAffinityInfo affinity;
- vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
-#if ENABLE_OPENCL
- // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
- vx_bool enableBufferAccess = vx_true_e;
- if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
- STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
-#else
- vx_bool enableBufferAccess = vx_false_e;
-#endif
- if (kernel)
- {
- PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
- PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
- PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
- PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
- PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
- }
- if (status != VX_SUCCESS)
- {
- exit: vxRemoveKernel(kernel); return VX_FAILURE;
- }
- return status;
-}
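The single-image org.rpp.AbsoluteDifference kernel deleted above has no one-for-one replacement; its behavior remains reachable through the batchPD path with a batch of one. A rough sketch, assuming the AbsoluteDifferencebatchPD declaration earlier in the header keeps the usual batchPD shape (graph, src1, src2, widths, heights, dst, nbatchSize):

    vx_uint32 width = 1920, height = 1080;
    vx_array srcW = vxCreateArray(context, VX_TYPE_UINT32, 1);
    vx_array srcH = vxCreateArray(context, VX_TYPE_UINT32, 1);
    vxAddArrayItems(srcW, 1, &width, sizeof(width));
    vxAddArrayItems(srcH, 1, &height, sizeof(height));
    /* same result as the removed single-image kernel, as a batch of 1 */
    vx_node node = vxExtrppNode_AbsoluteDifferencebatchPD(graph, src1, src2, srcW, srcH, dst, 1);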
#include "internal_publishKernels.h" -struct AbsoluteDifferencebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct AbsoluteDifferencebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshAbsoluteDifferencebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, AbsoluteDifferencebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateAbsoluteDifferencebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return 
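Note how the refresh callback derives per-image geometry: the batch is carried in a single vx_image whose height is the padded per-image height times nbatchSize, so dividing the queried height by nbatchSize recovers maxSrcDimensions. A sketch of the layout this implies (an assumption spelled out here, not code from the patch):

    /* byte offset of batch element i inside the stacked U8 buffer,
       padded to maxSrcDimensions (channels = 1 for U8, 3 for RGB) */
    static size_t batch_image_offset(RppiSize maxDims, unsigned channels, unsigned i)
    {
        return (size_t)maxDims.width * maxDims.height * channels * i;
    }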
 
 static vx_status VX_CALLBACK validateAbsoluteDifferencebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
 {
- vx_status status = VX_SUCCESS;
- vx_enum scalar_type;
- STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
- if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type);
- STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
- if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type);
- // Check for input parameters
- vx_parameter input_param;
- vx_image input;
- vx_df_image df_image;
- input_param = vxGetParameterByIndex(node,0);
- STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
- STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
- if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
- {
- return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
- }
-
- input_param = vxGetParameterByIndex(node,1);
- STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
- STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
- if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
- {
- return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
- }
-
- // Check for output parameters
- vx_image output;
- vx_df_image format;
- vx_parameter output_param;
- vx_uint32 height, width;
- output_param = vxGetParameterByIndex(node,4);
- STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
- STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
- STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
- vxReleaseImage(&input);
- vxReleaseImage(&output);
- vxReleaseParameter(&output_param);
- vxReleaseParameter(&input_param);
- return status;
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+ // Check for input parameters
+ vx_parameter input_param;
+ vx_image input;
+ vx_df_image df_image;
+ input_param = vxGetParameterByIndex(node, 0);
+ STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
+ STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+ if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
+ {
+ return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
+ }
+
+ input_param = vxGetParameterByIndex(node, 1);
+ STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
+ STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+ if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
+ {
+ return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
+ }
+
+ // Check for output parameters
+ vx_image output;
+ vx_df_image format;
+ vx_parameter output_param;
+ vx_uint32 height, width;
+ output_param = vxGetParameterByIndex(node, 4);
+ STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
+ STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
+ STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
+ STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+ vxReleaseImage(&input);
+ vxReleaseImage(&output);
+ vxReleaseParameter(&output_param);
+ vxReleaseParameter(&input_param);
+ return status;
 }
 
-static vx_status VX_CALLBACK processAbsoluteDifferencebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
- RppStatus rpp_status = RPP_SUCCESS;
- vx_status return_status = VX_SUCCESS;
- AbsoluteDifferencebatchPDLocalData * data = NULL;
- STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
- vx_df_image df_image = VX_DF_IMAGE_VIRT;
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
- if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
+static vx_status VX_CALLBACK processAbsoluteDifferencebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+ RppStatus rpp_status = RPP_SUCCESS;
+ vx_status return_status = VX_SUCCESS;
+ AbsoluteDifferencebatchPDLocalData *data = NULL;
+ STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+ vx_df_image df_image = VX_DF_IMAGE_VIRT;
+ STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+ if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+ {
 #if ENABLE_OPENCL
- cl_command_queue handle = data->handle.cmdq;
- refreshAbsoluteDifferencebatchPD(node, parameters, num, data);
- if (df_image == VX_DF_IMAGE_U8 ){
- rpp_status = rppi_absolute_difference_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle);
- }
- else if(df_image == VX_DF_IMAGE_RGB) {
- rpp_status = rppi_absolute_difference_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle);
- }
- return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ refreshAbsoluteDifferencebatchPD(node, parameters, num, data);
+ if (df_image == VX_DF_IMAGE_U8)
+ {
+ rpp_status = rppi_absolute_difference_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle);
+ }
+ else if (df_image == VX_DF_IMAGE_RGB)
+ {
+ rpp_status = rppi_absolute_difference_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#elif ENABLE_HIP
+ refreshAbsoluteDifferencebatchPD(node, parameters, num, data);
+ if (df_image == VX_DF_IMAGE_U8)
+ {
+ rpp_status = rppi_absolute_difference_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle);
+ }
+ else if (df_image == VX_DF_IMAGE_RGB)
+ {
+ rpp_status = rppi_absolute_difference_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
 #endif
- }
- if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
- refreshAbsoluteDifferencebatchPD(node, parameters, num, data);
- if (df_image == VX_DF_IMAGE_U8 ){
- rpp_status = rppi_absolute_difference_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle);
- }
- else if(df_image == VX_DF_IMAGE_RGB) {
- rpp_status = rppi_absolute_difference_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle);
- }
- return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
- }
- return return_status;
+ }
+ if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+ {
+ refreshAbsoluteDifferencebatchPD(node, parameters, num, data);
+ if (df_image == VX_DF_IMAGE_U8)
+ {
+ rpp_status = rppi_absolute_difference_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle);
+ }
+ else if (df_image == VX_DF_IMAGE_RGB)
+ {
+ rpp_status = rppi_absolute_difference_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle);
+ }
+ return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+ }
+ return return_status;
 }
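Worth noting: both GPU branches call the same rppi_*_batchPD_gpu symbols; which runtime those resolve to (OpenCL or HIP) is fixed when the RPP library itself is built, so only the buffer handles differ here. The RPP naming encodes the layout: u8_pln1 is 8-bit planar single channel (VX_DF_IMAGE_U8), u8_pkd3 is 8-bit packed 3 channel (VX_DF_IMAGE_RGB). A dispatch sketch, with the function-pointer signature abbreviated from the call sites above (treat it as an assumption, not the library's declared prototype):

    typedef RppStatus (*absdiff_batchpd_gpu_f)(void *, void *, RppiSize *, RppiSize,
                                               void *, Rpp32u, rppHandle_t);

    static absdiff_batchpd_gpu_f pick_gpu_variant(vx_df_image fmt)
    {
        if (fmt == VX_DF_IMAGE_U8)  return rppi_absolute_difference_u8_pln1_batchPD_gpu;
        if (fmt == VX_DF_IMAGE_RGB) return rppi_absolute_difference_u8_pkd3_batchPD_gpu;
        return NULL; /* unsupported format; validator should have rejected it */
    }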
rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeAbsoluteDifferencebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - AbsoluteDifferencebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + AbsoluteDifferencebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status AbsoluteDifferencebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AbsoluteDifferencebatchPD", - VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD, - processAbsoluteDifferencebatchPD, - 7, - validateAbsoluteDifferencebatchPD, - initializeAbsoluteDifferencebatchPD, - uninitializeAbsoluteDifferencebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AbsoluteDifferencebatchPD", + VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPD, + processAbsoluteDifferencebatchPD, + 7, + validateAbsoluteDifferencebatchPD, + initializeAbsoluteDifferencebatchPD, + uninitializeAbsoluteDifferencebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, 
VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/AbsoluteDifferencebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/AbsoluteDifferencebatchPDROID.cpp deleted file mode 100644 index 99bac0fb65..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/AbsoluteDifferencebatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct AbsoluteDifferencebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshAbsoluteDifferencebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, AbsoluteDifferencebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = 
vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAbsoluteDifferencebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    vxReleaseImage(&input);
-    vxReleaseImage(&output);
-    vxReleaseParameter(&output_param);
-    vxReleaseParameter(&input_param);
-    return status;
-}
-
-static vx_status VX_CALLBACK processAbsoluteDifferencebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
-    RppStatus rpp_status = RPP_SUCCESS;
-    vx_status return_status = VX_SUCCESS;
-    AbsoluteDifferencebatchPDROIDLocalData * data = NULL;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    vx_df_image df_image = VX_DF_IMAGE_VIRT;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        cl_command_queue handle = data->handle.cmdq;
-        refreshAbsoluteDifferencebatchPDROID(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_absolute_difference_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_absolute_difference_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        refreshAbsoluteDifferencebatchPDROID(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_absolute_difference_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_absolute_difference_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-    }
-    return return_status;
-}
-
-static vx_status VX_CALLBACK initializeAbsoluteDifferencebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AbsoluteDifferencebatchPDROIDLocalData * data = new AbsoluteDifferencebatchPDROIDLocalData;
-    memset(data, 0, sizeof(*data));
-#if ENABLE_OPENCL
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
-#endif
-    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-    refreshAbsoluteDifferencebatchPDROID(node, parameters, num, data);
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize);
-
-    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    return VX_SUCCESS;
-}
-
-static vx_status VX_CALLBACK uninitializeAbsoluteDifferencebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AbsoluteDifferencebatchPDROIDLocalData * data;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppDestroyGPU(data->rppHandle);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppDestroyHost(data->rppHandle);
-    delete(data);
-    return VX_SUCCESS;
-}
-
-vx_status AbsoluteDifferencebatchPDROID_Register(vx_context context)
-{
-    vx_status status = VX_SUCCESS;
-    // Add kernel to the context with callbacks
-    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AbsoluteDifferencebatchPDROID",
-        VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPDROID,
-        processAbsoluteDifferencebatchPDROID,
-        11,
-        validateAbsoluteDifferencebatchPDROID,
-        initializeAbsoluteDifferencebatchPDROID,
-        uninitializeAbsoluteDifferencebatchPDROID);
-    ERROR_CHECK_OBJECT(kernel);
-    AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
-#if ENABLE_OPENCL
-    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
-    vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
-#else
-    vx_bool enableBufferAccess = vx_false_e;
-#endif
-    if (kernel)
-    {
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
-    }
-    if (status != VX_SUCCESS)
-    {
-        exit: vxRemoveKernel(kernel); return VX_FAILURE;
-    }
-    return status;
-}
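For orientation, a host application consumes a kernel registered this way through the standard OpenVX user-kernel API; the _Register functions in this patch only publish it. A minimal sketch, assuming the extension is built as the "vx_rpp" module and that suitably sized images, arrays, and scalars already exist (the helper name below is invented for illustration):

// Sketch only: standard OpenVX user-kernel lookup and node creation.
// "vx_rpp" and createRppRoidNode are assumptions, not APIs defined by this diff.
#include <VX/vx.h>

vx_node createRppRoidNode(vx_context ctx, vx_graph graph, vx_reference params[11])
{
    vxLoadKernels(ctx, "vx_rpp");   // make the org.rpp.* kernels visible
    vx_kernel k = vxGetKernelByName(ctx, "org.rpp.AbsoluteDifferencebatchPDROID");
    vx_node n = vxCreateGenericNode(graph, k);
    for (vx_uint32 i = 0; i < 11; i++)      // same order as the *_Register function
        vxSetParameterByIndex(n, i, params[i]);
    vxReleaseKernel(&k);
    return n;
}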
diff --git a/amd_openvx_extensions/amd_rpp/source/AbsoluteDifferencebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/AbsoluteDifferencebatchPS.cpp
deleted file mode 100644
index 4ae34b23b1..0000000000
--- a/amd_openvx_extensions/amd_rpp/source/AbsoluteDifferencebatchPS.cpp
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
-Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "internal_publishKernels.h"
-
-struct AbsoluteDifferencebatchPSLocalData {
-    RPPCommonHandle handle;
-    rppHandle_t rppHandle;
-    Rpp32u device_type;
-    Rpp32u nbatchSize;
-    RppiSize *srcDimensions;
-    RppiSize maxSrcDimensions;
-    RppPtr_t pSrc1;
-    RppPtr_t pSrc2;
-    RppPtr_t pDst;
-#if ENABLE_OPENCL
-    cl_mem cl_pSrc1;
-    cl_mem cl_pSrc2;
-    cl_mem cl_pDst;
-#endif
-};
-
-static vx_status VX_CALLBACK refreshAbsoluteDifferencebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, AbsoluteDifferencebatchPSLocalData *data)
-{
-    vx_status status = VX_SUCCESS;
-    size_t arr_size;
-    vx_status copy_status;
-    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height)));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width)));
-    data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize;
-    data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);
-    Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    for(int i = 0; i < data->nbatchSize; i++){
-        data->srcDimensions[i].width = srcBatch_width[i];
-        data->srcDimensions[i].height = srcBatch_height[i];
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst)));
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8)));
-    }
-    return status;
-}
-
-static vx_status VX_CALLBACK validateAbsoluteDifferencebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
-{
-    vx_status status = VX_SUCCESS;
-    vx_enum scalar_type;
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type);
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type);
-    // Check for input parameters
-    vx_parameter input_param;
-    vx_image input;
-    vx_df_image df_image;
-    input_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    input_param = vxGetParameterByIndex(node,1);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AbsoluteDifferencebatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    // Check for output parameters
-    vx_image output;
-    vx_df_image format;
-    vx_parameter output_param;
-    vx_uint32 height, width;
-    output_param = vxGetParameterByIndex(node,4);
-    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    vxReleaseImage(&input);
-    vxReleaseImage(&output);
-    vxReleaseParameter(&output_param);
-    vxReleaseParameter(&input_param);
-    return status;
-}
-
-static vx_status VX_CALLBACK processAbsoluteDifferencebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
-    RppStatus rpp_status = RPP_SUCCESS;
-    vx_status return_status = VX_SUCCESS;
-    AbsoluteDifferencebatchPSLocalData * data = NULL;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    vx_df_image df_image = VX_DF_IMAGE_VIRT;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        cl_command_queue handle = data->handle.cmdq;
-        refreshAbsoluteDifferencebatchPS(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_absolute_difference_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_absolute_difference_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        refreshAbsoluteDifferencebatchPS(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_absolute_difference_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_absolute_difference_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-    }
-    return return_status;
-}
-
-static vx_status VX_CALLBACK initializeAbsoluteDifferencebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AbsoluteDifferencebatchPSLocalData * data = new AbsoluteDifferencebatchPSLocalData;
-    memset(data, 0, sizeof(*data));
-#if ENABLE_OPENCL
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
-#endif
-    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-    refreshAbsoluteDifferencebatchPS(node, parameters, num, data);
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize);
-
-    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    return VX_SUCCESS;
-}
-
-static vx_status VX_CALLBACK uninitializeAbsoluteDifferencebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AbsoluteDifferencebatchPSLocalData * data;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppDestroyGPU(data->rppHandle);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppDestroyHost(data->rppHandle);
-    delete(data);
-    return VX_SUCCESS;
-}
-
-vx_status AbsoluteDifferencebatchPS_Register(vx_context context)
-{
-    vx_status status = VX_SUCCESS;
-    // Add kernel to the context with callbacks
-    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AbsoluteDifferencebatchPS",
-        VX_KERNEL_RPP_ABSOLUTEDIFFERENCEBATCHPS,
-        processAbsoluteDifferencebatchPS,
-        7,
-        validateAbsoluteDifferencebatchPS,
-        initializeAbsoluteDifferencebatchPS,
-        uninitializeAbsoluteDifferencebatchPS);
-    ERROR_CHECK_OBJECT(kernel);
-    AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
-#if ENABLE_OPENCL
-    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
-    vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
-#else
-    vx_bool enableBufferAccess = vx_false_e;
-#endif
-    if (kernel)
-    {
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
-    }
-    if (status != VX_SUCCESS)
-    {
-        exit: vxRemoveKernel(kernel); return VX_FAILURE;
-    }
-    return status;
-}
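All of the batched kernels in this patch infer per-image geometry from a single stacked VX image: the batch is laid out vertically, so refreshAbsoluteDifferencebatchPS divides the queried image height by nbatchSize to get the per-image slot height. A minimal sketch of that layout math, with names invented purely for illustration:

// Sketch: how a vertically stacked batch maps to per-image regions.
// Only the height/nbatchSize relationship mirrors the refresh callbacks above;
// Roi and batchRegions are illustrative names, not part of this extension.
#include <cstdint>
#include <vector>

struct Roi { uint32_t x, y, w, h; };

std::vector<Roi> batchRegions(uint32_t stackedWidth, uint32_t stackedHeight,
                              uint32_t nbatchSize)
{
    std::vector<Roi> regions;
    uint32_t maxH = stackedHeight / nbatchSize;           // per-image slot height
    for (uint32_t i = 0; i < nbatchSize; i++)
        regions.push_back({0, i * maxH, stackedWidth, maxH}); // image i starts at row i*maxH
    return regions;
}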
diff --git a/amd_openvx_extensions/amd_rpp/source/Accumulate.cpp b/amd_openvx_extensions/amd_rpp/source/Accumulate.cpp
deleted file mode 100644
index a1fd3e2178..0000000000
--- a/amd_openvx_extensions/amd_rpp/source/Accumulate.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
-Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "internal_publishKernels.h"
-
-struct AccumulateLocalData {
-    RPPCommonHandle handle;
-    rppHandle_t rppHandle;
-    RppiSize srcDimensions;
-    Rpp32u device_type;
-    RppPtr_t pSrc1;
-    RppPtr_t pSrc2;
-#if ENABLE_OPENCL
-    cl_mem cl_pSrc1;
-    cl_mem cl_pSrc2;
-#endif
-};
-
-static vx_status VX_CALLBACK refreshAccumulate(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateLocalData *data)
-{
-    vx_status status = VX_SUCCESS;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height)));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2)));
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8)));
-    }
-    return status;
-}
-
-static vx_status VX_CALLBACK validateAccumulate(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
-{
-    vx_status status = VX_SUCCESS;
-    vx_enum scalar_type;
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type);
-    // Check for input parameters
-    vx_parameter input_param;
-    vx_image input;
-    vx_df_image df_image;
-    input_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Accumulate: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    input_param = vxGetParameterByIndex(node,1);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Accumulate: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    // Check for output parameters
-    vx_image output;
-    vx_df_image format;
-    vx_parameter output_param;
-    vx_uint32 height, width;
-    output_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    vxReleaseImage(&input);
-    vxReleaseImage(&output);
-    vxReleaseParameter(&output_param);
-    vxReleaseParameter(&input_param);
-    return status;
-}
-
-static vx_status VX_CALLBACK processAccumulate(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
-    RppStatus rpp_status = RPP_SUCCESS;
-    vx_status return_status = VX_SUCCESS;
-    AccumulateLocalData * data = NULL;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    vx_df_image df_image = VX_DF_IMAGE_VIRT;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        cl_command_queue handle = data->handle.cmdq;
-        refreshAccumulate(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        refreshAccumulate(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-    }
-    return return_status;
-}
-
-static vx_status VX_CALLBACK initializeAccumulate(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AccumulateLocalData * data = new AccumulateLocalData;
-    memset(data, 0, sizeof(*data));
-#if ENABLE_OPENCL
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
-#endif
-    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-    refreshAccumulate(node, parameters, num, data);
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppCreateWithStream(&data->rppHandle, data->handle.cmdq);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppCreateWithBatchSize(&data->rppHandle, 1);
-    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    return VX_SUCCESS;
-}
-
-static vx_status VX_CALLBACK uninitializeAccumulate(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AccumulateLocalData * data;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppDestroyGPU(data->rppHandle);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppDestroyHost(data->rppHandle);
-    delete(data);
-    return VX_SUCCESS;
-}
-
-vx_status Accumulate_Register(vx_context context)
-{
-    vx_status status = VX_SUCCESS;
-    // Add kernel to the context with callbacks
-    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Accumulate",
-        VX_KERNEL_RPP_ACCUMULATE,
-        processAccumulate,
-        3,
-        validateAccumulate,
-        initializeAccumulate,
-        uninitializeAccumulate);
-    ERROR_CHECK_OBJECT(kernel);
-    AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
-#if ENABLE_OPENCL
-    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
-    vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
-#else
-    vx_bool enableBufferAccess = vx_false_e;
-#endif
-    if (kernel)
-    {
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
-    }
-    if (status != VX_SUCCESS)
-    {
-        exit: vxRemoveKernel(kernel); return VX_FAILURE;
-    }
-    return status;
-}
diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulateSquared.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulateSquared.cpp
deleted file mode 100644
index a394bedb3b..0000000000
--- a/amd_openvx_extensions/amd_rpp/source/AccumulateSquared.cpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
-Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "internal_publishKernels.h"
-
-struct AccumulateSquaredLocalData {
-    RPPCommonHandle handle;
-    rppHandle_t rppHandle;
-    RppiSize srcDimensions;
-    Rpp32u device_type;
-    RppPtr_t pSrc;
-#if ENABLE_OPENCL
-    cl_mem cl_pSrc;
-#endif
-};
-
-static vx_status VX_CALLBACK refreshAccumulateSquared(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateSquaredLocalData *data)
-{
-    vx_status status = VX_SUCCESS;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height)));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc)));
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8)));
-    }
-    return status;
-}
-
-static vx_status VX_CALLBACK validateAccumulateSquared(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
-{
-    vx_status status = VX_SUCCESS;
-    vx_enum scalar_type;
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[1], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #1 type=%d (must be size)\n", scalar_type);
-    // Check for input parameters
-    vx_parameter input_param;
-    vx_image input;
-    vx_df_image df_image;
-    input_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateSquared: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    // Check for output parameters
-    vx_image output;
-    vx_df_image format;
-    vx_parameter output_param;
-    vx_uint32 height, width;
-    output_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    vxReleaseImage(&input);
-    vxReleaseImage(&output);
-    vxReleaseParameter(&output_param);
-    vxReleaseParameter(&input_param);
-    return status;
-}
-
-static vx_status VX_CALLBACK processAccumulateSquared(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
-    RppStatus rpp_status = RPP_SUCCESS;
-    vx_status return_status = VX_SUCCESS;
-    AccumulateSquaredLocalData * data = NULL;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    vx_df_image df_image = VX_DF_IMAGE_VIRT;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        cl_command_queue handle = data->handle.cmdq;
-        refreshAccumulateSquared(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_squared_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_squared_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        refreshAccumulateSquared(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_squared_u8_pln1_host(data->pSrc,data->srcDimensions,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_squared_u8_pkd3_host(data->pSrc,data->srcDimensions,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-    }
-    return return_status;
-}
-
-static vx_status VX_CALLBACK initializeAccumulateSquared(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AccumulateSquaredLocalData * data = new AccumulateSquaredLocalData;
-    memset(data, 0, sizeof(*data));
-#if ENABLE_OPENCL
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
-#endif
-    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[1], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-    refreshAccumulateSquared(node, parameters, num, data);
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppCreateWithStream(&data->rppHandle, data->handle.cmdq);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppCreateWithBatchSize(&data->rppHandle, 1);
-    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    return VX_SUCCESS;
-}
-
-static vx_status VX_CALLBACK uninitializeAccumulateSquared(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AccumulateSquaredLocalData * data;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppDestroyGPU(data->rppHandle);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppDestroyHost(data->rppHandle);
-    delete(data);
-    return VX_SUCCESS;
-}
-
-vx_status AccumulateSquared_Register(vx_context context)
-{
-    vx_status status = VX_SUCCESS;
-    // Add kernel to the context with callbacks
-    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateSquared",
-        VX_KERNEL_RPP_ACCUMULATESQUARED,
-        processAccumulateSquared,
-        2,
-        validateAccumulateSquared,
-        initializeAccumulateSquared,
-        uninitializeAccumulateSquared);
-    ERROR_CHECK_OBJECT(kernel);
-    AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
-#if ENABLE_OPENCL
-    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
-    vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
-#else
-    vx_bool enableBufferAccess = vx_false_e;
-#endif
-    if (kernel)
-    {
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
-    }
-    if (status != VX_SUCCESS)
-    {
-        exit: vxRemoveKernel(kernel); return VX_FAILURE;
-    }
-    return status;
-}
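Before the batched variant below, it is worth recalling what these accumulate kernels compute: the first parameter is VX_BIDIRECTIONAL, so the image is updated in place. A plausible scalar reference of the accumulate-squared semantics for the U8 case is sketched here; the exact rounding and saturation behavior belongs to RPP, so treat this only as an illustration:

// Illustrative reference only, not the RPP implementation:
// dst accumulates the square of src, saturated to the u8 range,
// matching the in-place (VX_BIDIRECTIONAL) contract of AccumulateSquared.
#include <cstdint>
#include <cstddef>
#include <algorithm>

void accumulateSquaredU8(uint8_t *dst, const uint8_t *src, size_t numPixels)
{
    for (size_t i = 0; i < numPixels; i++) {
        uint32_t acc = dst[i] + (uint32_t)src[i] * src[i];
        dst[i] = (uint8_t)std::min<uint32_t>(acc, 255u); // saturate
    }
}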
#include "internal_publishKernels.h" -struct AccumulateSquaredbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; +struct AccumulateSquaredbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; #if ENABLE_OPENCL - cl_mem cl_pSrc; -#endif + cl_mem cl_pSrc; +#elif ENABLE_HIP + void *hip_pSrc; +#endif }; static vx_status VX_CALLBACK refreshAccumulateSquaredbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateSquaredbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); #endif - } - if(data->device_type == 
AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateAccumulateSquaredbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateSquaredbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if 
(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateSquaredbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processAccumulateSquaredbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AccumulateSquaredbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processAccumulateSquaredbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + AccumulateSquaredbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshAccumulateSquaredbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_squared_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_squared_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshAccumulateSquaredbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_accumulate_squared_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_accumulate_squared_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshAccumulateSquaredbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_accumulate_squared_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_accumulate_squared_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshAccumulateSquaredbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_squared_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_squared_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshAccumulateSquaredbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_accumulate_squared_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_accumulate_squared_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeAccumulateSquaredbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeAccumulateSquaredbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - AccumulateSquaredbatchPDLocalData * data = new AccumulateSquaredbatchPDLocalData; - memset(data, 0, sizeof(*data)); + AccumulateSquaredbatchPDLocalData *data = new AccumulateSquaredbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshAccumulateSquaredbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshAccumulateSquaredbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeAccumulateSquaredbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - AccumulateSquaredbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + AccumulateSquaredbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hubrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status AccumulateSquaredbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateSquaredbatchPD", - VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD, - processAccumulateSquaredbatchPD, - 5, - validateAccumulateSquaredbatchPD, - initializeAccumulateSquaredbatchPD, - uninitializeAccumulateSquaredbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateSquaredbatchPD", + VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPD, + processAccumulateSquaredbatchPD, + 5, + validateAccumulateSquaredbatchPD, + initializeAccumulateSquaredbatchPD, + uninitializeAccumulateSquaredbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulateSquaredbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulateSquaredbatchPDROID.cpp deleted file mode 100644 index 87e5677d73..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/AccumulateSquaredbatchPDROID.cpp +++ /dev/null @@ -1,232 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
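The batchPD rewrite above moves the per-batch allocations out of the refresh callback, which runs on every graph execution, into initialize, and frees them in uninitialize. That lifecycle split, reduced to a sketch with invented names (LocalData and these function names are placeholders, not the extension's API):

// Sketch of the allocate-once / refresh-often / free-once split used above.
#include <cstdint>
#include <cstdlib>

struct LocalData { uint32_t *widths; uint32_t n; };

static LocalData *init(uint32_t nbatchSize)            // runs once per node
{
    LocalData *d = new LocalData;
    d->n = nbatchSize;
    d->widths = (uint32_t *)malloc(sizeof(uint32_t) * nbatchSize);
    return d;
}

static void refresh(LocalData *d, const uint32_t *src)  // runs every execution
{
    for (uint32_t i = 0; i < d->n; i++)
        d->widths[i] = src[i];                          // copy only, no allocation
}

static void deinit(LocalData *d)                        // runs once at teardown
{
    free(d->widths);
    delete d;
}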
diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulateSquaredbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulateSquaredbatchPDROID.cpp
deleted file mode 100644
index 87e5677d73..0000000000
--- a/amd_openvx_extensions/amd_rpp/source/AccumulateSquaredbatchPDROID.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
-Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "internal_publishKernels.h"
-
-struct AccumulateSquaredbatchPDROIDLocalData {
-    RPPCommonHandle handle;
-    rppHandle_t rppHandle;
-    Rpp32u device_type;
-    Rpp32u nbatchSize;
-    RppiROI *roiPoints;
-    RppiSize *srcDimensions;
-    RppiSize maxSrcDimensions;
-    RppPtr_t pSrc;
-#if ENABLE_OPENCL
-    cl_mem cl_pSrc;
-#endif
-};
-
-static vx_status VX_CALLBACK refreshAccumulateSquaredbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateSquaredbatchPDROIDLocalData *data)
-{
-    vx_status status = VX_SUCCESS;
-    size_t arr_size;
-    vx_status copy_status;
-    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height)));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width)));
-    data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize;
-    data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);
-    Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    for(int i = 0; i < data->nbatchSize; i++){
-        data->srcDimensions[i].width = srcBatch_width[i];
-        data->srcDimensions[i].height = srcBatch_height[i];
-    }
-    data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize);
-    Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    for(int i = 0; i < data->nbatchSize; i++){
-        data->roiPoints[i].x = batch_roiX[i];
-        data->roiPoints[i].y = batch_roiY[i];
-        data->roiPoints[i].roiWidth = batch_roiWidth[i];
-        data->roiPoints[i].roiHeight = batch_roiHeight[i];
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc)));
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8)));
-    }
-    return status;
-}
-
-static vx_status VX_CALLBACK validateAccumulateSquaredbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
-{
-    vx_status status = VX_SUCCESS;
-    vx_enum scalar_type;
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type);
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type);
-    // Check for input parameters
-    vx_parameter input_param;
-    vx_image input;
-    vx_df_image df_image;
-    input_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateSquaredbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    // Check for output parameters
-    vx_image output;
-    vx_df_image format;
-    vx_parameter output_param;
-    vx_uint32 height, width;
-    output_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    vxReleaseImage(&input);
-    vxReleaseImage(&output);
-    vxReleaseParameter(&output_param);
-    vxReleaseParameter(&input_param);
-    return status;
-}
-
-static vx_status VX_CALLBACK processAccumulateSquaredbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
-    RppStatus rpp_status = RPP_SUCCESS;
-    vx_status return_status = VX_SUCCESS;
-    AccumulateSquaredbatchPDROIDLocalData * data = NULL;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    vx_df_image df_image = VX_DF_IMAGE_VIRT;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        cl_command_queue handle = data->handle.cmdq;
-        refreshAccumulateSquaredbatchPDROID(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_squared_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_squared_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        refreshAccumulateSquaredbatchPDROID(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_squared_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_squared_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-    }
-    return return_status;
-}
-
-static vx_status VX_CALLBACK initializeAccumulateSquaredbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AccumulateSquaredbatchPDROIDLocalData * data = new AccumulateSquaredbatchPDROIDLocalData;
-    memset(data, 0, sizeof(*data));
-#if ENABLE_OPENCL
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
-#endif
-    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-    refreshAccumulateSquaredbatchPDROID(node, parameters, num, data);
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize);
-
-    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    return VX_SUCCESS;
-}
-
-static vx_status VX_CALLBACK uninitializeAccumulateSquaredbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AccumulateSquaredbatchPDROIDLocalData * data;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppDestroyGPU(data->rppHandle);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppDestroyHost(data->rppHandle);
-    delete(data);
-    return VX_SUCCESS;
-}
-
-vx_status AccumulateSquaredbatchPDROID_Register(vx_context context)
-{
-    vx_status status = VX_SUCCESS;
-    // Add kernel to the context with callbacks
-    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateSquaredbatchPDROID",
-        VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPDROID,
-        processAccumulateSquaredbatchPDROID,
-        9,
-        validateAccumulateSquaredbatchPDROID,
-        initializeAccumulateSquaredbatchPDROID,
-        uninitializeAccumulateSquaredbatchPDROID);
-    ERROR_CHECK_OBJECT(kernel);
-    AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
-#if ENABLE_OPENCL
-    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
-    vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
-#else
-    vx_bool enableBufferAccess = vx_false_e;
-#endif
-    if (kernel)
-    {
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2,
VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulateSquaredbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulateSquaredbatchPS.cpp deleted file mode 100644 index 0cbb375a02..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/AccumulateSquaredbatchPS.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct AccumulateSquaredbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; -#if ENABLE_OPENCL - cl_mem cl_pSrc; -#endif -}; - -static vx_status VX_CALLBACK refreshAccumulateSquaredbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateSquaredbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAccumulateSquaredbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateSquaredbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; 
- output_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAccumulateSquaredbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AccumulateSquaredbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshAccumulateSquaredbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_squared_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_squared_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshAccumulateSquaredbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_squared_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_squared_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAccumulateSquaredbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulateSquaredbatchPSLocalData * data = new AccumulateSquaredbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshAccumulateSquaredbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAccumulateSquaredbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulateSquaredbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status AccumulateSquaredbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateSquaredbatchPS", - VX_KERNEL_RPP_ACCUMULATESQUAREDBATCHPS, - processAccumulateSquaredbatchPS, - 5, - validateAccumulateSquaredbatchPS, - initializeAccumulateSquaredbatchPS, - uninitializeAccumulateSquaredbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulateWeighted.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulateWeighted.cpp deleted file mode 100644 index 64a38bb8c4..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/AccumulateWeighted.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct AccumulateWeightedLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - Rpp32f alpha; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; -#endif -}; - -static vx_status VX_CALLBACK refreshAccumulateWeighted(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateWeightedLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->alpha)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAccumulateWeighted(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image 
input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeighted: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeighted: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAccumulateWeighted(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AccumulateWeightedLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshAccumulateWeighted(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_weighted_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->alpha,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_weighted_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->alpha,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshAccumulateWeighted(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_weighted_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->alpha,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_weighted_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->alpha,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAccumulateWeighted(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulateWeightedLocalData * data = new AccumulateWeightedLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshAccumulateWeighted(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAccumulateWeighted(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulateWeightedLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status AccumulateWeighted_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateWeighted", - VX_KERNEL_RPP_ACCUMULATEWEIGHTED, - processAccumulateWeighted, - 4, - validateAccumulateWeighted, - initializeAccumulateWeighted, - uninitializeAccumulateWeighted); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: 
vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPD.cpp index 43b3dc2118..6cda8908c8 100644 --- a/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPD.cpp @@ -22,209 +22,272 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct AccumulateWeightedbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - vx_float32 *alpha; +struct AccumulateWeightedbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + vx_float32 *alpha; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; +#endif }; static vx_status VX_CALLBACK refreshAccumulateWeightedbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateWeightedbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / 
data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateAccumulateWeightedbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeightedbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, 
sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeightedbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeightedbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeightedbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + 
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processAccumulateWeightedbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AccumulateWeightedbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processAccumulateWeightedbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + AccumulateWeightedbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshAccumulateWeightedbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_weighted_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshAccumulateWeightedbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_accumulate_weighted_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshAccumulateWeightedbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_accumulate_weighted_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshAccumulateWeightedbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_weighted_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshAccumulateWeightedbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_accumulate_weighted_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->alpha, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeAccumulateWeightedbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeAccumulateWeightedbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - AccumulateWeightedbatchPDLocalData * data = new AccumulateWeightedbatchPDLocalData; - memset(data, 0, sizeof(*data)); + AccumulateWeightedbatchPDLocalData *data = new AccumulateWeightedbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshAccumulateWeightedbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + refreshAccumulateWeightedbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - 
if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeAccumulateWeightedbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - AccumulateWeightedbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + AccumulateWeightedbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + free(data->alpha); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status AccumulateWeightedbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateWeightedbatchPD", - VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD, - processAccumulateWeightedbatchPD, - 7, - validateAccumulateWeightedbatchPD, - initializeAccumulateWeightedbatchPD, - uninitializeAccumulateWeightedbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; 
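+ // Parameter map: #0 accumulator image batch (bidirectional), #1 input image batch, #2 source width array, #3 source height array, #4 per-image alpha array, #5 batch size scalar, #6 device type scalar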
+ // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateWeightedbatchPD", + VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPD, + processAccumulateWeightedbatchPD, + 7, + validateAccumulateWeightedbatchPD, + initializeAccumulateWeightedbatchPD, + uninitializeAccumulateWeightedbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPDROID.cpp deleted file mode 100644 index 7bec6fb4d8..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPDROID.cpp
deleted file mode 100644
index 7bec6fb4d8..0000000000
--- a/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPDROID.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
-Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "internal_publishKernels.h"
-
-struct AccumulateWeightedbatchPDROIDLocalData {
-    RPPCommonHandle handle;
-    rppHandle_t rppHandle;
-    Rpp32u device_type;
-    Rpp32u nbatchSize;
-    RppiROI *roiPoints;
-    RppiSize *srcDimensions;
-    RppiSize maxSrcDimensions;
-    RppPtr_t pSrc1;
-    RppPtr_t pSrc2;
-    vx_float32 *alpha;
-#if ENABLE_OPENCL
-    cl_mem cl_pSrc1;
-    cl_mem cl_pSrc2;
-#endif
-};
-
-static vx_status VX_CALLBACK refreshAccumulateWeightedbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateWeightedbatchPDROIDLocalData *data)
-{
-    vx_status status = VX_SUCCESS;
-    size_t arr_size;
-    vx_status copy_status;
-    STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size)));
-    data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size);
-    copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height)));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width)));
-    data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize;
-    data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);
-    Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    for(int i = 0; i < data->nbatchSize; i++){
-        data->srcDimensions[i].width = srcBatch_width[i];
-        data->srcDimensions[i].height = srcBatch_height[i];
-    }
-    data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize);
-    Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    for(int i = 0; i < data->nbatchSize; i++){
-        data->roiPoints[i].x = batch_roiX[i];
-        data->roiPoints[i].y = batch_roiY[i];
-        data->roiPoints[i].roiWidth = batch_roiWidth[i];
-        data->roiPoints[i].roiHeight = batch_roiHeight[i];
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2)));
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8)));
-    }
-    return status;
-}
-
-static vx_status VX_CALLBACK validateAccumulateWeightedbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
-{
-    vx_status status = VX_SUCCESS;
-    vx_enum scalar_type;
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type);
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type);
-    // Check for input parameters
-    vx_parameter input_param;
-    vx_image input;
-    vx_df_image df_image;
-    input_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeightedbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    input_param = vxGetParameterByIndex(node,1);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeightedbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    // Check for output parameters
-    vx_image output;
-    vx_df_image format;
-    vx_parameter output_param;
-    vx_uint32 height, width;
-    output_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    vxReleaseImage(&input);
-    vxReleaseImage(&output);
-    vxReleaseParameter(&output_param);
-    vxReleaseParameter(&input_param);
-    return status;
-}
-
-static vx_status VX_CALLBACK processAccumulateWeightedbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
-    RppStatus rpp_status = RPP_SUCCESS;
-    vx_status return_status = VX_SUCCESS;
-    AccumulateWeightedbatchPDROIDLocalData * data = NULL;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    vx_df_image df_image = VX_DF_IMAGE_VIRT;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        cl_command_queue handle = data->handle.cmdq;
-        refreshAccumulateWeightedbatchPDROID(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_weighted_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        refreshAccumulateWeightedbatchPDROID(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_weighted_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->roiPoints,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-    }
-    return return_status;
-}
-
-static vx_status VX_CALLBACK initializeAccumulateWeightedbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AccumulateWeightedbatchPDROIDLocalData * data = new AccumulateWeightedbatchPDROIDLocalData;
-    memset(data, 0, sizeof(*data));
-#if ENABLE_OPENCL
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
-#endif
-    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-    refreshAccumulateWeightedbatchPDROID(node, parameters, num, data);
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize);
-
-    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    return VX_SUCCESS;
-}
-
-static vx_status VX_CALLBACK uninitializeAccumulateWeightedbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AccumulateWeightedbatchPDROIDLocalData * data;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppDestroyGPU(data->rppHandle);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppDestroyHost(data->rppHandle);
-    delete(data);
-    return VX_SUCCESS;
-}
-
-vx_status AccumulateWeightedbatchPDROID_Register(vx_context context)
-{
-    vx_status status = VX_SUCCESS;
-    // Add kernel to the context with callbacks
-    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateWeightedbatchPDROID",
-                                       VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPDROID,
-                                       processAccumulateWeightedbatchPDROID,
-                                       11,
-                                       validateAccumulateWeightedbatchPDROID,
-                                       initializeAccumulateWeightedbatchPDROID,
-                                       uninitializeAccumulateWeightedbatchPDROID);
-    ERROR_CHECK_OBJECT(kernel);
-    AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
-#if ENABLE_OPENCL
-    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
-    vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
-#else
-    vx_bool enableBufferAccess = vx_false_e;
-#endif
-    if (kernel)
-    {
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
-    }
-    if (status != VX_SUCCESS)
-    {
-    exit: vxRemoveKernel(kernel); return VX_FAILURE;
-    }
-    return status;
-}
diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPS.cpp
deleted file mode 100644
index a2a0b94b8c..0000000000
--- a/amd_openvx_extensions/amd_rpp/source/AccumulateWeightedbatchPS.cpp
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
-Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "internal_publishKernels.h"
-
-struct AccumulateWeightedbatchPSLocalData {
-    RPPCommonHandle handle;
-    rppHandle_t rppHandle;
-    Rpp32u device_type;
-    Rpp32u nbatchSize;
-    RppiSize *srcDimensions;
-    RppiSize maxSrcDimensions;
-    RppPtr_t pSrc1;
-    RppPtr_t pSrc2;
-    Rpp32f alpha;
-#if ENABLE_OPENCL
-    cl_mem cl_pSrc1;
-    cl_mem cl_pSrc2;
-#endif
-};
-
-static vx_status VX_CALLBACK refreshAccumulateWeightedbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulateWeightedbatchPSLocalData *data)
-{
-    vx_status status = VX_SUCCESS;
-    size_t arr_size;
-    vx_status copy_status;
-    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->alpha));
-    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height)));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width)));
-    data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize;
-    data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);
-    Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    for(int i = 0; i < data->nbatchSize; i++){
-        data->srcDimensions[i].width = srcBatch_width[i];
-        data->srcDimensions[i].height = srcBatch_height[i];
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2)));
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8)));
-    }
-    return status;
-}
-
-static vx_status VX_CALLBACK validateAccumulateWeightedbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
-{
-    vx_status status = VX_SUCCESS;
-    vx_enum scalar_type;
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type);
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type);
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type);
-    // Check for input parameters
-    vx_parameter input_param;
-    vx_image input;
-    vx_df_image df_image;
-    input_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeightedbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    input_param = vxGetParameterByIndex(node,1);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulateWeightedbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    // Check for output parameters
-    vx_image output;
-    vx_df_image format;
-    vx_parameter output_param;
-    vx_uint32 height, width;
-    output_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    vxReleaseImage(&input);
-    vxReleaseImage(&output);
-    vxReleaseParameter(&output_param);
-    vxReleaseParameter(&input_param);
-    return status;
-}
-
-static vx_status VX_CALLBACK processAccumulateWeightedbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
-    RppStatus rpp_status = RPP_SUCCESS;
-    vx_status return_status = VX_SUCCESS;
-    AccumulateWeightedbatchPSLocalData * data = NULL;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    vx_df_image df_image = VX_DF_IMAGE_VIRT;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        cl_command_queue handle = data->handle.cmdq;
-        refreshAccumulateWeightedbatchPS(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_weighted_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-#endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        refreshAccumulateWeightedbatchPS(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_weighted_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_weighted_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->alpha,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-    }
-    return return_status;
-}
-
-static vx_status VX_CALLBACK initializeAccumulateWeightedbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AccumulateWeightedbatchPSLocalData * data = new AccumulateWeightedbatchPSLocalData;
-    memset(data, 0, sizeof(*data));
-#if ENABLE_OPENCL
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
-#endif
-    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-    refreshAccumulateWeightedbatchPS(node, parameters, num, data);
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize);
-
-    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    return VX_SUCCESS;
-}
-
-static vx_status VX_CALLBACK uninitializeAccumulateWeightedbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num)
-{
-    AccumulateWeightedbatchPSLocalData * data;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-#if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppDestroyGPU(data->rppHandle);
-#endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppDestroyHost(data->rppHandle);
-    delete(data);
-    return VX_SUCCESS;
-}
-
-vx_status AccumulateWeightedbatchPS_Register(vx_context context)
-{
-    vx_status status = VX_SUCCESS;
-    // Add kernel to the context with callbacks
-    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulateWeightedbatchPS",
-                                       VX_KERNEL_RPP_ACCUMULATEWEIGHTEDBATCHPS,
-                                       processAccumulateWeightedbatchPS,
-                                       7,
-                                       validateAccumulateWeightedbatchPS,
-                                       initializeAccumulateWeightedbatchPS,
-                                       uninitializeAccumulateWeightedbatchPS);
-    ERROR_CHECK_OBJECT(kernel);
-    AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
-#if ENABLE_OPENCL
-    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
-    vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
-#else
-    vx_bool enableBufferAccess = vx_false_e;
-#endif
-    if (kernel)
-    {
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
-    }
-    if (status != VX_SUCCESS)
-    {
-    exit: vxRemoveKernel(kernel); return VX_FAILURE;
-    }
-    return status;
-}
diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulatebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulatebatchPD.cpp
index 07bbb1aa21..490c693d49 100644
--- a/amd_openvx_extensions/amd_rpp/source/AccumulatebatchPD.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/AccumulatebatchPD.cpp
@@ -22,204 +22,267 @@ THE SOFTWARE.
 
 #include "internal_publishKernels.h"
 
-struct AccumulatebatchPDLocalData {
-    RPPCommonHandle handle;
-    rppHandle_t rppHandle;
-    Rpp32u device_type;
-    Rpp32u nbatchSize;
-    RppiSize *srcDimensions;
-    RppiSize maxSrcDimensions;
-    RppPtr_t pSrc1;
-    RppPtr_t pSrc2;
+struct AccumulatebatchPDLocalData
+{
+    RPPCommonHandle handle;
+    rppHandle_t rppHandle;
+    Rpp32u device_type;
+    Rpp32u nbatchSize;
+    RppiSize *srcDimensions;
+    RppiSize maxSrcDimensions;
+    Rpp32u *srcBatch_width;
+    Rpp32u *srcBatch_height;
+    RppPtr_t pSrc1;
+    RppPtr_t pSrc2;
 #if ENABLE_OPENCL
-    cl_mem cl_pSrc1;
-    cl_mem cl_pSrc2;
-#endif
+    cl_mem cl_pSrc1;
+    cl_mem cl_pSrc2;
+#elif ENABLE_HIP
+    void *hip_pSrc1;
+    void *hip_pSrc2;
+#endif
 };
 
 static vx_status VX_CALLBACK refreshAccumulatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulatebatchPDLocalData *data)
 {
-    vx_status status = VX_SUCCESS;
-    size_t arr_size;
-    vx_status copy_status;
-    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height)));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width)));
-    data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize;
-    data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);
-    Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
-    copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    for(int i = 0; i < data->nbatchSize; i++){
-        data->srcDimensions[i].width = srcBatch_width[i];
-        data->srcDimensions[i].height = srcBatch_height[i];
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
+    vx_status status = VX_SUCCESS;
+    vx_status copy_status;
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height)));
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width)));
+    data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize;
+    STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    for (int i = 0; i < data->nbatchSize; i++)
+    {
+        data->srcDimensions[i].width = data->srcBatch_width[i];
+        data->srcDimensions[i].height = data->srcBatch_height[i];
+    }
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+    {
 #if ENABLE_OPENCL
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1)));
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2)));
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1)));
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2)));
+#elif ENABLE_HIP
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1)));
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2)));
 #endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8)));
-        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8)));
-    }
-    return status;
+    }
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+    {
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8)));
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8)));
+    }
+    return status;
 }
 
 static vx_status VX_CALLBACK validateAccumulatebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
 {
-    vx_status status = VX_SUCCESS;
-    vx_enum scalar_type;
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type);
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type);
-    // Check for input parameters
-    vx_parameter input_param;
-    vx_image input;
-    vx_df_image df_image;
-    input_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    input_param = vxGetParameterByIndex(node,1);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulatebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
-
-    // Check for output parameters
-    vx_image output;
-    vx_df_image format;
-    vx_parameter output_param;
-    vx_uint32 height, width;
-    output_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    vxReleaseImage(&input);
-    vxReleaseImage(&output);
-    vxReleaseParameter(&output_param);
-    vxReleaseParameter(&input_param);
-    return status;
+    vx_status status = VX_SUCCESS;
+    vx_enum scalar_type;
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_UINT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_UINT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+    // Check for input parameters
+    vx_parameter input_param;
+    vx_image input;
+    vx_df_image df_image;
+    input_param = vxGetParameterByIndex(node, 0);
+    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
+    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
+    {
+        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
+    }
+
+    input_param = vxGetParameterByIndex(node, 1);
+    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
+    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
+    {
+        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulatebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
+    }
+
+    // Check for output parameters
+    vx_image output;
+    vx_df_image format;
+    vx_parameter output_param;
+    vx_uint32 height, width;
+    output_param = vxGetParameterByIndex(node, 0);
+    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
+    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
+    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    vxReleaseImage(&input);
+    vxReleaseImage(&output);
+    vxReleaseParameter(&output_param);
+    vxReleaseParameter(&input_param);
+    return status;
 }
 
-static vx_status VX_CALLBACK processAccumulatebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
-    RppStatus rpp_status = RPP_SUCCESS;
-    vx_status return_status = VX_SUCCESS;
-    AccumulatebatchPDLocalData * data = NULL;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    vx_df_image df_image = VX_DF_IMAGE_VIRT;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
+static vx_status VX_CALLBACK processAccumulatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    RppStatus rpp_status = RPP_SUCCESS;
+    vx_status return_status = VX_SUCCESS;
+    AccumulatebatchPDLocalData *data = NULL;
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    vx_df_image df_image = VX_DF_IMAGE_VIRT;
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+    {
 #if ENABLE_OPENCL
-    cl_command_queue handle = data->handle.cmdq;
-    refreshAccumulatebatchPD(node, parameters, num, data);
-    if (df_image == VX_DF_IMAGE_U8 ){
-        rpp_status = rppi_accumulate_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle);
-    }
-    else if(df_image == VX_DF_IMAGE_RGB) {
-        rpp_status = rppi_accumulate_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle);
-    }
-    return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+    refreshAccumulatebatchPD(node, parameters, num, data);
+    if (df_image == VX_DF_IMAGE_U8)
+    {
+        rpp_status = rppi_accumulate_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle);
+    }
+    else if (df_image == VX_DF_IMAGE_RGB)
+    {
+        rpp_status = rppi_accumulate_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle);
+    }
+    return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#elif ENABLE_HIP
+    refreshAccumulatebatchPD(node, parameters, num, data);
+    if (df_image == VX_DF_IMAGE_U8)
+    {
+        rpp_status = rppi_accumulate_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle);
+    }
+    else if (df_image == VX_DF_IMAGE_RGB)
+    {
+        rpp_status = rppi_accumulate_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle);
+    }
+    return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
 #endif
-    }
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
-        refreshAccumulatebatchPD(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_accumulate_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle);
-        }
-        else if(df_image == VX_DF_IMAGE_RGB) {
-            rpp_status = rppi_accumulate_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle);
-        }
-        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
-
-    }
-    return return_status;
+    }
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+    {
+        refreshAccumulatebatchPD(node, parameters, num, data);
+        if (df_image == VX_DF_IMAGE_U8)
+        {
+            rpp_status = rppi_accumulate_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle);
+        }
+        else if (df_image == VX_DF_IMAGE_RGB)
+        {
+            rpp_status = rppi_accumulate_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->nbatchSize, data->rppHandle);
+        }
+        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+    }
+    return return_status;
 }
 
-static vx_status VX_CALLBACK initializeAccumulatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
+static vx_status VX_CALLBACK initializeAccumulatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
 {
-    AccumulatebatchPDLocalData * data = new AccumulatebatchPDLocalData;
-    memset(data, 0, sizeof(*data));
+    AccumulatebatchPDLocalData *data = new AccumulatebatchPDLocalData;
+    memset(data, 0, sizeof(*data));
 #if ENABLE_OPENCL
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
+#elif ENABLE_HIP
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream)));
 #endif
-    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-    refreshAccumulatebatchPD(node, parameters, num, data);
+    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize));
+    data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);
+    data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
+    data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
+    refreshAccumulatebatchPD(node, parameters, num, data);
 #if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
+#elif ENABLE_HIP
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize);
 #endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize);
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+        rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize);
 
-    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    return VX_SUCCESS;
+    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    return VX_SUCCESS;
 }
 
 static vx_status VX_CALLBACK uninitializeAccumulatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
 {
-    AccumulatebatchPDLocalData * data;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    AccumulatebatchPDLocalData *data;
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+#if ENABLE_OPENCL || ENABLE_HIP
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+        rppDestroyGPU(data->rppHandle);
+#endif
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+        rppDestroyHost(data->rppHandle);
+    free(data->srcBatch_height);
+    free(data->srcBatch_width);
+    free(data->srcDimensions);
+    delete (data);
+    return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+                                                  vx_bool use_opencl_1_2,              // [input]  false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+                                                  vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+)
+{
+    vx_context context = vxGetContext((vx_reference)graph);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+    else
+        supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes
 #if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppDestroyGPU(data->rppHandle);
+    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
+        rppDestroyGPU(data->rppHandle);
+    supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
 #endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppDestroyHost(data->rppHandle);
-    delete(data);
-    return VX_SUCCESS;
+    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
+        rppDestroyHost(data->rppHandle);
+    delete(data);
+    return VX_SUCCESS;
+
+    return VX_SUCCESS;
 }
 
 vx_status AccumulatebatchPD_Register(vx_context context)
 {
-    vx_status status = VX_SUCCESS;
-    // Add kernel to the context with callbacks
-    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulatebatchPD",
-                                       VX_KERNEL_RPP_ACCUMULATEBATCHPD,
-                                       processAccumulatebatchPD,
-                                       6,
-                                       validateAccumulatebatchPD,
-                                       initializeAccumulatebatchPD,
-                                       uninitializeAccumulatebatchPD);
-    ERROR_CHECK_OBJECT(kernel);
-    AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
-#if ENABLE_OPENCL
-    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
-    vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+    vx_status status = VX_SUCCESS;
+    // Add kernel to the context with callbacks
+    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulatebatchPD",
+                                       VX_KERNEL_RPP_ACCUMULATEBATCHPD,
+                                       processAccumulatebatchPD,
+                                       6,
+                                       validateAccumulatebatchPD,
+                                       initializeAccumulatebatchPD,
+                                       uninitializeAccumulatebatchPD);
+    ERROR_CHECK_OBJECT(kernel);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_OPENCL || ENABLE_HIP
+    // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers
+    vx_bool enableBufferAccess = vx_true_e;
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
 #else
-    vx_bool enableBufferAccess = vx_false_e;
+    vx_bool enableBufferAccess = vx_false_e;
 #endif
-    if (kernel)
-    {
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
-    }
-    if (status != VX_SUCCESS)
-    {
-    exit: vxRemoveKernel(kernel); return VX_FAILURE;
-    }
-    return status;
+    amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+    if (kernel)
+    {
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+    }
+    if (status != VX_SUCCESS)
+    {
+    exit:
+        vxRemoveKernel(kernel);
+        return VX_FAILURE;
+    }
+    return status;
 }
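The AccumulatebatchPD registration above is the template the other batchPD kernels in this patch follow: a query_target_support callback plus GPU buffer access when either the OpenCL or the HIP backend is enabled. A minimal usage sketch, assuming the standard OpenVX entry points; the helper name and the way the arrays and scalars are produced are illustrative, not part of this patch:

    #include <VX/vx.h>

    // Hypothetical helper: instantiates the kernel registered above.
    // Parameter indices mirror the vxAddParameterToKernel() calls.
    vx_node createAccumulateBatchPDNode(vx_graph graph, vx_image srcDst, vx_image src2,
                                        vx_array widths, vx_array heights,
                                        vx_scalar batchSize, vx_scalar deviceType)
    {
        vx_context context = vxGetContext((vx_reference)graph);
        // Look up the kernel by the name used in AccumulatebatchPD_Register()
        vx_kernel kernel = vxGetKernelByName(context, "org.rpp.AccumulatebatchPD");
        vx_node node = vxCreateGenericNode(graph, kernel);
        vxSetParameterByIndex(node, 0, (vx_reference)srcDst);     // VX_BIDIRECTIONAL accumulator image
        vxSetParameterByIndex(node, 1, (vx_reference)src2);       // input image
        vxSetParameterByIndex(node, 2, (vx_reference)widths);     // per-image widths (Rpp32u)
        vxSetParameterByIndex(node, 3, (vx_reference)heights);    // per-image heights (Rpp32u)
        vxSetParameterByIndex(node, 4, (vx_reference)batchSize);  // nbatchSize (VX_TYPE_UINT32)
        vxSetParameterByIndex(node, 5, (vx_reference)deviceType); // CPU/GPU affinity (VX_TYPE_UINT32)
        vxReleaseKernel(&kernel);
        return node;
    }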
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct AccumulatebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; -#endif -}; - -static vx_status VX_CALLBACK refreshAccumulatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulatebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAccumulatebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulatebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulatebatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAccumulatebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - 
AccumulatebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshAccumulatebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshAccumulatebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAccumulatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulatebatchPDROIDLocalData * data = new AccumulatebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshAccumulatebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAccumulatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulatebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status AccumulatebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulatebatchPDROID", - VX_KERNEL_RPP_ACCUMULATEBATCHPDROID, - processAccumulatebatchPDROID, - 10, - validateAccumulatebatchPDROID, - initializeAccumulatebatchPDROID, - 
uninitializeAccumulatebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/AccumulatebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/AccumulatebatchPS.cpp deleted file mode 100644 index da25dc64ca..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/AccumulatebatchPS.cpp +++ /dev/null @@ -1,225 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
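Before its removal, a kernel registered this way was instantiated from application code through the generic OpenVX node API. A minimal usage sketch, not taken from the source: the context/graph objects and the array/scalar inputs are assumed to already exist, and the meaning of array indices 4-7 (ROI x, y, width, height) is inferred from the batchPDROID refresh callbacks rather than stated in this file.

```cpp
// Hypothetical usage sketch (not from the source): instantiating the removed
// org.rpp.AccumulatebatchPDROID kernel via the generic OpenVX node API.
// Parameter order follows the vxAddParameterToKernel calls above; the array
// index meanings for the ROI inputs are an assumption.
vx_kernel k = vxGetKernelByName(context, "org.rpp.AccumulatebatchPDROID");
vx_node n = vxCreateGenericNode(graph, k);
vx_reference params[] = {
    (vx_reference)srcDst, (vx_reference)src2,           // 0: bidirectional, 1: input image
    (vx_reference)widths, (vx_reference)heights,        // 2, 3: per-image dimensions
    (vx_reference)roiX, (vx_reference)roiY,             // 4, 5: ROI origin (assumed order)
    (vx_reference)roiW, (vx_reference)roiH,             // 6, 7: ROI extent (assumed order)
    (vx_reference)nbatchSize, (vx_reference)deviceType  // 8, 9: uint32 scalars
};
for (vx_uint32 i = 0; i < 10; i++)
    vxSetParameterByIndex(n, i, params[i]);
```

The same pattern applies to every other kernel deleted in this patch; only the name and the parameter count change.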
-*/ - -#include "internal_publishKernels.h" - -struct AccumulatebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; -#endif -}; - -static vx_status VX_CALLBACK refreshAccumulatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, AccumulatebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAccumulatebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { 
- return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulatebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AccumulatebatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAccumulatebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AccumulatebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshAccumulatebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshAccumulatebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_accumulate_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_accumulate_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAccumulatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulatebatchPSLocalData * data = new AccumulatebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshAccumulatebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAccumulatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AccumulatebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status AccumulatebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AccumulatebatchPS", - VX_KERNEL_RPP_ACCUMULATEBATCHPS, - processAccumulatebatchPS, - 6, - validateAccumulatebatchPS, - initializeAccumulatebatchPS, - uninitializeAccumulatebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_BIDIRECTIONAL, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Add.cpp b/amd_openvx_extensions/amd_rpp/source/Add.cpp deleted file mode 100644 index 632e64825c..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Add.cpp +++ /dev/null @@ 
-1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct AddLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshAdd(vx_node node, const vx_reference *parameters, vx_uint32 num, AddLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAdd(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Add: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Add: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAdd(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AddLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshAdd(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_add_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_add_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshAdd(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_add_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_add_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAdd(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AddLocalData * data = new AddLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshAdd(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAdd(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AddLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Add_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Add", - VX_KERNEL_RPP_ADD, - processAdd, - 4, - validateAdd, - initializeAdd, - uninitializeAdd); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/AddbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/AddbatchPD.cpp index 0381d5ef02..6415f55631 100644 --- a/amd_openvx_extensions/amd_rpp/source/AddbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/AddbatchPD.cpp @@ -22,209 +22,272 @@ THE SOFTWARE. 
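The hunk that follows reworks AddbatchPD.cpp rather than deleting it. One detail worth keeping in mind while reading it: batchPD kernels receive the whole batch as a single vx_image in which the N images are stacked vertically, so the per-image maximum height is recovered by dividing the queried image height by nbatchSize. A minimal sketch of that layout arithmetic, with illustrative names that are not from the source:

```cpp
#include <cstddef>

// Illustrative sketch of the batchPD memory layout: the batch arrives as one
// image whose height is nbatchSize times the per-image maximum height, so
// each image occupies a fixed vertical slot in the buffer.
size_t imageSlotOffset(size_t i, size_t stackedHeight, size_t nbatchSize,
                       size_t rowStrideBytes) {
    size_t maxHeight = stackedHeight / nbatchSize; // as in refreshAddbatchPD
    return i * maxHeight * rowStrideBytes;         // byte offset of image i
}
```

This is also why the refresh callback only needs the per-image width/height arrays plus one maxSrcDimensions value to describe the whole batch to RPP.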
#include "internal_publishKernels.h" -struct AddbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct AddbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshAddbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, AddbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], 
VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateAddbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AddbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AddbatchPD: image: #1 format=%4.4s (must be RGB2 or 
U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AddbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AddbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status 
VX_CALLBACK processAddbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AddbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processAddbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + AddbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshAddbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_add_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_add_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshAddbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_add_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_add_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshAddbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_add_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_add_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshAddbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_add_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_add_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshAddbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_add_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_add_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeAddbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeAddbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - AddbatchPDLocalData * data = new AddbatchPDLocalData; - memset(data, 0, sizeof(*data)); + AddbatchPDLocalData *data = new AddbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshAddbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshAddbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeAddbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - AddbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + AddbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + 
rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status AddbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AddbatchPD", - VX_KERNEL_RPP_ADDBATCHPD, - processAddbatchPD, - 7, - validateAddbatchPD, - initializeAddbatchPD, - uninitializeAddbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AddbatchPD", + VX_KERNEL_RPP_ADDBATCHPD, + processAddbatchPD, + 7, + validateAddbatchPD, + initializeAddbatchPD, + uninitializeAddbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses GPU buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); -
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + if (kernel) + { + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/AddbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/AddbatchPDROID.cpp deleted file mode 100644 index 8306cf33c8..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/AddbatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
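In both the removed and the rewritten registration code above, GPU buffer access is only enabled when the context affinity queried via VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY reports a GPU target. For completeness, a hedged sketch of the application-side counterpart, assuming the same attribute is settable through the standard vxSetContextAttribute call (the struct and enums come from the AMD OpenVX extension headers):

```cpp
// Sketch: select the GPU path for amd_rpp nodes by setting the AMD affinity
// attribute on the context before building and verifying the graph. This
// mirrors the vxQueryContext(VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, ...) calls
// made by the register functions in this file.
AgoTargetAffinityInfo affinity = {};
affinity.device_type = AGO_TARGET_AFFINITY_GPU;
vxSetContextAttribute(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,
                      &affinity, sizeof(affinity));
```

If the affinity is left at its CPU default, the kernels fall through to the rppi_*_host entry points instead of the _gpu ones.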
-*/ - -#include "internal_publishKernels.h" - -struct AddbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshAddbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, AddbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAddbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AddbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AddbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAddbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AddbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, 
&df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshAddbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_add_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_add_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshAddbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_add_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_add_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAddbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AddbatchPDROIDLocalData * data = new AddbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshAddbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAddbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AddbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status AddbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AddbatchPDROID", - VX_KERNEL_RPP_ADDBATCHPDROID, - processAddbatchPDROID, - 11, - validateAddbatchPDROID, - initializeAddbatchPDROID, - uninitializeAddbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool 
enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/AddbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/AddbatchPS.cpp deleted file mode 100644 index 5104043a63..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/AddbatchPS.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
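[Note on the batch layout shared by the batchPD/batchPS kernels in this extension: a batch of N images travels through a single vx_image with the images stacked vertically. The refresh callbacks therefore divide the queried VX_IMAGE_HEIGHT by nbatchSize to recover the per-image maximum dimensions, and read the true per-image widths and heights from two parallel vx_array parameters. Below is a minimal sketch of how a caller could build those arrays with standard OpenVX calls; the helper name packBatchDims is hypothetical and not part of this extension.

    #include <VX/vx.h>
    #include <vector>

    // Hypothetical helper: fills the per-image width/height arrays that the
    // batch kernels read back in their refresh callbacks.
    static vx_status packBatchDims(vx_context ctx,
                                   const std::vector<vx_uint32> &widths,
                                   const std::vector<vx_uint32> &heights,
                                   vx_array *outWidths, vx_array *outHeights)
    {
        vx_size n = widths.size();
        *outWidths = vxCreateArray(ctx, VX_TYPE_UINT32, n);
        *outHeights = vxCreateArray(ctx, VX_TYPE_UINT32, n);
        if (vxGetStatus((vx_reference)*outWidths) != VX_SUCCESS ||
            vxGetStatus((vx_reference)*outHeights) != VX_SUCCESS)
            return VX_FAILURE;
        vx_status status = vxAddArrayItems(*outWidths, n, widths.data(), sizeof(vx_uint32));
        if (status == VX_SUCCESS)
            status = vxAddArrayItems(*outHeights, n, heights.data(), sizeof(vx_uint32));
        return status;
    }
]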
-*/ - -#include "internal_publishKernels.h" - -struct AddbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshAddbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, AddbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateAddbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - 
STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AddbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: AddbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processAddbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - AddbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshAddbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_add_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_add_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshAddbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_add_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_add_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeAddbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AddbatchPSLocalData * data = new AddbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshAddbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeAddbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - AddbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status AddbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.AddbatchPS", - VX_KERNEL_RPP_ADDBATCHPS, - processAddbatchPS, - 7, - validateAddbatchPS, - initializeAddbatchPS, - uninitializeAddbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BilateralFilter.cpp b/amd_openvx_extensions/amd_rpp/source/BilateralFilter.cpp deleted file mode 100644 index 6bc1ca730a..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BilateralFilter.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
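[For orientation, every node in this file set is published through the same vxAddUserKernel pattern visible in the register functions here: a process callback that dispatches to RPP, a validate callback that type-checks scalars and propagates output image metadata, and initialize/uninitialize callbacks that own the rppHandle_t. The following stripped-down skeleton shows that pattern in isolation; the kernel name, enum, and callback bodies are placeholders, while the real kernels use the VX_KERNEL_RPP_* IDs and the callbacks shown in this diff.

    #include <VX/vx.h>

    static vx_status VX_CALLBACK exProcess(vx_node node, const vx_reference *parameters, vx_uint32 num) { return VX_SUCCESS; }
    static vx_status VX_CALLBACK exValidate(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { return VX_SUCCESS; }
    static vx_status VX_CALLBACK exInitialize(vx_node node, const vx_reference *parameters, vx_uint32 num) { return VX_SUCCESS; }
    static vx_status VX_CALLBACK exUninitialize(vx_node node, const vx_reference *parameters, vx_uint32 num) { return VX_SUCCESS; }

    vx_status Example_Register(vx_context context)
    {
        // placeholder kernel id built from the standard macro
        vx_enum exampleId = VX_KERNEL_BASE(VX_ID_DEFAULT, 0) + 0x1;
        vx_kernel kernel = vxAddUserKernel(context, "org.example.passthrough", exampleId,
                                           exProcess, 2, exValidate, exInitialize, exUninitialize);
        if (vxGetStatus((vx_reference)kernel) != VX_SUCCESS)
            return VX_FAILURE;
        vx_status status = vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED);
        if (status == VX_SUCCESS)
            status = vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED);
        if (status == VX_SUCCESS)
            status = vxFinalizeKernel(kernel);
        if (status != VX_SUCCESS)
            vxRemoveKernel(kernel);
        return status;
    }
]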
-*/ - -#include "internal_publishKernels.h" - -struct BilateralFilterLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; - Rpp64f sigmaI; - Rpp64f sigmaS; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBilateralFilter(vx_node node, const vx_reference *parameters, vx_uint32 num, BilateralFilterLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->sigmaI)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->sigmaS)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBilateralFilter(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT64) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT64) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BilateralFilter: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output 
parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBilateralFilter(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BilateralFilterLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBilateralFilter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bilateral_filter_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bilateral_filter_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBilateralFilter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_bilateral_filter_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_bilateral_filter_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBilateralFilter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BilateralFilterLocalData * data = new BilateralFilterLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBilateralFilter(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBilateralFilter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BilateralFilterLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BilateralFilter_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BilateralFilter", - VX_KERNEL_RPP_BILATERALFILTER, - processBilateralFilter, - 6, - validateBilateralFilter, - initializeBilateralFilter, - uninitializeBilateralFilter); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BilateralFilterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/BilateralFilterbatchPD.cpp index 862ed1cb66..392eba90d1 100644 --- a/amd_openvx_extensions/amd_rpp/source/BilateralFilterbatchPD.cpp +++ 
b/amd_openvx_extensions/amd_rpp/source/BilateralFilterbatchPD.cpp @@ -22,211 +22,275 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct BilateralFilterbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; - vx_float64 *sigmaI; - vx_float64 *sigmaS; +struct BilateralFilterbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *kernelSize; + vx_float64 *sigmaI; + vx_float64 *sigmaS; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshBilateralFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BilateralFilterbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->sigmaI = (vx_float64 *)malloc(sizeof(vx_float64) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_float64),data->sigmaI, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->sigmaS = (vx_float64 *)malloc(sizeof(vx_float64) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_float64),data->sigmaS, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), 
data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_float64), data->sigmaI, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_float64), data->sigmaS, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateBilateralFilterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for 
input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BilateralFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BilateralFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); +
vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processBilateralFilterbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BilateralFilterbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processBilateralFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + BilateralFilterbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBilateralFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bilateral_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bilateral_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshBilateralFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_bilateral_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->sigmaI, data->sigmaS, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_bilateral_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->sigmaI, data->sigmaS, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshBilateralFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_bilateral_filter_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->sigmaI, data->sigmaS, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_bilateral_filter_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->sigmaI, data->sigmaS, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBilateralFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_bilateral_filter_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_bilateral_filter_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshBilateralFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + // rpp_status = rppi_bilateral_filter_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->nbatchSize,data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + // rpp_status = rppi_bilateral_filter_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->nbatchSize,data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeBilateralFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeBilateralFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BilateralFilterbatchPDLocalData * data = new BilateralFilterbatchPDLocalData; - memset(data, 0, sizeof(*data)); + BilateralFilterbatchPDLocalData *data = new BilateralFilterbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBilateralFilterbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); + data->sigmaS = (vx_float64 *)malloc(sizeof(vx_float64) * data->nbatchSize); + data->sigmaI = (vx_float64 *)malloc(sizeof(vx_float64) * data->nbatchSize); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshBilateralFilterbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + 
rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeBilateralFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BilateralFilterbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + BilateralFilterbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + free(data->sigmaI); + free(data->sigmaS); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status BilateralFilterbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BilateralFilterbatchPD", - VX_KERNEL_RPP_BILATERALFILTERBATCHPD, - processBilateralFilterbatchPD, - 9, - validateBilateralFilterbatchPD, - initializeBilateralFilterbatchPD, - uninitializeBilateralFilterbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
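[The buffer-access attribute referenced in the comment above, together with the query_target_support callback introduced for BilateralFilterbatchPD, decides where these nodes run: the callback reads the context-level affinity set by the application before graph verification, and under the OpenCL backend it then forces CPU because amd_rpp has no OpenCL codegen callback. A sketch of the application side, assuming VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY is settable the way other MIVisionX samples use it (error handling omitted):

    #include <VX/vx.h>
    #include <vx_ext_amd.h>

    static vx_context createContextWithGpuAffinity()
    {
        vx_context context = vxCreateContext();
        AgoTargetAffinityInfo affinity = {};
        affinity.device_type = AGO_TARGET_AFFINITY_GPU; // or AGO_TARGET_AFFINITY_CPU
        // graphs verified on this context will see this affinity in query_target_support
        vxSetContextAttribute(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
        return context;
    }
]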
- vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BilateralFilterbatchPD", + VX_KERNEL_RPP_BILATERALFILTERBATCHPD, + processBilateralFilterbatchPD, + 9, + validateBilateralFilterbatchPD, + initializeBilateralFilterbatchPD, + uninitializeBilateralFilterbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/BilateralFilterbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/BilateralFilterbatchPDROID.cpp deleted file mode 100644 index eb7bd477d3..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BilateralFilterbatchPDROID.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BilateralFilterbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; - vx_float64 *sigmaI; - vx_float64 *sigmaS; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBilateralFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, BilateralFilterbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->sigmaI = (vx_float64 *)malloc(sizeof(vx_float64) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_float64),data->sigmaI, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->sigmaS = (vx_float64 *)malloc(sizeof(vx_float64) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_float64),data->sigmaS, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, 
&data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBilateralFilterbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: 
Paramter: #12 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BilateralFilterbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBilateralFilterbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BilateralFilterbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBilateralFilterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bilateral_filter_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bilateral_filter_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBilateralFilterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_bilateral_filter_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_bilateral_filter_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBilateralFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BilateralFilterbatchPDROIDLocalData * data = new BilateralFilterbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBilateralFilterbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBilateralFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BilateralFilterbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BilateralFilterbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BilateralFilterbatchPDROID", - VX_KERNEL_RPP_BILATERALFILTERBATCHPDROID, - processBilateralFilterbatchPDROID, - 13, - validateBilateralFilterbatchPDROID, - initializeBilateralFilterbatchPDROID, - uninitializeBilateralFilterbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BilateralFilterbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/BilateralFilterbatchPS.cpp deleted file mode 100644 index 732b4874f4..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BilateralFilterbatchPS.cpp +++ /dev/null @@ -1,232 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BilateralFilterbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; - Rpp64f sigmaI; - Rpp64f sigmaS; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBilateralFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, BilateralFilterbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->sigmaI)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->sigmaS)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBilateralFilterbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT64) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, 
sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT64) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BilateralFilterbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBilateralFilterbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BilateralFilterbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBilateralFilterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bilateral_filter_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bilateral_filter_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBilateralFilterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_bilateral_filter_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_bilateral_filter_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->sigmaI,data->sigmaS,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBilateralFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BilateralFilterbatchPSLocalData * data = new BilateralFilterbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBilateralFilterbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBilateralFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BilateralFilterbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BilateralFilterbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BilateralFilterbatchPS", - VX_KERNEL_RPP_BILATERALFILTERBATCHPS, - processBilateralFilterbatchPS, - 9, - validateBilateralFilterbatchPS, - initializeBilateralFilterbatchPS, - uninitializeBilateralFilterbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, 
VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BitwiseAND.cpp b/amd_openvx_extensions/amd_rpp/source/BitwiseAND.cpp deleted file mode 100644 index 3ae0cec957..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BitwiseAND.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BitwiseANDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBitwiseAND(vx_node node, const vx_reference *parameters, vx_uint32 num, BitwiseANDLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBitwiseAND(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseAND: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseAND: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, 
VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBitwiseAND(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BitwiseANDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBitwiseAND(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_AND_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_AND_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBitwiseAND(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_AND_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_AND_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBitwiseAND(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseANDLocalData * data = new BitwiseANDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBitwiseAND(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBitwiseAND(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseANDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BitwiseAND_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseAND", - VX_KERNEL_RPP_BITWISEAND, - processBitwiseAND, - 4, - validateBitwiseAND, - initializeBitwiseAND, - uninitializeBitwiseAND); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BitwiseANDbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/BitwiseANDbatchPD.cpp index 4a2eeb9f3b..8421cabe29 100644 --- a/amd_openvx_extensions/amd_rpp/source/BitwiseANDbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/BitwiseANDbatchPD.cpp @@ -22,209 +22,275 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct BitwiseANDbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct BitwiseANDbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshBitwiseANDbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BitwiseANDbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateBitwiseANDbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseANDbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return 
ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseANDbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseANDbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseANDbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + 
vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processBitwiseANDbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BitwiseANDbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processBitwiseANDbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + BitwiseANDbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBitwiseANDbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_AND_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_AND_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshBitwiseANDbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_bitwise_AND_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_bitwise_AND_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshBitwiseANDbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_bitwise_AND_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_bitwise_AND_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBitwiseANDbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_AND_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_AND_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshBitwiseANDbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_bitwise_AND_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_bitwise_AND_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeBitwiseANDbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeBitwiseANDbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BitwiseANDbatchPDLocalData * data = new BitwiseANDbatchPDLocalData; - memset(data, 0, sizeof(*data)); + BitwiseANDbatchPDLocalData *data = new BitwiseANDbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBitwiseANDbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshBitwiseANDbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + 
rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeBitwiseANDbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BitwiseANDbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + BitwiseANDbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO:: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status BitwiseANDbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseANDbatchPD", - VX_KERNEL_RPP_BITWISEANDBATCHPD, - processBitwiseANDbatchPD, - 7, - validateBitwiseANDbatchPD, - initializeBitwiseANDbatchPD, - uninitializeBitwiseANDbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseANDbatchPD", + VX_KERNEL_RPP_BITWISEANDBATCHPD, + processBitwiseANDbatchPD, + 7, + validateBitwiseANDbatchPD, + initializeBitwiseANDbatchPD, + 
uninitializeBitwiseANDbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/BitwiseANDbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/BitwiseANDbatchPDROID.cpp deleted file mode 100644 index 5de242b840..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BitwiseANDbatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BitwiseANDbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBitwiseANDbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, BitwiseANDbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = 
vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBitwiseANDbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseANDbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseANDbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBitwiseANDbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BitwiseANDbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBitwiseANDbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_AND_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_AND_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBitwiseANDbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_AND_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_AND_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBitwiseANDbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseANDbatchPDROIDLocalData * data = new BitwiseANDbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBitwiseANDbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBitwiseANDbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseANDbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BitwiseANDbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseANDbatchPDROID", - VX_KERNEL_RPP_BITWISEANDBATCHPDROID, - processBitwiseANDbatchPDROID, - 11, - validateBitwiseANDbatchPDROID, - initializeBitwiseANDbatchPDROID, - uninitializeBitwiseANDbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BitwiseANDbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/BitwiseANDbatchPS.cpp deleted file mode 100644 index 038b8e77c6..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BitwiseANDbatchPS.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BitwiseANDbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBitwiseANDbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, BitwiseANDbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBitwiseANDbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseANDbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseANDbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBitwiseANDbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BitwiseANDbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBitwiseANDbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_AND_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_AND_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBitwiseANDbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_AND_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_AND_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBitwiseANDbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseANDbatchPSLocalData * data = new BitwiseANDbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBitwiseANDbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBitwiseANDbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseANDbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BitwiseANDbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseANDbatchPS", - VX_KERNEL_RPP_BITWISEANDBATCHPS, - processBitwiseANDbatchPS, - 7, - validateBitwiseANDbatchPS, - initializeBitwiseANDbatchPS, - uninitializeBitwiseANDbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BitwiseNOT.cpp b/amd_openvx_extensions/amd_rpp/source/BitwiseNOT.cpp deleted file mode 100644 index ca1983095d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BitwiseNOT.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BitwiseNOTLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBitwiseNOT(vx_node node, const vx_reference *parameters, vx_uint32 num, BitwiseNOTLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBitwiseNOT(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseNOT: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBitwiseNOT(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status 
return_status = VX_SUCCESS; - BitwiseNOTLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBitwiseNOT(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_NOT_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_NOT_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBitwiseNOT(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_NOT_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_NOT_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBitwiseNOT(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseNOTLocalData * data = new BitwiseNOTLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBitwiseNOT(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBitwiseNOT(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseNOTLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BitwiseNOT_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseNOT", - VX_KERNEL_RPP_BITWISENOT, - processBitwiseNOT, - 3, - validateBitwiseNOT, - initializeBitwiseNOT, - uninitializeBitwiseNOT); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, 
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BitwiseNOTbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/BitwiseNOTbatchPD.cpp index f1a95c4020..678ab65a8d 100644 --- a/amd_openvx_extensions/amd_rpp/source/BitwiseNOTbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/BitwiseNOTbatchPD.cpp @@ -22,196 +22,258 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct BitwiseNOTbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; +struct BitwiseNOTbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshBitwiseNOTbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BitwiseNOTbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + size_t arr_size; + vx_status copy_status; + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, 
sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + // allocate the per-batch scratch buffers once: refresh runs on every process call, so unconditional mallocs here would leak (initialize zeroes the struct, so NULL means not yet allocated) + if (data->srcDimensions == NULL) + { + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + } + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateBitwiseNOTbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - 
{ - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseNOTbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseNOTbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processBitwiseNOTbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BitwiseNOTbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = 
VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processBitwiseNOTbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + BitwiseNOTbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBitwiseNOTbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_NOT_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshBitwiseNOTbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_bitwise_NOT_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshBitwiseNOTbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_bitwise_NOT_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBitwiseNOTbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_NOT_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshBitwiseNOTbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_bitwise_NOT_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeBitwiseNOTbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeBitwiseNOTbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BitwiseNOTbatchPDLocalData * data = new BitwiseNOTbatchPDLocalData; - memset(data, 0, sizeof(*data)); + BitwiseNOTbatchPDLocalData *data = new BitwiseNOTbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBitwiseNOTbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + refreshBitwiseNOTbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeBitwiseNOTbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BitwiseNOTbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + BitwiseNOTbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + // release the scratch buffers allocated lazily in refresh + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. 
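+// The register function below installs this callback with VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, so the runtime invokes it at VerifyGraph time to decide where the node may execute; a hybrid-capable kernel would report (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) and let the scheduler choose per node.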
+// TODO: currently the node sets the same affinity as the context. This needs to change when we support hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for the OpenCL backend to avoid a VerifyGraph failure, since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status BitwiseNOTbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseNOTbatchPD", - VX_KERNEL_RPP_BITWISENOTBATCHPD, - processBitwiseNOTbatchPD, - 6, - validateBitwiseNOTbatchPD, - initializeBitwiseNOTbatchPD, - uninitializeBitwiseNOTbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseNOTbatchPD", + VX_KERNEL_RPP_BITWISENOTBATCHPD, + processBitwiseNOTbatchPD, + 6, + validateBitwiseNOTbatchPD, + initializeBitwiseNOTbatchPD, + uninitializeBitwiseNOTbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/BitwiseNOTbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/BitwiseNOTbatchPDROID.cpp deleted file mode 100644 index ad16fad420..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BitwiseNOTbatchPDROID.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BitwiseNOTbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBitwiseNOTbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, BitwiseNOTbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], 
VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBitwiseNOTbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseNOTbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBitwiseNOTbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BitwiseNOTbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBitwiseNOTbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_NOT_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBitwiseNOTbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_NOT_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBitwiseNOTbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseNOTbatchPDROIDLocalData * data = new BitwiseNOTbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBitwiseNOTbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBitwiseNOTbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseNOTbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BitwiseNOTbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseNOTbatchPDROID", - VX_KERNEL_RPP_BITWISENOTBATCHPDROID, - processBitwiseNOTbatchPDROID, - 10, - validateBitwiseNOTbatchPDROID, - initializeBitwiseNOTbatchPDROID, - uninitializeBitwiseNOTbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BitwiseNOTbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/BitwiseNOTbatchPS.cpp deleted file mode 100644 index b5a3a86a03..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BitwiseNOTbatchPS.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BitwiseNOTbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBitwiseNOTbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, BitwiseNOTbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBitwiseNOTbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return 
ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BitwiseNOTbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBitwiseNOTbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BitwiseNOTbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBitwiseNOTbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_NOT_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBitwiseNOTbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_bitwise_NOT_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_bitwise_NOT_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBitwiseNOTbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseNOTbatchPSLocalData * data = new BitwiseNOTbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBitwiseNOTbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBitwiseNOTbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BitwiseNOTbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BitwiseNOTbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BitwiseNOTbatchPS", - VX_KERNEL_RPP_BITWISENOTBATCHPS, - processBitwiseNOTbatchPS, - 6, - validateBitwiseNOTbatchPS, - initializeBitwiseNOTbatchPS, - uninitializeBitwiseNOTbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Blend.cpp b/amd_openvx_extensions/amd_rpp/source/Blend.cpp deleted file mode 100644 index f83713d12a..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Blend.cpp +++ /dev/null @@ 
-1,214 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BlendLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - Rpp32f alpha; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBlend(vx_node node, const vx_reference *parameters, vx_uint32 num, BlendLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->alpha)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBlend(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) 
return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Blend: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Blend: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBlend(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BlendLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBlend(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blend_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->alpha,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blend_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->alpha,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBlend(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blend_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->alpha,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blend_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->alpha,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBlend(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlendLocalData * data = new BlendLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBlend(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBlend(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlendLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Blend_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Blend", - VX_KERNEL_RPP_BLEND, - processBlend, - 5, - validateBlend, - initializeBlend, - uninitializeBlend); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git 
a/amd_openvx_extensions/amd_rpp/source/BlendbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/BlendbatchPD.cpp index 819cc64f67..b1b78c0775 100644 --- a/amd_openvx_extensions/amd_rpp/source/BlendbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/BlendbatchPD.cpp @@ -22,235 +22,279 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct BlendbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - vx_float32 *alpha; +struct BlendbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; + vx_float32 *alpha; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc1; - void *hip_pSrc2; - void *hip_pDst; -#endif + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshBlendbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BlendbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_float32),data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + 
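// The batch is packed as one tall image, so the height queried above is nbatchSize times the per-image maximum; the division recovers maxSrcDimensions.height, and the true per-image sizes come from the width/height arrays (parameters 2 and 3) below. +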
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateBlendbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return 
ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlendbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlendbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlendbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlendbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - 
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processBlendbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BlendbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processBlendbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + BlendbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBlendbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blend_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blend_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshBlendbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_blend_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->alpha, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_blend_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->alpha, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshBlendbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blend_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1,(void *)data->hip_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->alpha,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blend_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1,(void *)data->hip_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->alpha,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshBlendbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_blend_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->alpha, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_blend_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->alpha, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBlendbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blend_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blend_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshBlendbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_blend_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_blend_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeBlendbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeBlendbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BlendbatchPDLocalData * data = new BlendbatchPDLocalData; - memset(data, 0, sizeof(*data)); + BlendbatchPDLocalData *data = new BlendbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBlendbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + refreshBlendbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeBlendbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BlendbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + BlendbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + 
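// these per-batch buffers are now allocated once in initializeBlendbatchPD and released only here, instead of being malloc'd on every refresh as the old code did +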
free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->alpha); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status BlendbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BlendbatchPD", - VX_KERNEL_RPP_BLENDBATCHPD, - processBlendbatchPD, - 8, - validateBlendbatchPD, - initializeBlendbatchPD, - uninitializeBlendbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BlendbatchPD", + VX_KERNEL_RPP_BLENDBATCHPD, + processBlendbatchPD, + 8, + validateBlendbatchPD, + initializeBlendbatchPD, + uninitializeBlendbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/BlendbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/BlendbatchPDROID.cpp deleted file mode 100644 index a675662f72..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BlendbatchPDROID.cpp +++ /dev/null @@ -1,255 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BlendbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - vx_float32 *alpha; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBlendbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, BlendbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_float32),data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBlendbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlendbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlendbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBlendbatchPDROID(vx_node node, const vx_reference * parameters, 
vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BlendbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBlendbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blend_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blend_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBlendbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blend_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blend_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBlendbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlendbatchPDROIDLocalData * data = new BlendbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBlendbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBlendbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlendbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BlendbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BlendbatchPDROID", - VX_KERNEL_RPP_BLENDBATCHPDROID, - processBlendbatchPDROID, - 12, - validateBlendbatchPDROID, - initializeBlendbatchPDROID, - uninitializeBlendbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, 
VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BlendbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/BlendbatchPS.cpp deleted file mode 100644 index da672dacbc..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BlendbatchPS.cpp +++ /dev/null @@ -1,235 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BlendbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - Rpp32f alpha; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBlendbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, BlendbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->alpha)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBlendbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - 
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlendbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlendbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBlendbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BlendbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBlendbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blend_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blend_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBlendbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blend_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blend_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBlendbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlendbatchPSLocalData * data = new BlendbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBlendbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBlendbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlendbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BlendbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BlendbatchPS", - VX_KERNEL_RPP_BLENDBATCHPS, - processBlendbatchPS, - 8, - validateBlendbatchPS, - initializeBlendbatchPS, - uninitializeBlendbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, 
VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Blur.cpp b/amd_openvx_extensions/amd_rpp/source/Blur.cpp deleted file mode 100644 index fd1b93ea0f..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Blur.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BlurLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBlur(vx_node node, const vx_reference *parameters, vx_uint32 num, BlurLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBlur(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Blur: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, 
sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBlur(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BlurLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBlur(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blur_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blur_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBlur(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blur_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blur_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBlur(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlurLocalData * data = new BlurLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBlur(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBlur(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlurLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Blur_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Blur", - VX_KERNEL_RPP_BLUR, - processBlur, - 4, - validateBlur, - initializeBlur, - uninitializeBlur); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable 
OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BlurbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/BlurbatchPD.cpp index 9f98eda28c..efffd268de 100644 --- a/amd_openvx_extensions/amd_rpp/source/BlurbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/BlurbatchPD.cpp @@ -22,223 +22,268 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct BlurbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; +struct BlurbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; + void *hip_pSrc; + void *hip_pDst; #endif }; static vx_status VX_CALLBACK refreshBlurbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BlurbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = 
vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
- for(int i = 0; i < data->nbatchSize; i++){
- data->srcDimensions[i].width = srcBatch_width[i];
- data->srcDimensions[i].height = srcBatch_height[i];
- }
- if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
+ vx_status status = VX_SUCCESS;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height)));
+ STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width)));
+ data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize;
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+ for (int i = 0; i < data->nbatchSize; i++)
+ {
+ data->srcDimensions[i].width = data->srcBatch_width[i];
+ data->srcDimensions[i].height = data->srcBatch_height[i];
+ }
+ if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+ {
#if ENABLE_OPENCL
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc)));
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst)));
+ STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc)));
+ STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst)));
#elif ENABLE_HIP
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc)));
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst)));
+ STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc)));
+ STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst)));
#endif
- }
- if(data->device_type == AGO_TARGET_AFFINITY_CPU) {
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8)));
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8)));
- }
- return status;
+ }
+ if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+ {
+ STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8)));
+ STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8)));
+ }
+ return status;
}

static vx_status VX_CALLBACK validateBlurbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
{
- vx_status status = VX_SUCCESS;
- vx_enum scalar_type;
- STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
- if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type);
- STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
- if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type);
- // Check for input parameters
- vx_parameter input_param;
- vx_image input;
- vx_df_image df_image;
- input_param = vxGetParameterByIndex(node,0);
- STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
- STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
- if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
- {
- return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlurbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
- }
-
- // Check for output parameters
- vx_image output;
- vx_df_image format;
- vx_parameter output_param;
- vx_uint32 height, width;
- output_param = vxGetParameterByIndex(node,3);
- STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
- STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
- STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
- vxReleaseImage(&input);
- vxReleaseImage(&output);
- vxReleaseParameter(&output_param);
- vxReleaseParameter(&input_param);
- return status;
+ vx_status status = VX_SUCCESS;
+ vx_enum scalar_type;
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+ STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+ if (scalar_type != VX_TYPE_UINT32)
+ return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+ // Check for input parameters
+ vx_parameter input_param;
+ vx_image input;
+ vx_df_image df_image;
+ input_param = vxGetParameterByIndex(node, 0);
+ STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
+ STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+ if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
+ {
+ return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlurbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
+ }
+
+ // Check for output parameters
+ vx_image output;
+ vx_parameter output_param;
+ vx_uint32 height, width;
+ output_param = vxGetParameterByIndex(node, 3);
+ STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
+ STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
+ STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height,
sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processBlurbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BlurbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processBlurbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + BlurbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBlurbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blur_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blur_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshBlurbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_blur_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_blur_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshBlurbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blur_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blur_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshBlurbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_blur_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_blur_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBlurbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blur_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blur_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshBlurbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_blur_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_blur_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeBlurbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeBlurbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BlurbatchPDLocalData * data = new BlurbatchPDLocalData; - memset(data, 0, sizeof(*data)); + BlurbatchPDLocalData *data = new BlurbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBlurbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshBlurbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - { - hipStreamCreate(&data->handle.hipstream); - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); - } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + hipStreamCreate(&data->handle.hipstream); + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + } #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeBlurbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BlurbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + BlurbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == 
AGO_TARGET_AFFINITY_CPU)
- rppDestroyHost(data->rppHandle);
- delete(data);
- return VX_SUCCESS;
+ if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+ rppDestroyHost(data->rppHandle);
+ // release the per-batch metadata buffers allocated in initializeBlurbatchPD
+ free(data->srcDimensions);
+ free(data->srcBatch_width);
+ free(data->srcBatch_height);
+ free(data->kernelSize);
+ delete (data);
+ return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+ vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+ vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+)
+{
+ vx_context context = vxGetContext((vx_reference)graph);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+ else
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes
+#if ENABLE_OPENCL
+ supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+#endif
+
+ return VX_SUCCESS;
+}

vx_status BlurbatchPD_Register(vx_context context)
{
- vx_status status = VX_SUCCESS;
- // Add kernel to the context with callbacks
- vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BlurbatchPD",
- VX_KERNEL_RPP_BLURBATCHPD,
- processBlurbatchPD,
- 7,
- validateBlurbatchPD,
- initializeBlurbatchPD,
- uninitializeBlurbatchPD);
- ERROR_CHECK_OBJECT(kernel);
- AgoTargetAffinityInfo affinity;
- vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
+ vx_status status = VX_SUCCESS;
+ // Add kernel to the context with callbacks
+ vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BlurbatchPD",
+ VX_KERNEL_RPP_BLURBATCHPD,
+ processBlurbatchPD,
+ 7,
+ validateBlurbatchPD,
+ initializeBlurbatchPD,
+ uninitializeBlurbatchPD);
+ ERROR_CHECK_OBJECT(kernel);
+ AgoTargetAffinityInfo affinity;
+ vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
#if ENABLE_OPENCL || ENABLE_HIP
- // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
- vx_bool enableBufferAccess = vx_true_e;
- if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
- STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+ // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers
+ vx_bool enableBufferAccess = vx_true_e;
+ if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+ STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
#else
- vx_bool enableBufferAccess = vx_false_e;
+ vx_bool enableBufferAccess = vx_false_e;
#endif
- if (kernel)
- {
- PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
- PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
- PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2,
VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/BlurbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/BlurbatchPDROID.cpp deleted file mode 100644 index a912379407..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BlurbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BlurbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBlurbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, BlurbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBlurbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlurbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBlurbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BlurbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBlurbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blur_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blur_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBlurbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blur_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blur_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBlurbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlurbatchPDROIDLocalData * data = new BlurbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBlurbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBlurbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlurbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BlurbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BlurbatchPDROID", - VX_KERNEL_RPP_BLURBATCHPDROID, - processBlurbatchPDROID, - 11, - validateBlurbatchPDROID, - initializeBlurbatchPDROID, - uninitializeBlurbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BlurbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/BlurbatchPS.cpp deleted file mode 100644 index 72b4683b26..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BlurbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BlurbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBlurbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, BlurbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBlurbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image 
df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BlurbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBlurbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BlurbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBlurbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blur_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blur_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBlurbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_blur_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_blur_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBlurbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlurbatchPSLocalData * data = new BlurbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBlurbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBlurbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BlurbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BlurbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BlurbatchPS", - VX_KERNEL_RPP_BLURBATCHPS, - processBlurbatchPS, - 7, - validateBlurbatchPS, - initializeBlurbatchPS, - uninitializeBlurbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BoxFilter.cpp b/amd_openvx_extensions/amd_rpp/source/BoxFilter.cpp deleted file mode 100644 index 0f455d8c19..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/BoxFilter.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BoxFilterLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBoxFilter(vx_node node, const vx_reference *parameters, vx_uint32 num, BoxFilterLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBoxFilter(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BoxFilter: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBoxFilter(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BoxFilterLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBoxFilter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_box_filter_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_box_filter_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBoxFilter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_box_filter_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_box_filter_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBoxFilter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BoxFilterLocalData * data = new BoxFilterLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBoxFilter(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBoxFilter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BoxFilterLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BoxFilter_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BoxFilter", - VX_KERNEL_RPP_BOXFILTER, - processBoxFilter, - 4, - validateBoxFilter, - initializeBoxFilter, - uninitializeBoxFilter); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BoxFilterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/BoxFilterbatchPD.cpp index d5d1b42a23..d3dde6a244 100644 --- a/amd_openvx_extensions/amd_rpp/source/BoxFilterbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/BoxFilterbatchPD.cpp @@ -22,201 +22,266 @@ THE SOFTWARE. 
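The BoxFilterbatchPD.cpp hunk below is the template the port applies to each kernel that stays: the local data gains hip_pSrc/hip_pDst device pointers next to the cl_mem pair, every #if ENABLE_OPENCL block grows an #elif ENABLE_HIP branch, and the RPP handle is created from the node's HIP stream rather than its OpenCL command queue. A minimal sketch of that compile-time split, collapsed into one fragment (img, node, and data are schematic; the queried attributes and the rppCreateWithStreamAndBatchSize call are the ones the hunk uses):

    #if ENABLE_OPENCL
        cl_mem gpuBuf;   // OpenCL backend exposes cl_mem handles
        STATUS_ERROR_CHECK(vxQueryImage(img, VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &gpuBuf, sizeof(gpuBuf)));
        STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
    #elif ENABLE_HIP
        void *gpuBuf;    // HIP backend exposes raw device pointers
        STATUS_ERROR_CHECK(vxQueryImage(img, VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &gpuBuf, sizeof(gpuBuf)));
        STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream)));
        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize);
    #endif

Both branches end up forwarding the buffer to the same rppi_* entry points as void *, which is why the process callback only differs in which member it casts.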
#include "internal_publishKernels.h" -struct BoxFilterbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; +struct BoxFilterbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshBoxFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BoxFilterbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; 
i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateBoxFilterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BoxFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], 
VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BoxFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processBoxFilterbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BoxFilterbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processBoxFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + BoxFilterbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type 
== AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBoxFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_box_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_box_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshBoxFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_box_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_box_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshBoxFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_box_filter_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_box_filter_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBoxFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_box_filter_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_box_filter_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshBoxFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_box_filter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_box_filter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeBoxFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeBoxFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BoxFilterbatchPDLocalData * data = new BoxFilterbatchPDLocalData; - memset(data, 0, sizeof(*data)); + BoxFilterbatchPDLocalData *data = new BoxFilterbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBoxFilterbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshBoxFilterbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeBoxFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BoxFilterbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + BoxFilterbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status BoxFilterbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BoxFilterbatchPD", - VX_KERNEL_RPP_BOXFILTERBATCHPD, - processBoxFilterbatchPD, - 7, - validateBoxFilterbatchPD, - initializeBoxFilterbatchPD, - uninitializeBoxFilterbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BoxFilterbatchPD", + VX_KERNEL_RPP_BOXFILTERBATCHPD, + processBoxFilterbatchPD, + 7, + validateBoxFilterbatchPD, + initializeBoxFilterbatchPD, + uninitializeBoxFilterbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/BoxFilterbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/BoxFilterbatchPDROID.cpp deleted file mode 100644 index 28210b6fef..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BoxFilterbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
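One easy-to-miss fix in the BoxFilterbatchPD hunk just above: refreshBoxFilterbatchPD used to malloc kernelSize, srcDimensions, and the width/height staging arrays on every execution and never freed them, so each processed batch leaked. The port hoists those allocations into initializeBoxFilterbatchPD, reading nbatchSize first so they can be sized once, and balances them in uninitializeBoxFilterbatchPD. Condensed, with all names taken from the hunk:

    // before, in refresh(): reallocated (and leaked) per call
    data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size);
    // after, in initialize(): sized once for the node's lifetime
    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize));
    data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize);
    // after, in uninitialize(): released with its siblings
    free(data->kernelSize);

The same pattern covers srcDimensions, srcBatch_width, and srcBatch_height; refresh() now only fills the preallocated buffers via vxCopyArrayRange.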
-*/ - -#include "internal_publishKernels.h" - -struct BoxFilterbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBoxFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, BoxFilterbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBoxFilterbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BoxFilterbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBoxFilterbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BoxFilterbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBoxFilterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_box_filter_u8_pln1_batchPD_ROID_gpu((void 
*)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_box_filter_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBoxFilterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_box_filter_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_box_filter_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBoxFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BoxFilterbatchPDROIDLocalData * data = new BoxFilterbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBoxFilterbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBoxFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BoxFilterbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BoxFilterbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BoxFilterbatchPDROID", - VX_KERNEL_RPP_BOXFILTERBATCHPDROID, - processBoxFilterbatchPDROID, - 11, - validateBoxFilterbatchPDROID, - initializeBoxFilterbatchPDROID, - uninitializeBoxFilterbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, 
sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BoxFilterbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/BoxFilterbatchPS.cpp deleted file mode 100644 index b80df1dddb..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BoxFilterbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BoxFilterbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBoxFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, BoxFilterbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBoxFilterbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; 
- vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BoxFilterbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBoxFilterbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BoxFilterbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBoxFilterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_box_filter_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_box_filter_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBoxFilterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_box_filter_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_box_filter_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBoxFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BoxFilterbatchPSLocalData * data = new BoxFilterbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBoxFilterbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBoxFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BoxFilterbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BoxFilterbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BoxFilterbatchPS", - VX_KERNEL_RPP_BOXFILTERBATCHPS, - processBoxFilterbatchPS, - 7, - validateBoxFilterbatchPS, - initializeBoxFilterbatchPS, - uninitializeBoxFilterbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Brightness.cpp b/amd_openvx_extensions/amd_rpp/source/Brightness.cpp deleted file mode 
100644 index 7b44c5d26b..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Brightness.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BrightnessLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f alpha; - Rpp32f beta; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBrightness(vx_node node, const vx_reference *parameters, vx_uint32 num, BrightnessLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->alpha)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->beta)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBrightness(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", 
scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Brightness: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBrightness(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BrightnessLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBrightness(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_brightness_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->alpha,data->beta,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_brightness_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->alpha,data->beta,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBrightness(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_brightness_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->alpha,data->beta,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_brightness_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->alpha,data->beta,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBrightness(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BrightnessLocalData * data = new BrightnessLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBrightness(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBrightness(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BrightnessLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Brightness_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Brightness", - VX_KERNEL_RPP_BRIGHTNESS, - processBrightness, - 5, - validateBrightness, - initializeBrightness, - uninitializeBrightness); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BrightnessbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/BrightnessbatchPD.cpp index cf9f71baa8..21aebdf1a8 100644 --- a/amd_openvx_extensions/amd_rpp/source/BrightnessbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/BrightnessbatchPD.cpp @@ -22,251 +22,268 @@ THE SOFTWARE. 
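/* [Editor's note and sketch; not part of the diff.] The hunk below is the
 * pattern this PR applies to the kernels it keeps: per-batch buffers (alpha,
 * beta, srcDimensions, and the width/height staging arrays) are no longer
 * malloc'd inside refreshBrightnessbatchPD(), which runs on every frame and
 * previously leaked them, but are allocated once in the initialize callback
 * and released in the uninitialize callback. A distilled, self-contained
 * sketch of that lifecycle (ExampleLocalData and the function names are
 * hypothetical; error checks dropped for brevity):
 */
typedef struct { vx_uint32 n; vx_float32 *alpha; vx_float32 *beta; } ExampleLocalData;

static vx_status example_initialize(vx_node node, const vx_reference *params)
{
    ExampleLocalData *d = (ExampleLocalData *)calloc(1, sizeof(*d));
    /* batch size (parameter #6) is read once here, not on every frame */
    vxCopyScalar((vx_scalar)params[6], &d->n, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
    d->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * d->n);  /* once per node */
    d->beta  = (vx_float32 *)malloc(sizeof(vx_float32) * d->n);  /* once per node */
    return vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &d, sizeof(d));
}

static vx_status example_refresh(const vx_reference *params, ExampleLocalData *d)
{
    /* per frame: only copy into the preallocated buffers, no allocation */
    return vxCopyArrayRange((vx_array)params[4], 0, d->n, sizeof(vx_float32),
                            d->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
}

static vx_status example_uninitialize(vx_node node)
{
    ExampleLocalData *d = NULL;
    vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &d, sizeof(d));
    free(d->alpha);
    free(d->beta);
    free(d);
    return VX_SUCCESS;
}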
#include "internal_publishKernels.h" -struct BrightnessbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *alpha; - vx_float32 *beta; +struct BrightnessbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *alpha; + vx_float32 *beta; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshBrightnessbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, BrightnessbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->beta = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_float32),data->beta, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_float32), data->beta, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, 
sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateBrightnessbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - 
if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BrightnessbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BrightnessbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processBrightnessbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BrightnessbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR,
&data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processBrightnessbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + BrightnessbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBrightnessbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_brightness_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->beta,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_brightness_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->beta,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshBrightnessbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_brightness_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->alpha, data->beta, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_brightness_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->alpha, data->beta, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshBrightnessbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_brightness_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->alpha,data->beta,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_brightness_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->alpha,data->beta,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshBrightnessbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_brightness_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->alpha, data->beta, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_brightness_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->alpha, data->beta, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBrightnessbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_brightness_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->beta,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_brightness_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->beta,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshBrightnessbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_brightness_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->beta, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_brightness_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->beta, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeBrightnessbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeBrightnessbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BrightnessbatchPDLocalData * data = new BrightnessbatchPDLocalData; - memset(data, 0, sizeof(*data)); + BrightnessbatchPDLocalData *data = new BrightnessbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBrightnessbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); + data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->beta = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshBrightnessbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == 
AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeBrightnessbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - BrightnessbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + BrightnessbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + free(data->alpha); + free(data->beta); + delete (data); + return VX_SUCCESS; } //! \brief The kernel target support callback. // TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32& supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) - ) + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) { - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - // hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - - return VX_SUCCESS; +#endif + return VX_SUCCESS; } - vx_status BrightnessbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BrightnessbatchPD", - VX_KERNEL_RPP_BRIGHTNESSBATCHPD, - processBrightnessbatchPD, - 8, - validateBrightnessbatchPD, - initializeBrightnessbatchPD, - uninitializeBrightnessbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BrightnessbatchPD", + VX_KERNEL_RPP_BRIGHTNESSBATCHPD, + processBrightnessbatchPD, + 8, + validateBrightnessbatchPD, + initializeBrightnessbatchPD, + uninitializeBrightnessbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess,
sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/BrightnessbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/BrightnessbatchPDROID.cpp deleted file mode 100644 index d475077928..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BrightnessbatchPDROID.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
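/* [Editor's illustrative sketch; not part of the diff.] Binding the kernel
 * registered above from application code. The index/type pairing mirrors the
 * vxAddParameterToKernel calls in BrightnessbatchPD_Register; every data
 * object named here is hypothetical and assumed to be created and filled by
 * the caller. The batch is a single image with nbatchSize sub-images stacked
 * vertically, which is why refreshBrightnessbatchPD() divides the image
 * height by nbatchSize.
 */
vx_kernel brightness_kernel = vxGetKernelByName(context, "org.rpp.BrightnessbatchPD");
vx_node   brightness_node   = vxCreateGenericNode(graph, brightness_kernel);
vxSetParameterByIndex(brightness_node, 0, (vx_reference)srcBatch);   /* input U8/RGB batch image */
vxSetParameterByIndex(brightness_node, 1, (vx_reference)srcWidths);  /* vx_array of Rpp32u widths */
vxSetParameterByIndex(brightness_node, 2, (vx_reference)srcHeights); /* vx_array of Rpp32u heights */
vxSetParameterByIndex(brightness_node, 3, (vx_reference)dstBatch);   /* output batch image */
vxSetParameterByIndex(brightness_node, 4, (vx_reference)alphaArr);   /* vx_array of vx_float32 */
vxSetParameterByIndex(brightness_node, 5, (vx_reference)betaArr);    /* vx_array of vx_float32 */
vxSetParameterByIndex(brightness_node, 6, (vx_reference)nbatch);     /* VX_TYPE_UINT32 batch size */
vxSetParameterByIndex(brightness_node, 7, (vx_reference)devType);    /* VX_TYPE_UINT32 CPU/GPU affinity */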
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct BrightnessbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *alpha; - vx_float32 *beta; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBrightnessbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, BrightnessbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->beta = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_float32),data->beta, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * 
data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBrightnessbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BrightnessbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, 
VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBrightnessbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BrightnessbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBrightnessbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_brightness_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->beta,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_brightness_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->beta,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBrightnessbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_brightness_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->beta,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_brightness_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->beta,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBrightnessbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BrightnessbatchPDROIDLocalData * data = new BrightnessbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBrightnessbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBrightnessbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BrightnessbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BrightnessbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BrightnessbatchPDROID", - VX_KERNEL_RPP_BRIGHTNESSBATCHPDROID, - processBrightnessbatchPDROID, - 12, - validateBrightnessbatchPDROID, - initializeBrightnessbatchPDROID, - uninitializeBrightnessbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/BrightnessbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/BrightnessbatchPS.cpp deleted file mode 100644 index 1e8e0b828d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/BrightnessbatchPS.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct BrightnessbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f alpha; - Rpp32f beta; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshBrightnessbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, BrightnessbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->alpha)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->beta)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateBrightnessbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", 
scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: BrightnessbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processBrightnessbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - BrightnessbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshBrightnessbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_brightness_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->beta,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_brightness_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->alpha,data->beta,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshBrightnessbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_brightness_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->beta,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_brightness_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->alpha,data->beta,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeBrightnessbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BrightnessbatchPSLocalData * data = new BrightnessbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshBrightnessbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeBrightnessbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - BrightnessbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status BrightnessbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.BrightnessbatchPS", - VX_KERNEL_RPP_BRIGHTNESSBATCHPS, - processBrightnessbatchPS, - 8, - validateBrightnessbatchPS, - initializeBrightnessbatchPS, - uninitializeBrightnessbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/CannyEdgeDetector.cpp b/amd_openvx_extensions/amd_rpp/source/CannyEdgeDetector.cpp index 84e6e66643..9612036388 100644 --- a/amd_openvx_extensions/amd_rpp/source/CannyEdgeDetector.cpp +++ b/amd_openvx_extensions/amd_rpp/source/CannyEdgeDetector.cpp @@ -22,185 +22,242 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct CannyEdgeDetectorLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp8u max; - Rpp8u min; +struct CannyEdgeDetectorLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + RppiSize srcDimensions; + Rpp32u device_type; + RppPtr_t pSrc; + RppPtr_t pDst; + Rpp8u max; + Rpp8u min; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshCannyEdgeDetector(vx_node node, const vx_reference *parameters, vx_uint32 num, CannyEdgeDetectorLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->max)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->min)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->max)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->min)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], 
VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateCannyEdgeDetector(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT8) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT8) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CannyEdgeDetector: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT8) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #2 type=%d (must be VX_TYPE_UINT8)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT8) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be VX_TYPE_UINT8)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return
ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CannyEdgeDetector: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processCannyEdgeDetector(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CannyEdgeDetectorLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processCannyEdgeDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + CannyEdgeDetectorLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshCannyEdgeDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_canny_edge_detector_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->max,data->min,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_canny_edge_detector_u8_pkd3_gpu((void 
*)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->max,data->min,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - + refreshCannyEdgeDetector(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_canny_edge_detector_u8_pln1_gpu((void *)data->cl_pSrc, data->srcDimensions, (void *)data->cl_pDst, data->max, data->min, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_canny_edge_detector_u8_pkd3_gpu((void *)data->cl_pSrc, data->srcDimensions, (void *)data->cl_pDst, data->max, data->min, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshCannyEdgeDetector(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_canny_edge_detector_u8_pln1_gpu((void *)data->hip_pSrc, data->srcDimensions, (void *)data->hip_pDst, data->max, data->min, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_canny_edge_detector_u8_pkd3_gpu((void *)data->hip_pSrc, data->srcDimensions, (void *)data->hip_pDst, data->max, data->min, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshCannyEdgeDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_canny_edge_detector_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->max,data->min,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_canny_edge_detector_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->max,data->min,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshCannyEdgeDetector(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_canny_edge_detector_u8_pln1_host(data->pSrc, data->srcDimensions, data->pDst, data->max, data->min, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_canny_edge_detector_u8_pkd3_host(data->pSrc, data->srcDimensions, data->pDst, data->max, data->min, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeCannyEdgeDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeCannyEdgeDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) { - CannyEdgeDetectorLocalData * data = new CannyEdgeDetectorLocalData; - memset(data, 0, sizeof(*data)); + CannyEdgeDetectorLocalData *data = new CannyEdgeDetectorLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshCannyEdgeDetector(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + refreshCannyEdgeDetector(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.hipstream); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeCannyEdgeDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) { - CannyEdgeDetectorLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + CannyEdgeDetectorLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status CannyEdgeDetector_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CannyEdgeDetector", - VX_KERNEL_RPP_CANNYEDGEDETECTOR, - processCannyEdgeDetector, - 5, - validateCannyEdgeDetector, - initializeCannyEdgeDetector, - uninitializeCannyEdgeDetector); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CannyEdgeDetector", + VX_KERNEL_RPP_CANNYEDGEDETECTOR, + processCannyEdgeDetector, + 5, + validateCannyEdgeDetector, + initializeCannyEdgeDetector, + uninitializeCannyEdgeDetector); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses OpenCL/HIP buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR,
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ChannelCombine.cpp b/amd_openvx_extensions/amd_rpp/source/ChannelCombine.cpp deleted file mode 100644 index f45b3e5fb8..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ChannelCombine.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ChannelCombineLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pSrc3; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pSrc3; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshChannelCombine(vx_node node, const vx_reference *parameters, vx_uint32 num, ChannelCombineLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc3, sizeof(data->cl_pSrc3))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc3, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateChannelCombine(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombine: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombine: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = 
vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombine: image: #2 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processChannelCombine(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ChannelCombineLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshChannelCombine(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_combine_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,(void *)data->cl_pSrc3,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_combine_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,(void *)data->cl_pSrc3,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshChannelCombine(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_combine_u8_pln1_host(data->pSrc1,data->pSrc2,data->pSrc3,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_combine_u8_pkd3_host(data->pSrc1,data->pSrc2,data->pSrc3,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeChannelCombine(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelCombineLocalData * data = new ChannelCombineLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshChannelCombine(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeChannelCombine(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelCombineLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ChannelCombine_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ChannelCombine", - VX_KERNEL_RPP_CHANNELCOMBINE, - processChannelCombine, - 5, - validateChannelCombine, - initializeChannelCombine, - uninitializeChannelCombine); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ChannelCombinebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ChannelCombinebatchPD.cpp index eeaf15db44..5f4022c642 100644 --- a/amd_openvx_extensions/amd_rpp/source/ChannelCombinebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ChannelCombinebatchPD.cpp @@ -22,222 +22,285 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct ChannelCombinebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pSrc3; - RppPtr_t pDst; +struct ChannelCombinebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pSrc3; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pSrc3; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pSrc3; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pSrc3; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshChannelCombinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ChannelCombinebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], 
VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc3, sizeof(data->cl_pSrc3))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[5], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc3, sizeof(data->cl_pSrc3))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[5], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc3, sizeof(data->hip_pSrc3))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[5], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc3, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[5], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc3, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[5], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateChannelCombinebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - 
STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPD: image: #4 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,5); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } -static vx_status VX_CALLBACK processChannelCombinebatchPD(vx_node node, const vx_reference *
parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ChannelCombinebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshChannelCombinebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_combine_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,(void *)data->cl_pSrc3,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_combine_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,(void *)data->cl_pSrc3,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshChannelCombinebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_combine_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->pSrc3,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_combine_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->pSrc3,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + input_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPD: image: #4 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 5); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK initializeChannelCombinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK processChannelCombinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ChannelCombinebatchPDLocalData * data = new ChannelCombinebatchPDLocalData; - memset(data, 0, sizeof(*data)); + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ChannelCombinebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + // #if ENABLE_OPENCL + // refreshChannelCombinebatchPD(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_channel_combine_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,(void *)data->cl_pSrc3,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_channel_combine_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,(void *)data->cl_pSrc3,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + // #elif ENABLE_HIP + // refreshChannelCombinebatchPD(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_channel_combine_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1,(void *)data->hip_pSrc2,(void *)data->hip_pSrc3,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->nbatchSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_channel_combine_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1,(void *)data->hip_pSrc2,(void *)data->hip_pSrc3,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->nbatchSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #endif + return VX_ERROR_NOT_IMPLEMENTED; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshChannelCombinebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_channel_combine_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->pSrc3, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_channel_combine_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->pSrc3, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; +} + +static vx_status VX_CALLBACK initializeChannelCombinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + ChannelCombinebatchPDLocalData *data = new ChannelCombinebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshChannelCombinebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshChannelCombinebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - 
STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeChannelCombinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ChannelCombinebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ChannelCombinebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status ChannelCombinebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ChannelCombinebatchPD", - VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD, - processChannelCombinebatchPD, - 8, - validateChannelCombinebatchPD, - initializeChannelCombinebatchPD, - uninitializeChannelCombinebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ChannelCombinebatchPD", + VX_KERNEL_RPP_CHANNELCOMBINEBATCHPD, + processChannelCombinebatchPD, + 8, + validateChannelCombinebatchPD, + initializeChannelCombinebatchPD, + uninitializeChannelCombinebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type ==
AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ChannelCombinebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ChannelCombinebatchPS.cpp deleted file mode 100644 index 1808479bc8..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ChannelCombinebatchPS.cpp +++ /dev/null @@ -1,243 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ChannelCombinebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pSrc3; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pSrc3; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshChannelCombinebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ChannelCombinebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc3, sizeof(data->cl_pSrc3))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[5], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc3, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[5], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateChannelCombinebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelCombinebatchPS: image: #4 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,5); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[5], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, 
sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processChannelCombinebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ChannelCombinebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshChannelCombinebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_combine_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,(void *)data->cl_pSrc3,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_combine_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,(void *)data->cl_pSrc3,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshChannelCombinebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_combine_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->pSrc3,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_combine_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->pSrc3,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeChannelCombinebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelCombinebatchPSLocalData * data = new ChannelCombinebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshChannelCombinebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeChannelCombinebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelCombinebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ChannelCombinebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ChannelCombinebatchPS", - VX_KERNEL_RPP_CHANNELCOMBINEBATCHPS, - processChannelCombinebatchPS, - 8, - validateChannelCombinebatchPS, - initializeChannelCombinebatchPS, - uninitializeChannelCombinebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return 
VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ChannelExtract.cpp b/amd_openvx_extensions/amd_rpp/source/ChannelExtract.cpp deleted file mode 100644 index 3e3135da87..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ChannelExtract.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ChannelExtractLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u extractChannelNumber; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshChannelExtract(vx_node node, const vx_reference *parameters, vx_uint32 num, ChannelExtractLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->extractChannelNumber)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateChannelExtract(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, 
sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelExtract: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processChannelExtract(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ChannelExtractLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshChannelExtract(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_extract_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->extractChannelNumber,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_extract_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->extractChannelNumber,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshChannelExtract(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_extract_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->extractChannelNumber,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_extract_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->extractChannelNumber,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeChannelExtract(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelExtractLocalData * data = new ChannelExtractLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshChannelExtract(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeChannelExtract(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelExtractLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ChannelExtract_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ChannelExtract", - VX_KERNEL_RPP_CHANNELEXTRACT, - processChannelExtract, - 4, - validateChannelExtract, - initializeChannelExtract, - uninitializeChannelExtract); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ChannelExtractbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ChannelExtractbatchPD.cpp index df210df9a2..8d42577cdf 100644 --- a/amd_openvx_extensions/amd_rpp/source/ChannelExtractbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ChannelExtractbatchPD.cpp @@ -22,201 +22,260 @@ THE SOFTWARE. 
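The ChannelExtractbatchPD.cpp rewrite below applies the same memory-lifecycle fix as ChannelCombinebatchPD above: the per-batch scratch buffers (srcDimensions, srcBatch_width, srcBatch_height, and here extractChannelNumber) are no longer malloc'd inside the refresh callback — which ran on every graph execution and never freed them — but are allocated once in initialize, sized from the batch-size scalar, and released in uninitialize. A minimal sketch of that lifecycle, assuming the Rpp/OpenVX types the real file pulls in through internal_publishKernels.h (the struct and helper names here are illustrative, and the malloc checks are an addition the actual kernels do not make):

#include <stdlib.h>

// Subset of the node-local state relevant to the allocation fix.
struct BatchPDScratch
{
    vx_uint32 nbatchSize;
    RppiSize *srcDimensions;
    Rpp32u *srcBatch_width;
    Rpp32u *srcBatch_height;
};

// initialize callback: allocate once, after reading the batch-size scalar.
static vx_status allocScratch(BatchPDScratch *d, vx_uint32 n)
{
    d->nbatchSize = n;
    d->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * n);
    d->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * n);
    d->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * n);
    return (d->srcDimensions && d->srcBatch_width && d->srcBatch_height)
               ? VX_SUCCESS : VX_ERROR_NO_MEMORY;
}

// refresh callback: only repopulate; vxCopyArrayRange fills the width/height
// buffers first, then the RppiSize array is rebuilt from them.
static void refreshScratch(BatchPDScratch *d)
{
    for (vx_uint32 i = 0; i < d->nbatchSize; i++)
    {
        d->srcDimensions[i].width = d->srcBatch_width[i];
        d->srcDimensions[i].height = d->srcBatch_height[i];
    }
}

// uninitialize callback: free in one place, matching the single allocation.
static void freeScratch(BatchPDScratch *d)
{
    free(d->srcBatch_height);
    free(d->srcBatch_width);
    free(d->srcDimensions);
}

The real callbacks additionally wrap every OpenVX call in STATUS_ERROR_CHECK, which is elided here for brevity.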
#include "internal_publishKernels.h" -struct ChannelExtractbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *extractChannelNumber; +struct ChannelExtractbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *extractChannelNumber; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshChannelExtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ChannelExtractbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->extractChannelNumber = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->extractChannelNumber, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->extractChannelNumber, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + 
data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateChannelExtractbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelExtractbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], 
VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelExtractbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } -static vx_status VX_CALLBACK processChannelExtractbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ChannelExtractbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshChannelExtractbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_extract_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_extract_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ?
VX_SUCCESS : VX_FAILURE; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; +} -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshChannelExtractbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_extract_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_extract_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; +static vx_status VX_CALLBACK processChannelExtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ChannelExtractbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + // #if ENABLE_OPENCL + // refreshChannelExtractbatchPD(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_channel_extract_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_channel_extract_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #elif ENABLE_HIP + // refreshChannelExtractbatchPD(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_channel_extract_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_channel_extract_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + // #endif + return VX_ERROR_NOT_IMPLEMENTED; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshChannelExtractbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_channel_extract_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->extractChannelNumber, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_channel_extract_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->extractChannelNumber, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeChannelExtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeChannelExtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ChannelExtractbatchPDLocalData * data = new ChannelExtractbatchPDLocalData; - memset(data, 0, sizeof(*data)); + ChannelExtractbatchPDLocalData *data = new ChannelExtractbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshChannelExtractbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->extractChannelNumber = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshChannelExtractbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeChannelExtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ChannelExtractbatchPDLocalData * data; - 
STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ChannelExtractbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->extractChannelNumber); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status ChannelExtractbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ChannelExtractbatchPD", - VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD, - processChannelExtractbatchPD, - 7, - validateChannelExtractbatchPD, - initializeChannelExtractbatchPD, - uninitializeChannelExtractbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ChannelExtractbatchPD", + VX_KERNEL_RPP_CHANNELEXTRACTBATCHPD, + processChannelExtractbatchPD, + 7, + validateChannelExtractbatchPD, + initializeChannelExtractbatchPD, + uninitializeChannelExtractbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == 
AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ChannelExtractbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ChannelExtractbatchPS.cpp deleted file mode 100644 index 5180e98a6d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ChannelExtractbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ChannelExtractbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u extractChannelNumber; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshChannelExtractbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ChannelExtractbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->extractChannelNumber)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateChannelExtractbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], 
VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ChannelExtractbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processChannelExtractbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ChannelExtractbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshChannelExtractbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_extract_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_extract_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshChannelExtractbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_channel_extract_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_channel_extract_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->extractChannelNumber,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeChannelExtractbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelExtractbatchPSLocalData * data = new ChannelExtractbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshChannelExtractbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeChannelExtractbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ChannelExtractbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ChannelExtractbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ChannelExtractbatchPS", - VX_KERNEL_RPP_CHANNELEXTRACTBATCHPS, - processChannelExtractbatchPS, - 7, - validateChannelExtractbatchPS, - initializeChannelExtractbatchPS, - uninitializeChannelExtractbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ColorTemperature.cpp b/amd_openvx_extensions/amd_rpp/source/ColorTemperature.cpp deleted file mode 100644 index 693f6e5707..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ColorTemperature.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ColorTemperatureLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32s adjustmentValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshColorTemperature(vx_node node, const vx_reference *parameters, vx_uint32 num, ColorTemperatureLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->adjustmentValue)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateColorTemperature(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_INT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ColorTemperature: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - 
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processColorTemperature(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ColorTemperatureLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshColorTemperature(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_temperature_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->adjustmentValue,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_color_temperature_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->adjustmentValue,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshColorTemperature(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_temperature_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->adjustmentValue,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_color_temperature_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->adjustmentValue,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeColorTemperature(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTemperatureLocalData * data = new ColorTemperatureLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshColorTemperature(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeColorTemperature(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTemperatureLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ColorTemperature_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ColorTemperature", - VX_KERNEL_RPP_COLORTEMPERATURE, - processColorTemperature, - 4, - validateColorTemperature, - initializeColorTemperature, - uninitializeColorTemperature); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ColorTemperaturebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ColorTemperaturebatchPD.cpp index 30e5b59631..1c16763ff5 100644 --- a/amd_openvx_extensions/amd_rpp/source/ColorTemperaturebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ColorTemperaturebatchPD.cpp @@ -22,219 +22,263 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct ColorTemperaturebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_int32 *adjustmentValue; +struct ColorTemperaturebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_int32 *adjustmentValue; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshColorTemperaturebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ColorTemperaturebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->adjustmentValue = (vx_int32 *)malloc(sizeof(vx_int32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_int32),data->adjustmentValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_int32), data->adjustmentValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + 
{ + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateColorTemperaturebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ColorTemperaturebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, 
sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ColorTemperaturebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processColorTemperaturebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ColorTemperaturebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processColorTemperaturebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + 
ColorTemperaturebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshColorTemperaturebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_temperature_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->adjustmentValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_color_temperature_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->adjustmentValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshColorTemperaturebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_color_temperature_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->adjustmentValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_color_temperature_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->adjustmentValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshColorTemperaturebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_temperature_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->adjustmentValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_color_temperature_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->adjustmentValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshColorTemperaturebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_color_temperature_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->adjustmentValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_color_temperature_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->adjustmentValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshColorTemperaturebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_temperature_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->adjustmentValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_color_temperature_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->adjustmentValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshColorTemperaturebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_color_temperature_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->adjustmentValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_color_temperature_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->adjustmentValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeColorTemperaturebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeColorTemperaturebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ColorTemperaturebatchPDLocalData * data = new ColorTemperaturebatchPDLocalData; - memset(data, 0, sizeof(*data)); + ColorTemperaturebatchPDLocalData *data = new ColorTemperaturebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshColorTemperaturebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->adjustmentValue = (vx_int32 *)malloc(sizeof(vx_int32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshColorTemperaturebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == 
AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeColorTemperaturebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ColorTemperaturebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ColorTemperaturebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->adjustmentValue); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status ColorTemperaturebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ColorTemperaturebatchPD", - VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD, - processColorTemperaturebatchPD, - 7, - validateColorTemperaturebatchPD, - initializeColorTemperaturebatchPD, - uninitializeColorTemperaturebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ColorTemperaturebatchPD", + VX_KERNEL_RPP_COLORTEMPERATUREBATCHPD, + processColorTemperaturebatchPD, + 7, + validateColorTemperaturebatchPD, + initializeColorTemperaturebatchPD, + uninitializeColorTemperaturebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ColorTemperaturebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/ColorTemperaturebatchPDROID.cpp deleted file mode 100644 index 1fe9f467d4..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ColorTemperaturebatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ColorTemperaturebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_int32 *adjustmentValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshColorTemperaturebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, ColorTemperaturebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->adjustmentValue = (vx_int32 *)malloc(sizeof(vx_int32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_int32),data->adjustmentValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateColorTemperaturebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ColorTemperaturebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processColorTemperaturebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ColorTemperaturebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshColorTemperaturebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_temperature_u8_pln1_batchPD_ROID_gpu((void 
*)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->adjustmentValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_color_temperature_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->adjustmentValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshColorTemperaturebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_temperature_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->adjustmentValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_color_temperature_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->adjustmentValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeColorTemperaturebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTemperaturebatchPDROIDLocalData * data = new ColorTemperaturebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshColorTemperaturebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeColorTemperaturebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTemperaturebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ColorTemperaturebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ColorTemperaturebatchPDROID", - VX_KERNEL_RPP_COLORTEMPERATUREBATCHPDROID, - processColorTemperaturebatchPDROID, - 11, - validateColorTemperaturebatchPDROID, - initializeColorTemperaturebatchPDROID, - uninitializeColorTemperaturebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == 
AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ColorTemperaturebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ColorTemperaturebatchPS.cpp deleted file mode 100644 index a01aa28126..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ColorTemperaturebatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ColorTemperaturebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32s adjustmentValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshColorTemperaturebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ColorTemperaturebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->adjustmentValue)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateColorTemperaturebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_INT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - 
vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ColorTemperaturebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processColorTemperaturebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ColorTemperaturebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshColorTemperaturebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_temperature_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->adjustmentValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_color_temperature_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->adjustmentValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshColorTemperaturebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_temperature_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->adjustmentValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_color_temperature_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->adjustmentValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeColorTemperaturebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTemperaturebatchPSLocalData * data = new ColorTemperaturebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshColorTemperaturebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeColorTemperaturebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTemperaturebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ColorTemperaturebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ColorTemperaturebatchPS", - VX_KERNEL_RPP_COLORTEMPERATUREBATCHPS, - processColorTemperaturebatchPS, - 7, - validateColorTemperaturebatchPS, - initializeColorTemperaturebatchPS, - uninitializeColorTemperaturebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git 
a/amd_openvx_extensions/amd_rpp/source/ColorTwist.cpp b/amd_openvx_extensions/amd_rpp/source/ColorTwist.cpp deleted file mode 100644 index 12f9afc486..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ColorTwist.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ColorTwistLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f alpha; - Rpp32f beta; - Rpp32f hue; - Rpp32f sat; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshColorTwist(vx_node node, const vx_reference *parameters, vx_uint32 num, ColorTwistLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->alpha)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->beta)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->hue)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->sat)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateColorTwist(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return 
ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ColorTwist: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processColorTwist(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ColorTwistLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshColorTwist(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_twist_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->alpha,data->beta,data->hue,data->sat,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = 
rppi_color_twist_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->alpha,data->beta,data->hue,data->sat,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshColorTwist(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_color_twist_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->alpha,data->beta,data->hue,data->sat,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_color_twist_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->alpha,data->beta,data->hue,data->sat,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeColorTwist(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTwistLocalData * data = new ColorTwistLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshColorTwist(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeColorTwist(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ColorTwistLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ColorTwist_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ColorTwist", - VX_KERNEL_RPP_COLORTWIST, - processColorTwist, - 7, - validateColorTwist, - initializeColorTwist, - uninitializeColorTwist); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ColorTwistbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ColorTwistbatchPD.cpp index c9e30a7448..61a4a9ff30 100644 --- a/amd_openvx_extensions/amd_rpp/source/ColorTwistbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ColorTwistbatchPD.cpp @@ -24,244 +24,267 @@ THE SOFTWARE. struct ColorTwistbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *alpha; - vx_float32 *beta; - vx_float32 *hue; - vx_float32 *sat; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *alpha; + vx_float32 *beta; + vx_float32 *hue; + vx_float32 *sat; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; + void *hip_pSrc; + void *hip_pDst; #endif }; static vx_status VX_CALLBACK refreshColorTwistbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ColorTwistbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - vx_status copy_status; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_float32), data->beta, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_float32), data->hue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_float32), data->sat, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + vx_status status = VX_SUCCESS; + vx_status copy_status; + 
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_float32), data->beta, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_float32), data->hue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_float32), data->sat, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateColorTwistbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - 
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ColorTwistbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ColorTwistbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } static vx_status VX_CALLBACK processColorTwistbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ColorTwistbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - vx_int32 output_format_toggle = 0; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ColorTwistbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + vx_int32 output_format_toggle = 0; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshColorTwistbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_color_twist_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->alpha, data->beta, data->hue, data->sat, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshColorTwistbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_color_twist_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->alpha, data->beta, data->hue, data->sat, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshColorTwistbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_color_twist_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->alpha, data->beta, data->hue, data->sat, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshColorTwistbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_color_twist_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->alpha, data->beta, data->hue, data->sat, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshColorTwistbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_color_twist_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->beta, data->hue, data->sat, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshColorTwistbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_color_twist_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->alpha, data->beta, data->hue, data->sat, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } static vx_status VX_CALLBACK initializeColorTwistbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ColorTwistbatchPDLocalData *data = new ColorTwistbatchPDLocalData; - memset(data, 0, sizeof(*data)); + ColorTwistbatchPDLocalData *data = new ColorTwistbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->beta = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->hue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->sat = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); + data->alpha = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->beta = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->hue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->sat = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshColorTwistbatchPD(node, parameters, num, data); + refreshColorTwistbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if (data->device_type == 
AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeColorTwistbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ColorTwistbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ColorTwistbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->alpha); + free(data->beta); + free(data->hue); + free(data->sat); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - free(data->alpha); - free(data->beta); - free(data->hue); - free(data->sat); - free(data->srcDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - delete (data); - return VX_SUCCESS; + + return VX_SUCCESS; }
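// An illustrative sketch of where the TODO above could land once hybrid modes
// exist (not in this patch; the name is hypothetical): the [output] contract
// already allows advertising both targets, so a per-node decision would simply
// report the combined mask and let AGO pick a target for each node during
// vxVerifyGraph instead of mirroring the context-wide affinity.
static vx_status VX_CALLBACK query_target_support_hybrid(vx_graph graph, vx_node node,
    vx_bool use_opencl_1_2, vx_uint32 &supported_target_affinity)
{
    // advertise both targets; the scheduler resolves the affinity per node
    supported_target_affinity = AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU;
    return VX_SUCCESS;
}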
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + unsigned short idx = 0; // For Index + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, idx++, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ColorTwistbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/ColorTwistbatchPDROID.cpp deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/amd_openvx_extensions/amd_rpp/source/ColorTwistbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ColorTwistbatchPS.cpp deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/amd_openvx_extensions/amd_rpp/source/Contrast.cpp b/amd_openvx_extensions/amd_rpp/source/Contrast.cpp deleted file mode 100644 index f7bf90efe9..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Contrast.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ContrastLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u min; - Rpp32u max; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshContrast(vx_node node, const vx_reference *parameters, vx_uint32 num, ContrastLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->min)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->max)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateContrast(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: 
#4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Contrast: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processContrast(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ContrastLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshContrast(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_contrast_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->min,data->max,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_contrast_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->min,data->max,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshContrast(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_contrast_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->min,data->max,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_contrast_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->min,data->max,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeContrast(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ContrastLocalData * data = new ContrastLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshContrast(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeContrast(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ContrastLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Contrast_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Contrast", - VX_KERNEL_RPP_CONTRAST, - processContrast, - 5, - validateContrast, - initializeContrast, - uninitializeContrast); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ContrastbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ContrastbatchPD.cpp index 58413b0f96..977ebdf9d2 100644 --- a/amd_openvx_extensions/amd_rpp/source/ContrastbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ContrastbatchPD.cpp @@ -22,225 +22,268 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct ContrastbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *min; - vx_uint32 *max; +struct ContrastbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *min; + vx_uint32 *max; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshContrastbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ContrastbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->min = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->min, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->max = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->max, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->min, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->max, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + 
data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateContrastbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != 
VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ContrastbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ContrastbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processContrastbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ContrastbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image =
VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processContrastbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ContrastbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshContrastbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_contrast_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_contrast_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshContrastbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_contrast_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_contrast_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshContrastbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_contrast_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_contrast_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshContrastbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_contrast_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_contrast_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshContrastbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_contrast_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_contrast_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshContrastbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_contrast_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_contrast_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeContrastbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeContrastbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ContrastbatchPDLocalData * data = new ContrastbatchPDLocalData; - memset(data, 0, sizeof(*data)); + ContrastbatchPDLocalData *data = new ContrastbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshContrastbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); + data->min = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->max = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshContrastbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - 
rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeContrastbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ContrastbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ContrastbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->min); + free(data->max); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; }
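+// Hedged end-to-end sketch: assuming an application drives the standard
+// OpenVX flow, the callback registered below via
+// VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT takes effect at graph
+// verification time (the context/graph names here are illustrative):
+//
+//   AgoTargetAffinityInfo affinity = {};
+//   affinity.device_type = AGO_TARGET_AFFINITY_GPU;
+//   vxSetContextAttribute(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+//   // ... build a graph containing an org.rpp.ContrastbatchPD node ...
+//   vxVerifyGraph(graph); // with ENABLE_OPENCL, query_target_support()
+//                         // reports CPU so verification succeeds without a
+//                         // codegen callback for amd_rpp nodes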
vx_status ContrastbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ContrastbatchPD", - VX_KERNEL_RPP_CONTRASTBATCHPD, - processContrastbatchPD, - 8, - validateContrastbatchPD, - initializeContrastbatchPD, - uninitializeContrastbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ContrastbatchPD", + VX_KERNEL_RPP_CONTRASTBATCHPD, + processContrastbatchPD, + 8, + validateContrastbatchPD, + initializeContrastbatchPD, + uninitializeContrastbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ContrastbatchPDROID.cpp
b/amd_openvx_extensions/amd_rpp/source/ContrastbatchPDROID.cpp deleted file mode 100644 index a2a94d80d0..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ContrastbatchPDROID.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ContrastbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *min; - vx_uint32 *max; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshContrastbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, ContrastbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->min = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->min, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->max = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->max, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, 
sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateContrastbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ContrastbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = 
vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processContrastbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ContrastbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshContrastbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_contrast_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_contrast_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshContrastbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_contrast_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_contrast_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeContrastbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ContrastbatchPDROIDLocalData * data = new ContrastbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshContrastbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeContrastbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ContrastbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ContrastbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ContrastbatchPDROID", - VX_KERNEL_RPP_CONTRASTBATCHPDROID, - processContrastbatchPDROID, - 12, - validateContrastbatchPDROID, - initializeContrastbatchPDROID, - uninitializeContrastbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ContrastbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ContrastbatchPS.cpp deleted file mode 100644 index 736c8ee92d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ContrastbatchPS.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ContrastbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u min; - Rpp32u max; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshContrastbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ContrastbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->min)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->max)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateContrastbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - 
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ContrastbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processContrastbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ContrastbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshContrastbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_contrast_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_contrast_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshContrastbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_contrast_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_contrast_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeContrastbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ContrastbatchPSLocalData * data = new ContrastbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshContrastbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeContrastbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ContrastbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ContrastbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ContrastbatchPS", - VX_KERNEL_RPP_CONTRASTBATCHPS, - processContrastbatchPS, - 8, - validateContrastbatchPS, - initializeContrastbatchPS, - uninitializeContrastbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ControlFlow.cpp b/amd_openvx_extensions/amd_rpp/source/ControlFlow.cpp deleted file mode 100644 index 81a37b9247..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ControlFlow.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
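Every kernel file this diff touches, ContrastbatchPS above and the ControlFlow* family below alike, wraps one RPP primitive in the same five-piece OpenVX user-kernel pattern: a validate callback that type-checks the scalar parameters and propagates the output image metadata, a process callback that dispatches to an RPP host or OpenCL entry point, initialize/uninitialize callbacks that own the rppHandle_t and the node-local data, and a *_Register function that publishes the kernel. A minimal sketch of that skeleton, using only stock OpenVX 1.x API; the kernel name and enum are placeholders, the per-kernel RPP plumbing is elided, and the STATUS_ERROR_CHECK / PARAM_ERROR_CHECK macros from the extension's internal headers are replaced by direct checks:

    #include <VX/vx.h>

    static vx_status VX_CALLBACK myValidate(vx_node node, const vx_reference parameters[],
                                            vx_uint32 num, vx_meta_format metas[])
    {
        // Propagate input geometry and format to the output, as the kernels in
        // this diff do with vxSetMetaFormatAttribute after their type checks.
        vx_uint32 width = 0, height = 0;
        vx_df_image format = VX_DF_IMAGE_U8;
        vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &width, sizeof(width));
        vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &height, sizeof(height));
        vxSetMetaFormatAttribute(metas[1], VX_IMAGE_WIDTH, &width, sizeof(width));
        vxSetMetaFormatAttribute(metas[1], VX_IMAGE_HEIGHT, &height, sizeof(height));
        vxSetMetaFormatAttribute(metas[1], VX_IMAGE_FORMAT, &format, sizeof(format));
        return VX_SUCCESS;
    }

    static vx_status VX_CALLBACK myProcess(vx_node node, const vx_reference *parameters, vx_uint32 num)
    {
        return VX_SUCCESS; // real kernels fetch VX_NODE_LOCAL_DATA_PTR and call an rppi_* entry point
    }

    static vx_status VX_CALLBACK myInit(vx_node node, const vx_reference *parameters, vx_uint32 num)
    {
        return VX_SUCCESS; // real kernels allocate local data and create an rppHandle_t here
    }

    static vx_status VX_CALLBACK myDeinit(vx_node node, const vx_reference *parameters, vx_uint32 num)
    {
        return VX_SUCCESS; // real kernels destroy the handle and free local data here
    }

    vx_status MyKernel_Register(vx_context context)
    {
        vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MyKernel",
                                           VX_KERNEL_BASE(VX_ID_DEFAULT, 0) + 1, // hypothetical enum
                                           myProcess, 2, myValidate, myInit, myDeinit);
        if (vxGetStatus((vx_reference)kernel) != VX_SUCCESS)
            return VX_FAILURE;
        vxAddParameterToKernel(kernel, 0, VX_INPUT,  VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED);
        vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED);
        return vxFinalizeKernel(kernel);
    }
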
-*/ - -#include "internal_publishKernels.h" - -struct ControlFlowLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - Rpp32u type; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshControlFlow(vx_node node, const vx_reference *parameters, vx_uint32 num, ControlFlowLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->type)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateControlFlow(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ControlFlow: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ControlFlow: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char 
*)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processControlFlow(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ControlFlowLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshControlFlow(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_control_flow_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->type,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_control_flow_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->type,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshControlFlow(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_control_flow_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->type,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_control_flow_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->type,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeControlFlow(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ControlFlowLocalData * data = new ControlFlowLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshControlFlow(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeControlFlow(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ControlFlowLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ControlFlow_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ControlFlow", - VX_KERNEL_RPP_CONTROLFLOW, - processControlFlow, - 5, - validateControlFlow, - initializeControlFlow, - uninitializeControlFlow); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ControlFlowbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ControlFlowbatchPD.cpp deleted file mode 100644 index 7fa2bd6e94..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ControlFlowbatchPD.cpp +++ /dev/null @@ -1,236 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
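The refresh callbacks of the batch kernels (ContrastbatchPS above, the ControlFlowbatchPD variants below) encode the extension's batch layout: a batch of nbatchSize frames travels as one tall vx_image, so the queried image height is divided by nbatchSize to recover the per-slot maxSrcDimensions, while the true per-frame widths and heights arrive in separate vx_arrays. A small self-contained sketch of the resulting slot arithmetic, under the assumption (not stated in this diff) that frames are stacked contiguously and packed at maxWidth * channels bytes per row:

    #include <cstddef>
    #include <cstdint>

    struct Size { uint32_t width, height; };

    // Byte offset of frame i inside the stacked batch buffer. Each frame owns a
    // full maxDims-sized slot even when its valid region (srcDimensions[i]) is
    // smaller; channels = 3 corresponds to the pkd3 (packed RGB) case.
    size_t frameOffset(uint32_t i, Size maxDims, uint32_t channels)
    {
        return static_cast<size_t>(i) * maxDims.width * maxDims.height * channels;
    }

    // Example: frame 2 of a batch whose slots are 224x224 RGB begins at
    // frameOffset(2, {224, 224}, 3) bytes from the start of the buffer.
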
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ControlFlowbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - vx_uint32 type; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshControlFlowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ControlFlowbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - // data->type = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - // copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->type)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, 
&data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateControlFlowbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ControlFlowbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ControlFlowbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processControlFlowbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = 
VX_SUCCESS; - ControlFlowbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshControlFlowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_control_flow_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->type,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_control_flow_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->type,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshControlFlowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_control_flow_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->type,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_control_flow_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->type,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeControlFlowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ControlFlowbatchPDLocalData * data = new ControlFlowbatchPDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshControlFlowbatchPD(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeControlFlowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ControlFlowbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ControlFlowbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ControlFlowbatchPD", - VX_KERNEL_RPP_CONTROLFLOWBATCHPD, - processControlFlowbatchPD, - 8, - validateControlFlowbatchPD, - initializeControlFlowbatchPD, - 
uninitializeControlFlowbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ControlFlowbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/ControlFlowbatchPDROID.cpp deleted file mode 100644 index 4fbee80dcf..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ControlFlowbatchPDROID.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
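ControlFlowbatchPDROID, deleted next, is the ROI flavor of the same kernel: on top of the per-frame dimension arrays it takes four more parallel vx_arrays (parameters 6 through 9: x, y, roiWidth, roiHeight), and its refresh callback repacks them into an array of RppiROI structs for RPP. A self-contained sketch of that repacking, with Roi standing in for RppiROI:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Roi { uint32_t x, y, roiWidth, roiHeight; };

    // Gather four parallel per-frame arrays into the array-of-structs form RPP
    // expects, mirroring the loop in refreshControlFlowbatchPDROID below.
    std::vector<Roi> packRois(const std::vector<uint32_t>& x,
                              const std::vector<uint32_t>& y,
                              const std::vector<uint32_t>& w,
                              const std::vector<uint32_t>& h)
    {
        std::vector<Roi> rois(x.size());
        for (std::size_t i = 0; i < rois.size(); ++i)
            rois[i] = { x[i], y[i], w[i], h[i] };
        return rois;
    }
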
-*/ - -#include "internal_publishKernels.h" - -struct ControlFlowbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - vx_uint32 type; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshControlFlowbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, ControlFlowbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - // data->type = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - // copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->type)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], 
VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateControlFlowbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ControlFlowbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ControlFlowbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - 
-static vx_status VX_CALLBACK processControlFlowbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ControlFlowbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshControlFlowbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_control_flow_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->type,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_control_flow_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->type,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshControlFlowbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_control_flow_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->type,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_control_flow_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->type,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeControlFlowbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ControlFlowbatchPDROIDLocalData * data = new ControlFlowbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshControlFlowbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeControlFlowbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ControlFlowbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ControlFlowbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ControlFlowbatchPDROID", - VX_KERNEL_RPP_CONTROLFLOWBATCHPDROID, - processControlFlowbatchPDROID, - 12, - validateControlFlowbatchPDROID, - initializeControlFlowbatchPDROID, - uninitializeControlFlowbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ControlFlowbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ControlFlowbatchPS.cpp deleted file mode 100644 index 24730b7995..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ControlFlowbatchPS.cpp +++ /dev/null @@ -1,235 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
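The kernel suffixes appear to follow a consistent scheme, judging by the signatures in this diff: batchPD variants read per-frame parameter arrays with vxCopyArrayRange (as CropMirrorNormalizebatchPD does below for start_x, mean, std_dev, and mirror), batchPS variants read a single scalar applied to every frame (as ContrastbatchPS does for min and max with vxReadScalarValue), and the ROID variants additionally restrict processing to per-frame ROIs. A toy, self-contained illustration of the PS versus PD calling conventions, using hypothetical brightness helpers rather than the RPP API:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using Image = std::vector<uint8_t>;

    // PS-style: one offset shared by the whole batch.
    void brightnessBatchPS(std::vector<Image>& batch, int offset)
    {
        for (Image& img : batch)
            for (uint8_t& px : img)
                px = static_cast<uint8_t>(std::clamp(px + offset, 0, 255));
    }

    // PD-style: one offset per frame, offsets.size() == batch.size().
    void brightnessBatchPD(std::vector<Image>& batch, const std::vector<int>& offsets)
    {
        for (std::size_t i = 0; i < batch.size(); ++i)
            for (uint8_t& px : batch[i])
                px = static_cast<uint8_t>(std::clamp(px + offsets[i], 0, 255));
    }
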
-*/ - -#include "internal_publishKernels.h" - -struct ControlFlowbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - Rpp32u type; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshControlFlowbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ControlFlowbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->type)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateControlFlowbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - 
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ControlFlowbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ControlFlowbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processControlFlowbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ControlFlowbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshControlFlowbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_control_flow_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->type,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_control_flow_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->type,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshControlFlowbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_control_flow_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->type,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_control_flow_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->type,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeControlFlowbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ControlFlowbatchPSLocalData * data = new ControlFlowbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshControlFlowbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeControlFlowbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ControlFlowbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ControlFlowbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ControlFlowbatchPS", - VX_KERNEL_RPP_CONTROLFLOWBATCHPS, - processControlFlowbatchPS, - 8, - validateControlFlowbatchPS, - initializeControlFlowbatchPS, - uninitializeControlFlowbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, 
VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/CropMirrorNormalizePD.cpp b/amd_openvx_extensions/amd_rpp/source/CropMirrorNormalizePD.cpp index 68703b583a..6ae0fe0883 100644 --- a/amd_openvx_extensions/amd_rpp/source/CropMirrorNormalizePD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/CropMirrorNormalizePD.cpp @@ -22,267 +22,308 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct CropMirrorNormalizebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *start_x; - vx_uint32 *start_y; - vx_float32 *mean; +struct CropMirrorNormalizebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppiSize *dstDimensions; + RppiSize maxDstDimensions; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *start_x; + vx_uint32 *start_y; + vx_float32 *mean; vx_float32 *std_dev; vx_uint32 *mirror; vx_uint32 chnShift; //NHWC to NCHW - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; + Rpp32u *dstBatch_width; + Rpp32u *dstBatch_height; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshCropMirrorNormalizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, CropMirrorNormalizebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32),data->start_x, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32),data->start_y, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_float32),data->mean, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_float32),data->std_dev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(vx_uint32),data->mirror, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->start_x, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->start_y, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + 
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_float32), data->mean, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_float32), data->std_dev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(vx_uint32), data->mirror, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->chnShift)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); + data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + 
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->dstDimensions[i].width = data->dstBatch_width[i]; + data->dstDimensions[i].height = data->dstBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateCropMirrorNormalizebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[13], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #13 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CropMirrorNormalizebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #11 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #12 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[13], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #13 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CropMirrorNormalizebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + }

- // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); +
vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processCropMirrorNormalizebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) +static vx_status VX_CALLBACK processCropMirrorNormalizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CropMirrorNormalizebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + CropMirrorNormalizebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshCropMirrorNormalizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_crop_mirror_normalize_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->start_x,data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift ,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_crop_mirror_normalize_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->start_x,data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift ,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshCropMirrorNormalizebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_crop_mirror_normalize_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_crop_mirror_normalize_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshCropMirrorNormalizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_crop_mirror_normalize_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->dstDimensions,data->maxDstDimensions,data->start_x,data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift ,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_crop_mirror_normalize_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->dstDimensions,data->maxDstDimensions,data->start_x,data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift ,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshCropMirrorNormalizebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_crop_mirror_normalize_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_crop_mirror_normalize_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshCropMirrorNormalizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_crop_mirror_normalize_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->start_x,data->start_y,data->mean, data->std_dev, data->mirror, data->chnShift,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_crop_mirror_normalize_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->start_x,data->start_y,data->mean, data->std_dev, data->mirror, data->chnShift,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshCropMirrorNormalizebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_crop_mirror_normalize_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_crop_mirror_normalize_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, data->mean, data->std_dev, data->mirror, data->chnShift, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeCropMirrorNormalizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeCropMirrorNormalizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - CropMirrorNormalizebatchPDLocalData * data = new CropMirrorNormalizebatchPDLocalData; - memset(data, 0, sizeof(*data)); + CropMirrorNormalizebatchPDLocalData *data = new CropMirrorNormalizebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[13], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[12], &data->nbatchSize)); - data->start_x = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->start_y = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->mean = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->std_dev = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); - data->mirror = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshCropMirrorNormalizebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[13], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[12], &data->nbatchSize)); + data->start_x = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->start_y = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->mean = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->std_dev = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->mirror = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshCropMirrorNormalizebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if 
(data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeCropMirrorNormalizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - CropMirrorNormalizebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + CropMirrorNormalizebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - free(data->start_x); - free(data->start_y); - free(data->mean); - free(data->std_dev); - free(data->mirror); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->start_x); + free(data->start_y); + free(data->mean); + free(data->std_dev); + free(data->mirror); + free(data->srcDimensions); + free(data->dstDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->dstBatch_width); + free(data->dstBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status CropMirrorNormalizePD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CropMirrorNormalizebatchPD", - VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD, - processCropMirrorNormalizebatchPD, - 14, - validateCropMirrorNormalizebatchPD, - initializeCropMirrorNormalizebatchPD, - uninitializeCropMirrorNormalizebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CropMirrorNormalizebatchPD", + VX_KERNEL_RPP_CROPMIRRORNORMALIZEBATCHPD, + processCropMirrorNormalizebatchPD, + 14, + validateCropMirrorNormalizebatchPD, + initializeCropMirrorNormalizebatchPD, + uninitializeCropMirrorNormalizebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY,
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 13, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 13, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/CropPD.cpp b/amd_openvx_extensions/amd_rpp/source/CropPD.cpp index 23274e13d5..af836902d0 100644 --- a/amd_openvx_extensions/amd_rpp/source/CropPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/CropPD.cpp @@ -24,265 +24,288 @@ THE SOFTWARE. 
struct CropPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *start_x; - vx_uint32 *start_y; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppiSize *dstDimensions; + RppiSize maxDstDimensions; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *start_x; + vx_uint32 *start_y; + Rpp32u *dstBatch_width; + Rpp32u *dstBatch_height; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; + void *hip_pSrc; + void *hip_pDst; #endif }; static vx_status VX_CALLBACK refreshCropPD(vx_node node, const vx_reference *parameters, vx_uint32 num, CropPDLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->start_x, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->start_y, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->start_x, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->start_y, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); + data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->dstDimensions[i].width = data->dstBatch_width[i]; + data->dstDimensions[i].height = data->dstBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; 
} static vx_status VX_CALLBACK validateCropPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; + vx_status status = VX_SUCCESS; + vx_enum scalar_type;

- STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CropPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CropPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + }

- // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); +
STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } static vx_status VX_CALLBACK processCropPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CropPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + CropPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vx_int32 output_format_toggle = 0; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + vx_int32 output_format_toggle = 0; + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshCropPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_crop_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_crop_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshCropPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_crop_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_crop_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshCropPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_crop_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_crop_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshCropPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_crop_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_crop_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshCropPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_crop_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_crop_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshCropPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_crop_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_crop_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->start_x, data->start_y, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } static vx_status VX_CALLBACK initializeCropPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - CropPDLocalData *data = new CropPDLocalData; - memset(data, 0, sizeof(*data)); + CropPDLocalData *data = new CropPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - data->start_x = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->start_y = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshCropPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); + data->start_x = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->start_y = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshCropPD(node, parameters, num, data); #if ENABLE_OPENCL - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, 
VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeCropPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - CropPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + CropPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - free(data->start_x); - free(data->start_y); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - delete (data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->start_x); + free(data->start_y); + free(data->srcDimensions); + free(data->dstDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->dstBatch_width); + free(data->dstBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status CropPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CropPD", - VX_KERNEL_RPP_CROPPD, - processCropPD, - 10, - validateCropPD, - initializeCropPD, - uninitializeCropPD); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CropPD", + VX_KERNEL_RPP_CROPPD, + processCropPD, + 10, + validateCropPD, + initializeCropPD, + uninitializeCropPD);

- ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of
host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; - + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git 
a/amd_openvx_extensions/amd_rpp/source/CustomConvolution.cpp b/amd_openvx_extensions/amd_rpp/source/CustomConvolution.cpp deleted file mode 100644 index 1978a861f1..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/CustomConvolution.cpp +++ /dev/null @@ -1,213 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct CustomConvolutionLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *kernel; - Rpp32u kernelWidth; - Rpp32u kernelHeight; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshCustomConvolution(vx_node node, const vx_reference *parameters, vx_uint32 num, CustomConvolutionLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernel = (Rpp32f *)malloc(sizeof(Rpp32f) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp32f),data->kernel, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->kernelWidth)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelHeight)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateCustomConvolution(vx_node node, const vx_reference parameters[], 
vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CustomConvolution: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processCustomConvolution(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CustomConvolutionLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshCustomConvolution(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_custom_convolution_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_custom_convolution_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshCustomConvolution(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_custom_convolution_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_custom_convolution_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeCustomConvolution(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CustomConvolutionLocalData * data = new CustomConvolutionLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshCustomConvolution(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeCustomConvolution(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CustomConvolutionLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status CustomConvolution_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CustomConvolution", - VX_KERNEL_RPP_CUSTOMCONVOLUTION, - processCustomConvolution, - 6, - validateCustomConvolution, - initializeCustomConvolution, - uninitializeCustomConvolution); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 
4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/CustomConvolutionbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/CustomConvolutionbatchPD.cpp index 464151016d..6851ecbcc9 100644 --- a/amd_openvx_extensions/amd_rpp/source/CustomConvolutionbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/CustomConvolutionbatchPD.cpp @@ -22,211 +22,274 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct CustomConvolutionbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_array *kernel; - vx_uint32 *kernelWidth; - vx_uint32 *kernelHeight; +struct CustomConvolutionbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_array *kernel; + size_t kernel_arr_size; + vx_uint32 *kernelWidth; + vx_uint32 *kernelHeight; + RppiSize *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshCustomConvolutionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, CustomConvolutionbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernel = (vx_array *)malloc(sizeof(vx_array) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_array),data->kernel, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelWidth = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->kernelWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelHeight = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32),data->kernelHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - 
copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->kernel_arr_size, sizeof(vx_array), data->kernel, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->kernelSize[i].width = data->kernelWidth[i]; + data->kernelSize[i].height = data->kernelHeight[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, 
&data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateCustomConvolutionbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CustomConvolutionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CustomConvolutionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; +} -static vx_status VX_CALLBACK processCustomConvolutionbatchPD(vx_node node, const vx_reference * parameters, 
vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CustomConvolutionbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshCustomConvolutionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_custom_convolution_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_custom_convolution_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; +} -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshCustomConvolutionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_custom_convolution_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_custom_convolution_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; +static vx_status VX_CALLBACK processCustomConvolutionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + CustomConvolutionbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + // #if ENABLE_OPENCL + // refreshCustomConvolutionbatchPD(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_custom_convolution_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernel,data->kernelSize,data->nbatchSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_custom_convolution_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernel,data->kernelSize,data->nbatchSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #elif ENABLE_HIP + // refreshCustomConvolutionbatchPD(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_custom_convolution_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->kernel,data->kernelSize,data->nbatchSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_custom_convolution_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->kernel,data->kernelSize,data->nbatchSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #endif + return VX_ERROR_NOT_IMPLEMENTED; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshCustomConvolutionbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_custom_convolution_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernel, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_custom_convolution_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernel, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeCustomConvolutionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeCustomConvolutionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - CustomConvolutionbatchPDLocalData * data = new CustomConvolutionbatchPDLocalData; - memset(data, 0, sizeof(*data)); + CustomConvolutionbatchPDLocalData *data = new CustomConvolutionbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshCustomConvolutionbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &data->kernel_arr_size, sizeof(data->kernel_arr_size))); + data->kernel = (vx_array *)malloc(sizeof(vx_array) * data->kernel_arr_size); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->kernelWidth = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->kernelHeight = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->kernelSize = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + refreshCustomConvolutionbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeCustomConvolutionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - CustomConvolutionbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + CustomConvolutionbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + 
rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->kernelWidth); + free(data->kernelHeight); + free(data->kernel); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status CustomConvolutionbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CustomConvolutionbatchPD", - VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD, - processCustomConvolutionbatchPD, - 9, - validateCustomConvolutionbatchPD, - initializeCustomConvolutionbatchPD, - uninitializeCustomConvolutionbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CustomConvolutionbatchPD", + VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPD, + processCustomConvolutionbatchPD, + 9, + validateCustomConvolutionbatchPD, + initializeCustomConvolutionbatchPD, + uninitializeCustomConvolutionbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/CustomConvolutionbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/CustomConvolutionbatchPDROID.cpp deleted file mode 100644 index 2a3953ffa5..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/CustomConvolutionbatchPDROID.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct CustomConvolutionbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_array *kernel; - vx_uint32 *kernelWidth; - vx_uint32 *kernelHeight; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshCustomConvolutionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, CustomConvolutionbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernel = (vx_array *)malloc(sizeof(vx_array) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_array),data->kernel, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelWidth = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->kernelWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelHeight = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32),data->kernelHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u 
*batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateCustomConvolutionbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CustomConvolutionbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - 
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processCustomConvolutionbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CustomConvolutionbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshCustomConvolutionbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_custom_convolution_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_custom_convolution_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshCustomConvolutionbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_custom_convolution_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_custom_convolution_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeCustomConvolutionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CustomConvolutionbatchPDROIDLocalData * data = new CustomConvolutionbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshCustomConvolutionbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeCustomConvolutionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CustomConvolutionbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status CustomConvolutionbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CustomConvolutionbatchPDROID", - VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPDROID, - processCustomConvolutionbatchPDROID, - 13, - validateCustomConvolutionbatchPDROID, - initializeCustomConvolutionbatchPDROID, - uninitializeCustomConvolutionbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/CustomConvolutionbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/CustomConvolutionbatchPS.cpp deleted file mode 100644 index 7e5f0fe7ae..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/CustomConvolutionbatchPS.cpp +++ /dev/null @@ -1,232 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct CustomConvolutionbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_array *kernel; - Rpp32u kernelWidth; - Rpp32u kernelHeight; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshCustomConvolutionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, CustomConvolutionbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernel = (vx_array *)malloc(sizeof(vx_array) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_array),data->kernel, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->kernelWidth)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->kernelHeight)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateCustomConvolutionbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, 
sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: CustomConvolutionbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processCustomConvolutionbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - CustomConvolutionbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshCustomConvolutionbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_custom_convolution_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_custom_convolution_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshCustomConvolutionbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_custom_convolution_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_custom_convolution_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernel,data->kernelWidth,data->kernelHeight,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeCustomConvolutionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CustomConvolutionbatchPSLocalData * data = new CustomConvolutionbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshCustomConvolutionbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeCustomConvolutionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - CustomConvolutionbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status CustomConvolutionbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.CustomConvolutionbatchPS", - VX_KERNEL_RPP_CUSTOMCONVOLUTIONBATCHPS, - processCustomConvolutionbatchPS, - 9, - validateCustomConvolutionbatchPS, - initializeCustomConvolutionbatchPS, - uninitializeCustomConvolutionbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/DataObjectCopy.cpp b/amd_openvx_extensions/amd_rpp/source/DataObjectCopy.cpp deleted file mode 100644 index 7c5051fae7..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/DataObjectCopy.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct DataObjectCopyLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshDataObjectCopy(vx_node node, const vx_reference *parameters, vx_uint32 num, DataObjectCopyLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateDataObjectCopy(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DataObjectCopy: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processDataObjectCopy(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = 
RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - DataObjectCopyLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshDataObjectCopy(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_data_object_copy_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_data_object_copy_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshDataObjectCopy(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_data_object_copy_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_data_object_copy_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeDataObjectCopy(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DataObjectCopyLocalData * data = new DataObjectCopyLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshDataObjectCopy(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeDataObjectCopy(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DataObjectCopyLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status DataObjectCopy_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DataObjectCopy", - VX_KERNEL_RPP_DATAOBJECTCOPY, - processDataObjectCopy, - 3, - validateDataObjectCopy, - initializeDataObjectCopy, - uninitializeDataObjectCopy); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - 
if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/DataObjectCopybatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/DataObjectCopybatchPD.cpp index 239c9e9ee8..b1e30f9c7b 100644 --- a/amd_openvx_extensions/amd_rpp/source/DataObjectCopybatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/DataObjectCopybatchPD.cpp @@ -22,196 +22,260 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct DataObjectCopybatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; +struct DataObjectCopybatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshDataObjectCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, DataObjectCopybatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + size_t arr_size; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], 
VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateDataObjectCopybatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DataObjectCopybatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for 
output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DataObjectCopybatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; }
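One convention in the refresh/validate code above is worth spelling out: a batchPD node receives the whole batch as a single vx_image in which the per-image planes are stacked vertically, so refresh recovers the per-image height by dividing the queried VX_IMAGE_HEIGHT by nbatchSize. A small sketch of that arithmetic, under an assumed batch of four 640x480 images (the values are illustrative only):

#include <cassert>

int main()
{
    // A batch of nbatchSize images, each width x height, arrives as one
    // vx_image of size width x (nbatchSize * height).
    unsigned nbatchSize = 4;
    unsigned queriedWidth = 640;               // VX_IMAGE_WIDTH
    unsigned queriedHeight = 480 * nbatchSize; // VX_IMAGE_HEIGHT = 1920
    // refreshDataObjectCopybatchPD derives the per-image extent like this:
    unsigned maxSrcWidth = queriedWidth;
    unsigned maxSrcHeight = queriedHeight / nbatchSize;
    assert(maxSrcWidth == 640 && maxSrcHeight == 480);
    return 0;
}

The validator, by contrast, only checks the scalar types and propagates the output image meta-data unchanged; it does not re-derive the per-image dimensions.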
-static vx_status VX_CALLBACK processDataObjectCopybatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - DataObjectCopybatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processDataObjectCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + DataObjectCopybatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshDataObjectCopybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_data_object_copy_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_data_object_copy_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshDataObjectCopybatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_data_object_copy_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_data_object_copy_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshDataObjectCopybatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_data_object_copy_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_data_object_copy_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshDataObjectCopybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_data_object_copy_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_data_object_copy_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ?
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshDataObjectCopybatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_data_object_copy_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_data_object_copy_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeDataObjectCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeDataObjectCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - DataObjectCopybatchPDLocalData * data = new DataObjectCopybatchPDLocalData; - memset(data, 0, sizeof(*data)); + DataObjectCopybatchPDLocalData *data = new DataObjectCopybatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshDataObjectCopybatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshDataObjectCopybatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeDataObjectCopybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - DataObjectCopybatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + DataObjectCopybatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, 
VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status DataObjectCopybatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DataObjectCopybatchPD", - VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD, - processDataObjectCopybatchPD, - 6, - validateDataObjectCopybatchPD, - initializeDataObjectCopybatchPD, - uninitializeDataObjectCopybatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DataObjectCopybatchPD", + VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPD, + processDataObjectCopybatchPD, + 6, + validateDataObjectCopybatchPD, + initializeDataObjectCopybatchPD, + uninitializeDataObjectCopybatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; }
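The hunk above completes the DataObjectCopybatchPD.cpp rework, and the allocation-lifecycle change it makes is easy to miss in the noise: the srcDimensions/srcBatch_width/srcBatch_height buffers that the old refresh callback re-malloc'd on every execution (and never freed) are now allocated once in initialize and released in uninitialize, with refresh reduced to copying fresh values into the existing buffers. A minimal standalone sketch of that lifecycle, using hypothetical names (BatchLocalData, init/refresh/uninit) in place of the OpenVX callback machinery:

#include <cstdio>
#include <cstdlib>

// Stand-in for DataObjectCopybatchPDLocalData (hypothetical, simplified).
struct BatchLocalData
{
    unsigned batchSize;
    unsigned *widths;  // analogue of srcBatch_width
    unsigned *heights; // analogue of srcBatch_height
};

// initialize-style callback: runs once per node and owns the allocations.
static BatchLocalData *init(unsigned batchSize)
{
    BatchLocalData *d = (BatchLocalData *)calloc(1, sizeof(*d));
    d->batchSize = batchSize;
    d->widths = (unsigned *)malloc(sizeof(unsigned) * batchSize);
    d->heights = (unsigned *)malloc(sizeof(unsigned) * batchSize);
    return d;
}

// refresh-style callback: runs per execution and only refills the buffers.
static void refresh(BatchLocalData *d, const unsigned *w, const unsigned *h)
{
    for (unsigned i = 0; i < d->batchSize; i++)
    {
        d->widths[i] = w[i];
        d->heights[i] = h[i];
    }
}

// uninitialize-style callback: frees exactly what init allocated.
static void uninit(BatchLocalData *d)
{
    free(d->widths);
    free(d->heights);
    free(d);
}

int main()
{
    unsigned w[2] = {640, 1280}, h[2] = {480, 720};
    BatchLocalData *d = init(2);
    refresh(d, w, h); // in the node this happens once per processed batch
    printf("image 1: %u x %u\n", d->widths[1], d->heights[1]);
    uninit(d);
    return 0;
}

Under the old structure every call to refreshDataObjectCopybatchPD leaked three heap blocks per frame; the new split also lets initialize read nbatchSize once (via vxReadScalarValue) before sizing the buffers.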
diff --git a/amd_openvx_extensions/amd_rpp/source/DataObjectCopybatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/DataObjectCopybatchPDROID.cpp deleted file mode 100644 index 38e14603fe..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/DataObjectCopybatchPDROID.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE.
-*/ - -#include "internal_publishKernels.h" - -struct DataObjectCopybatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshDataObjectCopybatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, DataObjectCopybatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateDataObjectCopybatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DataObjectCopybatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processDataObjectCopybatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - DataObjectCopybatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshDataObjectCopybatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_data_object_copy_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_data_object_copy_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshDataObjectCopybatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_data_object_copy_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_data_object_copy_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeDataObjectCopybatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DataObjectCopybatchPDROIDLocalData * data = new DataObjectCopybatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshDataObjectCopybatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeDataObjectCopybatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DataObjectCopybatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status DataObjectCopybatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DataObjectCopybatchPDROID", - VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPDROID, - processDataObjectCopybatchPDROID, - 10, - validateDataObjectCopybatchPDROID, - initializeDataObjectCopybatchPDROID, - uninitializeDataObjectCopybatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, 
VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/DataObjectCopybatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/DataObjectCopybatchPS.cpp deleted file mode 100644 index e44b0e8045..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/DataObjectCopybatchPS.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct DataObjectCopybatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshDataObjectCopybatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, DataObjectCopybatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateDataObjectCopybatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != 
VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DataObjectCopybatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processDataObjectCopybatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - DataObjectCopybatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshDataObjectCopybatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_data_object_copy_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_data_object_copy_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshDataObjectCopybatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_data_object_copy_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_data_object_copy_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeDataObjectCopybatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DataObjectCopybatchPSLocalData * data = new DataObjectCopybatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshDataObjectCopybatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeDataObjectCopybatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DataObjectCopybatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status DataObjectCopybatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DataObjectCopybatchPS", - VX_KERNEL_RPP_DATAOBJECTCOPYBATCHPS, - processDataObjectCopybatchPS, - 6, - validateDataObjectCopybatchPS, - initializeDataObjectCopybatchPS, - uninitializeDataObjectCopybatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Dilate.cpp b/amd_openvx_extensions/amd_rpp/source/Dilate.cpp deleted file mode 100644 index 7a76752706..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/Dilate.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct DilateLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshDilate(vx_node node, const vx_reference *parameters, vx_uint32 num, DilateLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateDilate(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Dilate: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processDilate(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - DilateLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshDilate(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_dilate_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_dilate_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshDilate(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_dilate_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_dilate_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeDilate(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DilateLocalData * data = new DilateLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshDilate(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeDilate(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DilateLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Dilate_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Dilate", - VX_KERNEL_RPP_DILATE, - processDilate, - 4, - validateDilate, - initializeDilate, - uninitializeDilate); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/DilatebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/DilatebatchPD.cpp index 16d9a5908b..708cb2e554 100644 --- a/amd_openvx_extensions/amd_rpp/source/DilatebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/DilatebatchPD.cpp @@ -22,201 +22,264 @@ THE SOFTWARE. 
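The DilatebatchPD.cpp hunk below makes two structural changes beyond reformatting: it adds a HIP code path beside the existing OpenCL one, and it moves the per-batch allocations (srcDimensions, srcBatch_width, srcBatch_height, kernelSize) out of refreshDilatebatchPD, which runs on every process call, into initializeDilatebatchPD, with matching free calls in uninitializeDilatebatchPD. The old refresh malloc'd fresh buffers on each frame and never freed them, so the node leaked memory per execution. A condensed sketch of the allocate-once / refresh-many / free-once lifecycle, with hypothetical names standing in for the OpenVX plumbing:

    #include <cstdint>
    #include <cstdlib>

    // Hypothetical, simplified stand-in for DilatebatchPDLocalData.
    struct LocalData {
        uint32_t  batchSize  = 0;
        uint32_t *srcWidths  = nullptr; // allocated once, refreshed every frame
        uint32_t *srcHeights = nullptr;
    };

    // initialize-time: size the scratch buffers from the batch size, once.
    bool initLocalData(LocalData &d, uint32_t batchSize) {
        d.batchSize  = batchSize;
        d.srcWidths  = static_cast<uint32_t *>(std::malloc(sizeof(uint32_t) * batchSize));
        d.srcHeights = static_cast<uint32_t *>(std::malloc(sizeof(uint32_t) * batchSize));
        return d.srcWidths != nullptr && d.srcHeights != nullptr;
    }

    // process-time refresh: copy current values into the existing buffers;
    // no allocation here, so repeated executions cannot leak.
    void refreshLocalData(LocalData &d, const uint32_t *w, const uint32_t *h) {
        for (uint32_t i = 0; i < d.batchSize; ++i) {
            d.srcWidths[i]  = w[i];
            d.srcHeights[i] = h[i];
        }
    }

    // uninitialize-time: release exactly what initialize acquired.
    void freeLocalData(LocalData &d) {
        std::free(d.srcWidths);
        std::free(d.srcHeights);
        d.srcWidths = d.srcHeights = nullptr;
    }

The same split is why the hunk also moves the vxReadScalarValue of the batch-size scalar into initialize: the buffer sizes must be known before the first refresh runs.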
#include "internal_publishKernels.h" -struct DilatebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; +struct DilatebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshDilatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, DilatebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = 
data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateDilatebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DilatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], 
VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DilatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processDilatebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - DilatebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processDilatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + DilatebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - 
refreshDilatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_dilate_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_dilate_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshDilatebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_dilate_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_dilate_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshDilatebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_dilate_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_dilate_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshDilatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_dilate_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_dilate_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshDilatebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_dilate_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_dilate_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeDilatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeDilatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - DilatebatchPDLocalData * data = new DilatebatchPDLocalData; - memset(data, 0, sizeof(*data)); + DilatebatchPDLocalData *data = new DilatebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshDilatebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshDilatebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeDilatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - DilatebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + DilatebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status DilatebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DilatebatchPD", - VX_KERNEL_RPP_DILATEBATCHPD, - processDilatebatchPD, - 7, - validateDilatebatchPD, - initializeDilatebatchPD, - uninitializeDilatebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DilatebatchPD", + VX_KERNEL_RPP_DILATEBATCHPD, + processDilatebatchPD, + 7, + validateDilatebatchPD, + initializeDilatebatchPD, + uninitializeDilatebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/DilatebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/DilatebatchPDROID.cpp deleted file mode 100644 index 7bf7b9762c..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/DilatebatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
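A detail of the refresh logic in the hunk above worth noting: the node receives the whole batch as a single vx_image and recovers the per-frame maximum height by dividing the queried image height by nbatchSize, which implies the frames are stacked vertically in one buffer. A small illustrative sketch of that geometry (hypothetical helper names, assuming tightly packed rows; not part of the patch):

    #include <cstddef>
    #include <cstdint>

    // Per-frame geometry recovered from a batch carried as one tall image,
    // mirroring the height division in refreshDilatebatchPD.
    struct BatchGeometry {
        uint32_t maxWidth;  // width of the backing image
        uint32_t maxHeight; // per-frame slot height = total height / batch size
        uint32_t batchSize;
    };

    inline BatchGeometry fromStackedImage(uint32_t imageWidth, uint32_t imageHeight,
                                          uint32_t batchSize) {
        return BatchGeometry{imageWidth, imageHeight / batchSize, batchSize};
    }

    // Byte offset of frame i in a packed 3-channel (pkd3) U8 buffer,
    // assuming rows carry no padding.
    inline std::size_t frameOffsetPkd3(const BatchGeometry &g, uint32_t i) {
        return static_cast<std::size_t>(i) * g.maxWidth * g.maxHeight * 3u;
    }

For example, a 4-image batch carried in a 1920x4320 RGB image yields a 1080-row slot per frame, and frame 2 would begin at byte offset 2 * 1920 * 1080 * 3 under these assumptions.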
-*/ - -#include "internal_publishKernels.h" - -struct DilatebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshDilatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, DilatebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateDilatebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DilatebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processDilatebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - DilatebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshDilatebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_dilate_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_dilate_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshDilatebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_dilate_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_dilate_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeDilatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DilatebatchPDROIDLocalData * data = new DilatebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshDilatebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeDilatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DilatebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status DilatebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DilatebatchPDROID", - VX_KERNEL_RPP_DILATEBATCHPDROID, - processDilatebatchPDROID, - 11, - validateDilatebatchPDROID, - initializeDilatebatchPDROID, - uninitializeDilatebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/DilatebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/DilatebatchPS.cpp deleted file mode 100644 index 5d66791e63..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/DilatebatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
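For context on the ROID variant removed above: its refresh callback assembled per-image regions from four parallel vx_arrays (x, y, roiWidth, roiHeight) into an RppiROI array before each RPP call. A plain C++ sketch of that packing step, using a stand-in for the RPP struct:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Stand-in for the RppiROI struct from the RPP headers.
    struct Roi {
        uint32_t x, y, roiWidth, roiHeight;
    };

    // Pack four parallel per-image arrays (as read out of the node's
    // vx_array parameters) into one ROI array for the batch, the way
    // refreshDilatebatchPDROID did. All four vectors must be the same length.
    std::vector<Roi> packRois(const std::vector<uint32_t> &x,
                              const std::vector<uint32_t> &y,
                              const std::vector<uint32_t> &w,
                              const std::vector<uint32_t> &h) {
        std::vector<Roi> rois(x.size());
        for (std::size_t i = 0; i < rois.size(); ++i)
            rois[i] = Roi{x[i], y[i], w[i], h[i]};
        return rois;
    }

The batchPD variant that survives this patch drops the ROI parameters entirely and processes each full frame, which is why its kernel takes 7 parameters instead of the ROID variant's 11.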
-*/ - -#include "internal_publishKernels.h" - -struct DilatebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshDilatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, DilatebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateDilatebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - 
vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: DilatebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processDilatebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - DilatebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshDilatebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_dilate_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_dilate_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshDilatebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_dilate_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_dilate_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeDilatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DilatebatchPSLocalData * data = new DilatebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshDilatebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeDilatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - DilatebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status DilatebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.DilatebatchPS", - VX_KERNEL_RPP_DILATEBATCHPS, - processDilatebatchPS, - 7, - validateDilatebatchPS, - initializeDilatebatchPS, - uninitializeDilatebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Erode.cpp b/amd_openvx_extensions/amd_rpp/source/Erode.cpp deleted file mode 100644 index fceaa84afe..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/Erode.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ErodeLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshErode(vx_node node, const vx_reference *parameters, vx_uint32 num, ErodeLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateErode(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Erode: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processErode(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ErodeLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshErode(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_erode_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_erode_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshErode(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_erode_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_erode_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeErode(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ErodeLocalData * data = new ErodeLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshErode(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeErode(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ErodeLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Erode_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Erode", - VX_KERNEL_RPP_ERODE, - processErode, - 4, - validateErode, - initializeErode, - uninitializeErode); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ErodebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ErodebatchPD.cpp index b1c27ed5e1..b89bc19519 100644 --- a/amd_openvx_extensions/amd_rpp/source/ErodebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ErodebatchPD.cpp @@ -22,201 +22,265 @@ THE SOFTWARE. 
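The ErodebatchPD.cpp hunk below is representative of how every surviving batchPD kernel is reworked in this patch: the per-image scratch buffers (srcDimensions, srcBatch_width, srcBatch_height, kernelSize) move into the local-data struct and are allocated once in the initialize callback, the refresh callback only copies array contents into them, and a HIP buffer path is added behind #elif ENABLE_HIP alongside the existing OpenCL path. A minimal sketch of that lifecycle follows; the struct and function names are hypothetical stand-ins, while the calls and macros (STATUS_ERROR_CHECK, vxReadScalarValue, vxCopyArrayRange) are the ones this file already uses.

```cpp
// Sketch only: the allocate-once / refresh-many pattern used by the batchPD
// kernels in this patch. Rpp32u, RppiSize, and STATUS_ERROR_CHECK come from
// internal_publishKernels.h; ExampleLocalData is an illustrative stand-in.
struct ExampleLocalData
{
    Rpp32u nbatchSize;
    RppiSize *srcDimensions; // per-image sizes, refilled on every refresh
    Rpp32u *srcBatch_width;  // staging buffers, sized exactly once
    Rpp32u *srcBatch_height;
};

static vx_status exampleInitialize(vx_node node, const vx_reference *parameters, ExampleLocalData *data)
{
    // Read the batch size first so each buffer can be sized exactly once.
    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize));
    data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);
    data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
    data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
    return VX_SUCCESS;
}

static vx_status exampleRefresh(vx_node node, const vx_reference *parameters, ExampleLocalData *data)
{
    // Per-frame path: copy into the preallocated buffers, no malloc here.
    STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),
                                        data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
    STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),
                                        data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
    for (Rpp32u i = 0; i < data->nbatchSize; i++)
    {
        data->srcDimensions[i].width = data->srcBatch_width[i];
        data->srcDimensions[i].height = data->srcBatch_height[i];
    }
    return VX_SUCCESS;
}

static void exampleUninitialize(ExampleLocalData *data)
{
    // Release exactly what initialize allocated.
    free(data->srcDimensions);
    free(data->srcBatch_width);
    free(data->srcBatch_height);
}
```

The old refresh callbacks (left side of the hunk) malloc'd these buffers on every execution and never freed them, so the refactor removes a per-frame leak as well as the repeated allocations.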
#include "internal_publishKernels.h" -struct ErodebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; +struct ErodebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshErodebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ErodebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + 
data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateErodebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ErodebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, 
sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ErodebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processErodebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ErodebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processErodebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ErodebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL -
cl_command_queue handle = data->handle.cmdq; - refreshErodebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_erode_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_erode_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshErodebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_erode_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_erode_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshErodebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_erode_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_erode_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshErodebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_erode_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_erode_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshErodebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_erode_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_erode_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeErodebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeErodebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ErodebatchPDLocalData * data = new ErodebatchPDLocalData; - memset(data, 0, sizeof(*data)); + ErodebatchPDLocalData *data = new ErodebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshErodebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshErodebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeErodebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ErodebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ErodebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status ErodebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ErodebatchPD", - VX_KERNEL_RPP_ERODEBATCHPD, - processErodebatchPD, - 7, - validateErodebatchPD, - initializeErodebatchPD, - uninitializeErodebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ErodebatchPD", + VX_KERNEL_RPP_ERODEBATCHPD, + processErodebatchPD, + 7, + validateErodebatchPD, + initializeErodebatchPD, + uninitializeErodebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY,
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ErodebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/ErodebatchPDROID.cpp deleted file mode 100644 index 63267f386d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ErodebatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
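One functional addition in the ErodebatchPD.cpp registration above is worth calling out: the new query_target_support callback, installed via VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, tells the AMD OpenVX runtime at vxVerifyGraph time which targets the node supports, and with the OpenCL backend it deliberately reports CPU-only because these amd_rpp nodes provide no OpenCL codegen callback. A small illustration of that contract, assuming it sits in the same translation unit as the static callback defined above (checkErodebatchPDAffinity is a hypothetical helper):

```cpp
#include <cassert>

// Sketch of the callback contract: the runtime invokes the callback during
// vxVerifyGraph and will only schedule the node on a target contained in
// supported_target_affinity. graph and node are assumed to exist already.
static void checkErodebatchPDAffinity(vx_graph graph, vx_node node)
{
    vx_uint32 supported = 0;
    query_target_support(graph, node, vx_false_e, supported);
#if ENABLE_OPENCL
    // The callback above pins amd_rpp nodes to the CPU for the OpenCL backend,
    // so graph verification never looks for an OpenCL codegen callback.
    assert(supported == AGO_TARGET_AFFINITY_CPU);
#endif
}
```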
-*/ - -#include "internal_publishKernels.h" - -struct ErodebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshErodebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, ErodebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateErodebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ErodebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processErodebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ErodebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshErodebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_erode_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_erode_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshErodebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_erode_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_erode_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeErodebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ErodebatchPDROIDLocalData * data = new ErodebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshErodebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeErodebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ErodebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ErodebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ErodebatchPDROID", - VX_KERNEL_RPP_ERODEBATCHPDROID, - processErodebatchPDROID, - 11, - validateErodebatchPDROID, - initializeErodebatchPDROID, - uninitializeErodebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ErodebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ErodebatchPS.cpp deleted file mode 100644 index ddffe05eb6..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ErodebatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
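ErodebatchPS.cpp, whose deletion starts here, differed from the surviving batchPD kernel only in taking a single kernelSize scalar for the whole batch (parameter #4) instead of a per-image array, as the removed validate and process code below shows. Callers can migrate by broadcasting the scalar into an array; a hypothetical sketch follows (the node-creation side is not part of this diff, so the helper name and nbatchSize are stand-ins, while vxCreateArray and vxAddArrayItems are standard OpenVX calls):

```cpp
#include <vector>

// Sketch: emulate the removed batchPS behaviour with the batchPD kernel by
// replicating one kernel size across the batch. context is an existing
// vx_context; nbatchSize matches the batch the graph was built for.
vx_array makeKernelSizeArray(vx_context context, vx_size nbatchSize, vx_uint32 kernelSize)
{
    std::vector<vx_uint32> kernelSizes(nbatchSize, kernelSize);
    vx_array arr = vxCreateArray(context, VX_TYPE_UINT32, nbatchSize);
    vxAddArrayItems(arr, nbatchSize, kernelSizes.data(), sizeof(vx_uint32));
    return arr; // passed where batchPS previously took the scalar
}
```

The ROID variant removed above is the same consolidation in the other direction: its extra per-image x/y/width/height arrays collapse into the RppiROI structs that the deleted refresh code rebuilt on every frame.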
-*/ - -#include "internal_publishKernels.h" - -struct ErodebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshErodebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ErodebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateErodebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image 
df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ErodebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processErodebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ErodebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshErodebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_erode_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_erode_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshErodebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_erode_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_erode_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeErodebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ErodebatchPSLocalData * data = new ErodebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshErodebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeErodebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ErodebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ErodebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ErodebatchPS", - VX_KERNEL_RPP_ERODEBATCHPS, - processErodebatchPS, - 7, - validateErodebatchPS, - initializeErodebatchPS, - uninitializeErodebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ExclusiveOR.cpp b/amd_openvx_extensions/amd_rpp/source/ExclusiveOR.cpp deleted file mode 100644 index 75b6e56b34..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/ExclusiveOR.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ExclusiveORLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshExclusiveOR(vx_node node, const vx_reference *parameters, vx_uint32 num, ExclusiveORLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateExclusiveOR(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - 
STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveOR: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveOR: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processExclusiveOR(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ExclusiveORLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshExclusiveOR(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exclusive_OR_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exclusive_OR_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshExclusiveOR(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exclusive_OR_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exclusive_OR_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeExclusiveOR(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExclusiveORLocalData * data = new ExclusiveORLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshExclusiveOR(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeExclusiveOR(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExclusiveORLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ExclusiveOR_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExclusiveOR", - VX_KERNEL_RPP_EXCLUSIVEOR, - processExclusiveOR, - 4, - validateExclusiveOR, - initializeExclusiveOR, - uninitializeExclusiveOR); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ExclusiveORbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ExclusiveORbatchPD.cpp index 17cba1d57f..fef0ba377a 100644 --- a/amd_openvx_extensions/amd_rpp/source/ExclusiveORbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ExclusiveORbatchPD.cpp @@ -22,209 +22,275 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct ExclusiveORbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct ExclusiveORbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshExclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ExclusiveORbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + size_t arr_size; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateExclusiveORbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveORbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return 
ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveORbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveORbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveORbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + 
vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processExclusiveORbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ExclusiveORbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processExclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ExclusiveORbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshExclusiveORbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exclusive_OR_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exclusive_OR_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshExclusiveORbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_exclusive_OR_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_exclusive_OR_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshExclusiveORbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_exclusive_OR_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_exclusive_OR_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshExclusiveORbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exclusive_OR_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exclusive_OR_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshExclusiveORbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_exclusive_OR_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_exclusive_OR_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeExclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeExclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ExclusiveORbatchPDLocalData * data = new ExclusiveORbatchPDLocalData; - memset(data, 0, sizeof(*data)); + ExclusiveORbatchPDLocalData *data = new ExclusiveORbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshExclusiveORbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshExclusiveORbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + 
rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeExclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ExclusiveORbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ExclusiveORbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status ExclusiveORbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExclusiveORbatchPD", - VX_KERNEL_RPP_EXCLUSIVEORBATCHPD, - processExclusiveORbatchPD, - 7, - validateExclusiveORbatchPD, - initializeExclusiveORbatchPD, - uninitializeExclusiveORbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExclusiveORbatchPD", + VX_KERNEL_RPP_EXCLUSIVEORBATCHPD, + processExclusiveORbatchPD, + 7, + validateExclusiveORbatchPD, + initializeExclusiveORbatchPD, + uninitializeExclusiveORbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - 
if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ExclusiveORbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/ExclusiveORbatchPDROID.cpp deleted file mode 100644 index 37ba4d0961..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ExclusiveORbatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ExclusiveORbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshExclusiveORbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, ExclusiveORbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = 
vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateExclusiveORbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveORbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveORbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processExclusiveORbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ExclusiveORbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshExclusiveORbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exclusive_OR_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exclusive_OR_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshExclusiveORbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exclusive_OR_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exclusive_OR_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeExclusiveORbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExclusiveORbatchPDROIDLocalData * data = new ExclusiveORbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshExclusiveORbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeExclusiveORbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExclusiveORbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ExclusiveORbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExclusiveORbatchPDROID", - VX_KERNEL_RPP_EXCLUSIVEORBATCHPDROID, - processExclusiveORbatchPDROID, - 11, - validateExclusiveORbatchPDROID, - initializeExclusiveORbatchPDROID, - uninitializeExclusiveORbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); 
- PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ExclusiveORbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ExclusiveORbatchPS.cpp deleted file mode 100644 index 18dea4a722..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ExclusiveORbatchPS.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
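A recurring fix in this patch, visible again in the ExclusiveORbatchPS file deleted below: the old refresh callbacks called malloc for srcDimensions and the scratch width/height arrays on every refresh and never freed them, so every processed batch leaked host memory. The retained batchPD kernels instead allocate once in initialize, only fill the buffers from refresh, and free them in uninitialize. A condensed sketch of that lifecycle follows; names are illustrative, error checking is elided, and it assumes the OpenVX headers, the AMD extension header that declares vxReadScalarValue, and the RPP headers for Rpp32u/RppiSize.

```cpp
#include <VX/vx.h>
#include <vx_ext_amd.h>   // vxReadScalarValue, VX_NODE_LOCAL_DATA_PTR (AMD extension)
#include <rppdefs.h>      // Rpp32u, RppiSize

struct BatchLocalData    // illustrative stand-in for the per-kernel LocalData structs
{
    Rpp32u nbatchSize;
    RppiSize *srcDimensions;
    Rpp32u *srcBatch_width;
    Rpp32u *srcBatch_height;
};

static vx_status VX_CALLBACK initializeNode(vx_node node, const vx_reference *parameters, vx_uint32 num)
{
    BatchLocalData *data = new BatchLocalData();
    vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize);  // fixed for the node's lifetime
    data->srcDimensions   = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);  // once, not per frame
    data->srcBatch_width  = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
    data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
    vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data));
    return VX_SUCCESS;
}

static vx_status refreshNode(vx_node node, const vx_reference *parameters, BatchLocalData *data)
{
    // Per-frame work only fills the preallocated buffers -- no malloc here,
    // which is what closes the per-frame leak of the deleted variants.
    vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),
                     data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
    vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),
                     data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
    for (Rpp32u i = 0; i < data->nbatchSize; i++) {
        data->srcDimensions[i].width  = data->srcBatch_width[i];
        data->srcDimensions[i].height = data->srcBatch_height[i];
    }
    return VX_SUCCESS;
}

static vx_status VX_CALLBACK uninitializeNode(vx_node node, const vx_reference *parameters, vx_uint32 num)
{
    BatchLocalData *data = nullptr;
    vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data));
    free(data->srcDimensions);    // mirrors the free() calls added to uninitializeExclusiveORbatchPD
    free(data->srcBatch_width);
    free(data->srcBatch_height);
    delete data;
    return VX_SUCCESS;
}
```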
-*/ - -#include "internal_publishKernels.h" - -struct ExclusiveORbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshExclusiveORbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ExclusiveORbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateExclusiveORbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveORbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExclusiveORbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processExclusiveORbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ExclusiveORbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshExclusiveORbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exclusive_OR_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exclusive_OR_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshExclusiveORbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exclusive_OR_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exclusive_OR_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeExclusiveORbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExclusiveORbatchPSLocalData * data = new ExclusiveORbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshExclusiveORbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeExclusiveORbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExclusiveORbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ExclusiveORbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExclusiveORbatchPS", - VX_KERNEL_RPP_EXCLUSIVEORBATCHPS, - processExclusiveORbatchPS, - 7, - validateExclusiveORbatchPS, - initializeExclusiveORbatchPS, - uninitializeExclusiveORbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Exposure.cpp b/amd_openvx_extensions/amd_rpp/source/Exposure.cpp deleted file mode 100644 index d596eb91ab..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Exposure.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ExposureLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f exposureValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshExposure(vx_node node, const vx_reference *parameters, vx_uint32 num, ExposureLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->exposureValue)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateExposure(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Exposure: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], 
VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processExposure(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ExposureLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshExposure(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exposure_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->exposureValue,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exposure_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->exposureValue,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshExposure(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exposure_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->exposureValue,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exposure_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->exposureValue,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeExposure(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExposureLocalData * data = new ExposureLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshExposure(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeExposure(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExposureLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Exposure_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Exposure", - VX_KERNEL_RPP_EXPOSURE, - processExposure, - 4, - validateExposure, - initializeExposure, - uninitializeExposure); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ExposurebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ExposurebatchPD.cpp index ba92eb1ea7..53a9c34d5e 100644 --- a/amd_openvx_extensions/amd_rpp/source/ExposurebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ExposurebatchPD.cpp @@ -22,219 +22,263 @@ THE SOFTWARE. 
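[Editor's note] The ExposurebatchPD.cpp hunk below is representative of the surviving batchPD kernels in this patch. Apart from reindenting, it fixes a per-execution memory leak: the old refresh callback malloc'd srcDimensions, srcBatch_width/height, and exposureValue on every graph run without ever freeing them, while the new code allocates them once in initialize, merely refills them in refresh, and frees them in uninitialize. A condensed sketch of the corrected lifecycle, with the RPP-handle and GPU plumbing elided (all names are taken from the diff itself):

    // initialize: read the batch-size scalar once, then size every scratch buffer from it
    static vx_status VX_CALLBACK initializeExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
    {
        ExposurebatchPDLocalData *data = new ExposurebatchPDLocalData;
        memset(data, 0, sizeof(*data));
        STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize));
        data->srcDimensions   = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);
        data->srcBatch_width  = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
        data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
        data->exposureValue   = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize);
        STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
        return VX_SUCCESS;
    }

    // refresh (runs per execution): only vxCopyArrayRange into the buffers above, no allocation

    // uninitialize: release exactly what initialize allocated
    static vx_status VX_CALLBACK uninitializeExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
    {
        ExposurebatchPDLocalData *data;
        STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
        free(data->srcDimensions);
        free(data->srcBatch_width);
        free(data->srcBatch_height);
        free(data->exposureValue);
        delete data;
        return VX_SUCCESS;
    }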
#include "internal_publishKernels.h" -struct ExposurebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *exposureValue; +struct ExposurebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *exposureValue; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ExposurebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->exposureValue = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->exposureValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->exposureValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + 
data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateExposurebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExposurebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExposurebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processExposurebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ExposurebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ExposurebatchPDLocalData *data = NULL; +
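// [Editor's note, not part of the patch] The process callback below follows
// the dispatch pattern shared by all amd_rpp kernels: recover the node-local
// data, query the input image format once, then select the RPP entry point
// by backend and layout, which for this kernel means:
//
//   GPU + VX_DF_IMAGE_U8  -> rppi_exposure_u8_pln1_batchPD_gpu   (planar, 1 channel)
//   GPU + VX_DF_IMAGE_RGB -> rppi_exposure_u8_pkd3_batchPD_gpu   (packed, 3 channels)
//   CPU + VX_DF_IMAGE_U8  -> rppi_exposure_u8_pln1_batchPD_host
//   CPU + VX_DF_IMAGE_RGB -> rppi_exposure_u8_pkd3_batchPD_host
//
// refreshExposurebatchPD() runs before each call so the cached OpenCL/HIP/host
// pointers track the buffers currently bound to the node's image parameters.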
STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshExposurebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exposure_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->exposureValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exposure_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->exposureValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshExposurebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_exposure_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->exposureValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_exposure_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->exposureValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshExposurebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exposure_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->exposureValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exposure_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->exposureValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshExposurebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_exposure_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->exposureValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_exposure_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->exposureValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshExposurebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exposure_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->exposureValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exposure_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->exposureValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshExposurebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_exposure_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->exposureValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_exposure_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->exposureValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ExposurebatchPDLocalData * data = new ExposurebatchPDLocalData; - memset(data, 0, sizeof(*data)); + ExposurebatchPDLocalData *data = new ExposurebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshExposurebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->exposureValue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + refreshExposurebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static 
vx_status VX_CALLBACK uninitializeExposurebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ExposurebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ExposurebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->exposureValue); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for the OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status ExposurebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExposurebatchPD", - VX_KERNEL_RPP_EXPOSUREBATCHPD, - processExposurebatchPD, - 7, - validateExposurebatchPD, - initializeExposurebatchPD, - uninitializeExposurebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExposurebatchPD", + VX_KERNEL_RPP_EXPOSUREBATCHPD, + processExposurebatchPD, + 7, + validateExposurebatchPD, + initializeExposurebatchPD, + uninitializeExposurebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
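// [Editor's note, not part of the patch] When this attribute is set, the
// runtime hands the kernel device-side handles (a cl_mem via
// VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, or a device pointer via
// VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER) rather than host-mapped memory; those
// are exactly the queries refreshExposurebatchPD performs. Without it, only
// the VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER path would be usable.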
+ vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ExposurebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/ExposurebatchPDROID.cpp deleted file mode 100644 index 3ca4ee07ff..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ExposurebatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ExposurebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *exposureValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshExposurebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, ExposurebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->exposureValue = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->exposureValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, 
VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateExposurebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExposurebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processExposurebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ExposurebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = 
VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshExposurebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exposure_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->exposureValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exposure_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->exposureValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshExposurebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exposure_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->exposureValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exposure_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->exposureValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeExposurebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExposurebatchPDROIDLocalData * data = new ExposurebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshExposurebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeExposurebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExposurebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ExposurebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExposurebatchPDROID", - VX_KERNEL_RPP_EXPOSUREBATCHPDROID, - processExposurebatchPDROID, - 11, - validateExposurebatchPDROID, - initializeExposurebatchPDROID, - uninitializeExposurebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, 
VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ExposurebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ExposurebatchPS.cpp deleted file mode 100644 index 73e4bde977..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ExposurebatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ExposurebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f exposureValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshExposurebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ExposurebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->exposureValue)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateExposurebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image 
input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ExposurebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processExposurebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ExposurebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshExposurebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exposure_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->exposureValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exposure_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->exposureValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshExposurebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_exposure_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->exposureValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_exposure_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->exposureValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeExposurebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExposurebatchPSLocalData * data = new ExposurebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshExposurebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeExposurebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ExposurebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ExposurebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ExposurebatchPS", - VX_KERNEL_RPP_EXPOSUREBATCHPS, - processExposurebatchPS, - 7, - validateExposurebatchPS, - initializeExposurebatchPS, - uninitializeExposurebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/FastCornerDetector.cpp b/amd_openvx_extensions/amd_rpp/source/FastCornerDetector.cpp index 
bda9ced5fd..b75a26c48d 100644 --- a/amd_openvx_extensions/amd_rpp/source/FastCornerDetector.cpp +++ b/amd_openvx_extensions/amd_rpp/source/FastCornerDetector.cpp @@ -22,190 +22,246 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct FastCornerDetectorLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u noOfPixels; - Rpp8u threshold; - Rpp32u nonMaxKernelSize; +struct FastCornerDetectorLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + RppiSize srcDimensions; + Rpp32u device_type; + RppPtr_t pSrc; + RppPtr_t pDst; + Rpp32u noOfPixels; + Rpp8u threshold; + Rpp32u nonMaxKernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshFastCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num, FastCornerDetectorLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->noOfPixels)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->threshold)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nonMaxKernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->noOfPixels)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->threshold)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nonMaxKernelSize)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) 
+ { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateFastCornerDetector(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT8) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FastCornerDetector: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #2 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT8) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be VX_TYPE_UINT8)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
+ { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FastCornerDetector: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; }
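// ---------------------------------------------------------------------------
// A minimal sketch of inputs that satisfy the validator above, assuming a
// valid vx_context named context; vxCreateImage and vxCreateScalar are
// standard OpenVX APIs, while the dimensions and variable names here are
// hypothetical.
//
// vx_image src = vxCreateImage(context, 1920, 1080, VX_DF_IMAGE_U8); // or VX_DF_IMAGE_RGB
// vx_uint32 noOfPixels = 0;       // parameter #2: must be VX_TYPE_UINT32
// vx_uint8 threshold = 20;        // parameter #3: must be VX_TYPE_UINT8
// vx_uint32 nonMaxKernelSize = 3; // parameter #4: must be VX_TYPE_UINT32
// vx_scalar sNoOfPixels = vxCreateScalar(context, VX_TYPE_UINT32, &noOfPixels);
// vx_scalar sThreshold = vxCreateScalar(context, VX_TYPE_UINT8, &threshold);
// vx_scalar sKernelSize = vxCreateScalar(context, VX_TYPE_UINT32, &nonMaxKernelSize);
// ---------------------------------------------------------------------------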
-static vx_status VX_CALLBACK processFastCornerDetector(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FastCornerDetectorLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFastCornerDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fast_corner_detector_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fast_corner_detector_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ?
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFastCornerDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fast_corner_detector_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fast_corner_detector_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; +static vx_status VX_CALLBACK processFastCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + FastCornerDetectorLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + // #if ENABLE_OPENCL + // refreshFastCornerDetector(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_fast_corner_detector_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_fast_corner_detector_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #elif ENABLE_HIP + // refreshFastCornerDetector(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_fast_corner_detector_u8_pln1_gpu((void *)data->hip_pSrc,data->srcDimensions,(void *)data->hip_pDst,data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_fast_corner_detector_u8_pkd3_gpu((void *)data->hip_pSrc,data->srcDimensions,(void *)data->hip_pDst,data->noOfPixels,data->threshold,data->nonMaxKernelSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #endif + return VX_ERROR_NOT_IMPLEMENTED; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshFastCornerDetector(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_fast_corner_detector_u8_pln1_host(data->pSrc, data->srcDimensions, data->pDst, data->noOfPixels, data->threshold, data->nonMaxKernelSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_fast_corner_detector_u8_pkd3_host(data->pSrc, data->srcDimensions, data->pDst, data->noOfPixels, data->threshold, data->nonMaxKernelSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeFastCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeFastCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) { - FastCornerDetectorLocalData * data = new FastCornerDetectorLocalData; - memset(data, 0, sizeof(*data)); + FastCornerDetectorLocalData *data = new FastCornerDetectorLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFastCornerDetector(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + refreshFastCornerDetector(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.hipstream); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeFastCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) { - FastCornerDetectorLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + FastCornerDetectorLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; }
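// ---------------------------------------------------------------------------
// A minimal sketch of how an application drives this callback: the context
// affinity set below is what vxQueryContext reads above. Assumes a valid
// OpenVX context and the AMD extension header (vx_ext_amd.h) that defines
// VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY and AgoTargetAffinityInfo.
//
// AgoTargetAffinityInfo affinity = {0};
// affinity.device_type = AGO_TARGET_AFFINITY_GPU; // or AGO_TARGET_AFFINITY_CPU
// vxSetContextAttribute(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,
//                       &affinity, sizeof(affinity));
// // Graphs verified after this point report GPU affinity from this callback
// // (forced back to CPU on the OpenCL backend, as noted above).
// ---------------------------------------------------------------------------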
vx_status FastCornerDetector_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FastCornerDetector", - VX_KERNEL_RPP_FASTCORNERDETECTOR, - processFastCornerDetector, - 6, - validateFastCornerDetector, - initializeFastCornerDetector, - uninitializeFastCornerDetector); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FastCornerDetector", + VX_KERNEL_RPP_FASTCORNERDETECTOR, + processFastCornerDetector, + 6, + validateFastCornerDetector, + initializeFastCornerDetector, + uninitializeFastCornerDetector); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses GPU buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR,
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; }
diff --git a/amd_openvx_extensions/amd_rpp/source/Fisheye.cpp b/amd_openvx_extensions/amd_rpp/source/Fisheye.cpp deleted file mode 100644 index 7ae8bef635..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Fisheye.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE.
-*/ - -#include "internal_publishKernels.h" - -struct FisheyeLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFisheye(vx_node node, const vx_reference *parameters, vx_uint32 num, FisheyeLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFisheye(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Fisheye: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFisheye(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = 
VX_SUCCESS; - FisheyeLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFisheye(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fisheye_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fisheye_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFisheye(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fisheye_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fisheye_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFisheye(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FisheyeLocalData * data = new FisheyeLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFisheye(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFisheye(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FisheyeLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Fisheye_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Fisheye", - VX_KERNEL_RPP_FISHEYE, - processFisheye, - 3, - validateFisheye, - initializeFisheye, - uninitializeFisheye); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); 
-#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/FisheyebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/FisheyebatchPD.cpp index 98cdff4857..c365560b9e 100644 --- a/amd_openvx_extensions/amd_rpp/source/FisheyebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/FisheyebatchPD.cpp @@ -22,214 +22,258 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct FisheyebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; +struct FisheyebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshFisheyebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, FisheyebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, 
data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateFisheyebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FisheyebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char 
*)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FisheyebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processFisheyebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FisheyebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); -
if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processFisheyebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + FisheyebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFisheyebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fisheye_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fisheye_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshFisheyebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_fisheye_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_fisheye_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshFisheyebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fisheye_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fisheye_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshFisheyebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_fisheye_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_fisheye_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFisheyebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fisheye_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fisheye_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshFisheyebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_fisheye_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_fisheye_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeFisheyebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeFisheyebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - FisheyebatchPDLocalData * data = new FisheyebatchPDLocalData; - memset(data, 0, sizeof(*data)); + FisheyebatchPDLocalData *data = new FisheyebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFisheyebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshFisheyebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeFisheyebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - FisheyebatchPDLocalData * 
data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + FisheyebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif + + return VX_SUCCESS; }
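// ---------------------------------------------------------------------------
// A minimal sketch of instantiating this kernel through the generic OpenVX
// interface, matching the six parameters registered below. The graph, images,
// arrays, and scalars are assumed to exist already; their names are
// hypothetical, and MIVisionX's vx_ext_rpp header also declares node-creation
// helpers that wrap this boilerplate.
//
// vx_kernel k = vxGetKernelByName(context, "org.rpp.FisheyebatchPD");
// vx_node n = vxCreateGenericNode(graph, k);
// vxSetParameterByIndex(n, 0, (vx_reference)srcBatch);   // batched input image
// vxSetParameterByIndex(n, 1, (vx_reference)srcWidths);  // per-image widths (VX_TYPE_UINT32 array)
// vxSetParameterByIndex(n, 2, (vx_reference)srcHeights); // per-image heights (VX_TYPE_UINT32 array)
// vxSetParameterByIndex(n, 3, (vx_reference)dstBatch);   // batched output image
// vxSetParameterByIndex(n, 4, (vx_reference)sBatchSize); // nbatchSize scalar (VX_TYPE_UINT32)
// vxSetParameterByIndex(n, 5, (vx_reference)sDeviceType);// device affinity scalar (VX_TYPE_UINT32)
// ---------------------------------------------------------------------------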
vx_status FisheyebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FisheyebatchPD", - VX_KERNEL_RPP_FISHEYEBATCHPD, - processFisheyebatchPD, - 6, - validateFisheyebatchPD, - initializeFisheyebatchPD, - uninitializeFisheyebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FisheyebatchPD", + VX_KERNEL_RPP_FISHEYEBATCHPD, + processFisheyebatchPD, + 6, + validateFisheyebatchPD, + initializeFisheyebatchPD, + uninitializeFisheyebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL/HIP buffer access since the kernel_f callback uses GPU buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel,
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; }
diff --git a/amd_openvx_extensions/amd_rpp/source/FisheyebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/FisheyebatchPDROID.cpp deleted file mode 100644 index 29bffaa2b4..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/FisheyebatchPDROID.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE.
-*/ - -#include "internal_publishKernels.h" - -struct FisheyebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFisheyebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, FisheyebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], 
VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFisheyebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FisheyebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFisheyebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FisheyebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFisheyebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fisheye_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fisheye_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFisheyebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fisheye_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fisheye_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFisheyebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FisheyebatchPDROIDLocalData * data = new FisheyebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFisheyebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFisheyebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FisheyebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status FisheyebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FisheyebatchPDROID", - VX_KERNEL_RPP_FISHEYEBATCHPDROID, - processFisheyebatchPDROID, - 10, - validateFisheyebatchPDROID, - initializeFisheyebatchPDROID, - uninitializeFisheyebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/FisheyebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/FisheyebatchPS.cpp deleted file mode 100644 index 2ed217c959..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/FisheyebatchPS.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct FisheyebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFisheyebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, FisheyebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFisheyebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return 
ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FisheyebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFisheyebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FisheyebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFisheyebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fisheye_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fisheye_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFisheyebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fisheye_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fisheye_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFisheyebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FisheyebatchPSLocalData * data = new FisheyebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFisheyebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFisheyebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FisheyebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status FisheyebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FisheyebatchPS", - VX_KERNEL_RPP_FISHEYEBATCHPS, - processFisheyebatchPS, - 6, - validateFisheyebatchPS, - initializeFisheyebatchPS, - uninitializeFisheyebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Flip.cpp b/amd_openvx_extensions/amd_rpp/source/Flip.cpp deleted file mode 100644 index e3a5e55c57..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Flip.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 
Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct FlipLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u flipAxis; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFlip(vx_node node, const vx_reference *parameters, vx_uint32 num, FlipLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->flipAxis)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFlip(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Flip: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFlip(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FlipLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFlip(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_flip_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->flipAxis,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_flip_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->flipAxis,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFlip(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_flip_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->flipAxis,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_flip_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->flipAxis,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFlip(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FlipLocalData * data = new FlipLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFlip(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFlip(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FlipLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Flip_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Flip", - VX_KERNEL_RPP_FLIP, - processFlip, - 4, - validateFlip, - initializeFlip, - uninitializeFlip); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/FlipbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/FlipbatchPD.cpp index c540ebe433..96da21ac74 100644 --- a/amd_openvx_extensions/amd_rpp/source/FlipbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/FlipbatchPD.cpp @@ -22,219 +22,263 @@ THE SOFTWARE. 
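In brief, this hunk reworks FlipbatchPD so the per-batch scratch buffers (srcDimensions, srcBatch_width, srcBatch_height, flipAxis) are allocated once in initializeFlipbatchPD, only refilled in refreshFlipbatchPD, and freed in uninitializeFlipbatchPD; previously refreshFlipbatchPD malloc'd them on every graph execution and never freed them. A minimal sketch of that lifecycle, using illustrative names rather than the repo's API:

#include <cstdlib>

struct NodeScratch
{
    unsigned batchSize;
    unsigned *widths;  // per-image widths, sized once for the whole batch
    unsigned *heights; // per-image heights
};

// initialize: allocate once, sized by the batch
static void scratchInit(NodeScratch *s, unsigned batchSize)
{
    s->batchSize = batchSize;
    s->widths = (unsigned *)malloc(sizeof(unsigned) * batchSize);
    s->heights = (unsigned *)malloc(sizeof(unsigned) * batchSize);
}

// refresh (runs on every graph execution): refill only, never allocate
static void scratchRefresh(NodeScratch *s, const unsigned *w, const unsigned *h)
{
    for (unsigned i = 0; i < s->batchSize; i++)
    {
        s->widths[i] = w[i];
        s->heights[i] = h[i];
    }
}

// uninitialize: release exactly what initialize allocated
static void scratchFree(NodeScratch *s)
{
    free(s->widths);
    free(s->heights);
}

Beyond fixing the leak, this keeps allocator traffic off the per-frame path; the same pattern appears to be applied to the other amd_rpp nodes touched by this patch.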
#include "internal_publishKernels.h" -struct FlipbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *flipAxis; +struct FlipbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *flipAxis; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshFlipbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, FlipbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->flipAxis = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->flipAxis, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->flipAxis, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + 
data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateFlipbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FlipbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, 
&width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FlipbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processFlipbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FlipbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processFlipbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + FlipbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = 
VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFlipbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_flip_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->flipAxis,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_flip_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->flipAxis,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshFlipbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_flip_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->flipAxis, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_flip_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->flipAxis, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshFlipbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_flip_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->flipAxis,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_flip_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->flipAxis,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshFlipbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_flip_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->flipAxis, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_flip_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->flipAxis, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFlipbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_flip_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->flipAxis,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_flip_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->flipAxis,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshFlipbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_flip_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->flipAxis, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_flip_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->flipAxis, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeFlipbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeFlipbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - FlipbatchPDLocalData * data = new FlipbatchPDLocalData; - memset(data, 0, sizeof(*data)); + FlipbatchPDLocalData *data = new FlipbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFlipbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->flipAxis = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + refreshFlipbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeFlipbatchPD(vx_node node, 
const vx_reference *parameters, vx_uint32 num) { - FlipbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + FlipbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->flipAxis); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status FlipbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FlipbatchPD", - VX_KERNEL_RPP_FLIPBATCHPD, - processFlipbatchPD, - 7, - validateFlipbatchPD, - initializeFlipbatchPD, - uninitializeFlipbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FlipbatchPD", + VX_KERNEL_RPP_FLIPBATCHPD, + processFlipbatchPD, + 7, + validateFlipbatchPD, + initializeFlipbatchPD, + uninitializeFlipbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + 
STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/FlipbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/FlipbatchPDROID.cpp deleted file mode 100644 index d121701f25..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/FlipbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct FlipbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *flipAxis; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFlipbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, FlipbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->flipAxis = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->flipAxis, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - 
if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFlipbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FlipbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFlipbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FlipbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFlipbatchPDROID(node, 
parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_flip_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->flipAxis,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_flip_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->flipAxis,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFlipbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_flip_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->flipAxis,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_flip_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->flipAxis,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFlipbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FlipbatchPDROIDLocalData * data = new FlipbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFlipbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFlipbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FlipbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status FlipbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FlipbatchPDROID", - VX_KERNEL_RPP_FLIPBATCHPDROID, - processFlipbatchPDROID, - 11, - validateFlipbatchPDROID, - initializeFlipbatchPDROID, - uninitializeFlipbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, 
&enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/FlipbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/FlipbatchPS.cpp deleted file mode 100644 index 1856116654..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/FlipbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct FlipbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u flipAxis; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFlipbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, FlipbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->flipAxis)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFlipbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; 
- input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FlipbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFlipbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FlipbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFlipbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_flip_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->flipAxis,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_flip_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->flipAxis,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFlipbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_flip_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->flipAxis,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_flip_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->flipAxis,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFlipbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FlipbatchPSLocalData * data = new FlipbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFlipbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFlipbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FlipbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status FlipbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FlipbatchPS", - VX_KERNEL_RPP_FLIPBATCHPS, - processFlipbatchPS, - 7, - validateFlipbatchPS, - initializeFlipbatchPS, - uninitializeFlipbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Fog.cpp b/amd_openvx_extensions/amd_rpp/source/Fog.cpp deleted file mode 100644 index 2c830a87e3..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/Fog.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct FogLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f fogValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFog(vx_node node, const vx_reference *parameters, vx_uint32 num, FogLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->fogValue)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFog(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - 
STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Fog: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFog(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FogLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFog(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fog_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->fogValue,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fog_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->fogValue,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFog(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fog_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->fogValue,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fog_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->fogValue,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFog(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FogLocalData * data = new FogLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFog(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFog(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FogLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Fog_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Fog", - VX_KERNEL_RPP_FOG, - processFog, - 4, - validateFog, - initializeFog, - uninitializeFog); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/FogbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/FogbatchPD.cpp index 6fe6018391..14a6b7f233 100644 --- a/amd_openvx_extensions/amd_rpp/source/FogbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/FogbatchPD.cpp @@ -22,221 +22,264 @@ THE SOFTWARE. 
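Note on the FogbatchPD.cpp hunk that follows, since it is the template for the batchPD rewrites in this patch: besides reindenting, it moves the per-call malloc of srcDimensions, srcBatch_width, srcBatch_height, and fogValue out of refreshFogbatchPD (which runs on every graph execution and previously never freed them) into initializeFogbatchPD, where nbatchSize is read once, adds the matching free calls in uninitializeFogbatchPD, and registers a query_target_support callback on the kernel. The following is a minimal, self-contained sketch of that allocate-once/refresh-many/free-once lifetime; every name in it is illustrative and not taken from the amd_rpp sources.

    // Sketch: scratch buffers allocated once at node init, reused on every
    // refresh, freed at uninit -- the lifetime this hunk adopts. Illustrative only.
    #include <cstdlib>
    #include <cstring>

    struct BatchScratch {
        unsigned nbatch;       // plays the role of nbatchSize
        unsigned *widths;      // plays the role of srcBatch_width
        unsigned *heights;     // plays the role of srcBatch_height
    };

    static void initScratch(BatchScratch *s, unsigned nbatch) {
        s->nbatch = nbatch;    // read once, like parameters[5] in initialize
        s->widths = (unsigned *)malloc(sizeof(unsigned) * nbatch);
        s->heights = (unsigned *)malloc(sizeof(unsigned) * nbatch);
    }

    static void refreshScratch(BatchScratch *s, const unsigned *w, const unsigned *h) {
        // per-execution path only copies into the buffers; no allocation,
        // so repeated executions no longer leak
        memcpy(s->widths, w, sizeof(unsigned) * s->nbatch);
        memcpy(s->heights, h, sizeof(unsigned) * s->nbatch);
    }

    static void uninitScratch(BatchScratch *s) {
        free(s->widths);       // mirrors the new free() calls in uninitialize
        free(s->heights);
    }

    int main() {
        const unsigned w[2] = {640, 640}, h[2] = {480, 480};
        BatchScratch s;
        initScratch(&s, 2);
        for (int frame = 0; frame < 3; ++frame)
            refreshScratch(&s, w, h);  // safe to call once per frame
        uninitScratch(&s);
        return 0;
    }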
#include "internal_publishKernels.h" -struct FogbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *fogValue; +struct FogbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *fogValue; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshFogbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, FogbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->fogValue = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->fogValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->fogValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + 
data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateFogbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FogbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, 
sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FogbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processFogbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FogbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); +static vx_status VX_CALLBACK processFogbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + FogbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; +
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFogbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fog_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->fogValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fog_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->fogValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshFogbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_fog_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->fogValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_fog_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->fogValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshFogbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fog_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->fogValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fog_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->fogValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshFogbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_fog_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->fogValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_fog_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->fogValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFogbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fog_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->fogValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fog_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->fogValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshFogbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_fog_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->fogValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_fog_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->fogValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeFogbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeFogbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - FogbatchPDLocalData * data = new FogbatchPDLocalData; - memset(data, 0, sizeof(*data)); + FogbatchPDLocalData *data = new FogbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFogbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->fogValue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + refreshFogbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeFogbatchPD(vx_node node, const 
vx_reference *parameters, vx_uint32 num) { - FogbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + FogbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->fogValue); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status FogbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FogbatchPD", - VX_KERNEL_RPP_FOGBATCHPD, - processFogbatchPD, - 7, - validateFogbatchPD, - initializeFogbatchPD, - uninitializeFogbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FogbatchPD", + VX_KERNEL_RPP_FOGBATCHPD, + processFogbatchPD, + 7, + validateFogbatchPD, + initializeFogbatchPD, + uninitializeFogbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel,
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/FogbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/FogbatchPDROID.cpp deleted file mode 100644 index fd01b703fa..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/FogbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct FogbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *fogValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFogbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, FogbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->fogValue = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->fogValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - 
if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFogbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FogbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFogbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FogbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFogbatchPDROID(node, 
parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_fog_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->fogValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_fog_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->fogValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFogbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_fog_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->fogValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_fog_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->fogValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFogbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FogbatchPDROIDLocalData * data = new FogbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFogbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFogbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FogbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status FogbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FogbatchPDROID", - VX_KERNEL_RPP_FOGBATCHPDROID, - processFogbatchPDROID, - 11, - validateFogbatchPDROID, - initializeFogbatchPDROID, - uninitializeFogbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, 
&enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/FogbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/FogbatchPS.cpp deleted file mode 100644 index 5062aa5261..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/FogbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct FogbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f fogValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshFogbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, FogbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->fogValue)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateFogbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - 
input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: FogbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processFogbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - FogbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshFogbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fog_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->fogValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fog_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->fogValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshFogbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_fog_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->fogValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_fog_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->fogValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeFogbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FogbatchPSLocalData * data = new FogbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshFogbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeFogbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - FogbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status FogbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.FogbatchPS", - VX_KERNEL_RPP_FOGBATCHPS, - processFogbatchPS, - 7, - validateFogbatchPS, - initializeFogbatchPS, - uninitializeFogbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/GammaCorrection.cpp b/amd_openvx_extensions/amd_rpp/source/GammaCorrection.cpp deleted file mode 100644 index 4476a33324..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/GammaCorrection.cpp +++ /dev/null @@ -1,203 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct GammaCorrectionLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f gamma; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGammaCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num, GammaCorrectionLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->gamma)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGammaCorrection(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image 
df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GammaCorrection: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGammaCorrection(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GammaCorrectionLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshGammaCorrection(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gamma_correction_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->gamma,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // std::cerr<<"\n----------- Gonna call Gamma Correction------------\n"; - // std::cerr<<"\n data->gamma----"<gamma; - rpp_status = rppi_gamma_correction_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->gamma,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGammaCorrection(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gamma_correction_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->gamma,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gamma_correction_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->gamma,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGammaCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GammaCorrectionLocalData * data = new GammaCorrectionLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGammaCorrection(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGammaCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GammaCorrectionLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status GammaCorrection_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GammaCorrection", - VX_KERNEL_RPP_GAMMACORRECTION, - processGammaCorrection, - 4, - validateGammaCorrection, - initializeGammaCorrection, - uninitializeGammaCorrection); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/GammaCorrectionbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/GammaCorrectionbatchPD.cpp index 7368ad03de..e188c71f10 100644 --- a/amd_openvx_extensions/amd_rpp/source/GammaCorrectionbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/GammaCorrectionbatchPD.cpp @@ -22,219 +22,263 @@ THE SOFTWARE. 
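The hunk below carries the substantive change in this file: unlike the Fog variants above, GammaCorrectionbatchPD.cpp is reworked rather than deleted. The per-batch buffers (srcDimensions, srcBatch_width, srcBatch_height, gamma) move out of refreshGammaCorrectionbatchPD, which previously malloc'd them on every graph execution and never freed them, into initializeGammaCorrectionbatchPD, with matching free() calls added to the uninitialize callback; a HIP code path and a query_target_support callback are also added alongside the existing OpenCL one. A minimal sketch of the allocate-once / refresh-many lifecycle the patch adopts (simplified, hypothetical names, not the literal OpenVX callbacks):

    #include <cstdlib>   // malloc, free
    #include <cstring>   // memcpy

    // Trimmed-down, hypothetical analogue of GammaCorrectionbatchPDLocalData.
    struct LocalData {
        unsigned nbatchSize;
        float   *gamma;   // one gamma coefficient per image in the batch
    };

    // initialize: runs once per node; every per-batch buffer is sized here.
    LocalData *initializeNode(unsigned nbatchSize) {
        LocalData *d = new LocalData();
        d->nbatchSize = nbatchSize;
        d->gamma = (float *)malloc(sizeof(float) * nbatchSize);
        return d;
    }

    // refresh: runs on every graph execution; copies fresh parameter values
    // into the preallocated buffer and never allocates.
    void refreshNode(LocalData *d, const float *gammaSrc) {
        memcpy(d->gamma, gammaSrc, sizeof(float) * d->nbatchSize);
    }

    // uninitialize: releases exactly what initialize allocated.
    void uninitializeNode(LocalData *d) {
        free(d->gamma);
        delete d;
    }

Under the old structure the malloc in refresh leaked one set of buffers per frame; hoisting the allocations also makes refresh safe to call from both initialize and process, which is how the patch uses it.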
#include "internal_publishKernels.h" -struct GammaCorrectionbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *gamma; +struct GammaCorrectionbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *gamma; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshGammaCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, GammaCorrectionbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->gamma = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->gamma, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->gamma, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = 
data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateGammaCorrectionbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GammaCorrectionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GammaCorrectionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processGammaCorrectionbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GammaCorrectionbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processGammaCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + GammaCorrectionbatchPDLocalData *data =
NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshGammaCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gamma_correction_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->gamma,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->gamma,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshGammaCorrectionbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_gamma_correction_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->gamma, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->gamma, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshGammaCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gamma_correction_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->gamma,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->gamma,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshGammaCorrectionbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_gamma_correction_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->gamma, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->gamma, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGammaCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gamma_correction_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->gamma,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->gamma,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshGammaCorrectionbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_gamma_correction_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->gamma, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->gamma, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeGammaCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeGammaCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - GammaCorrectionbatchPDLocalData * data = new GammaCorrectionbatchPDLocalData; - memset(data, 0, sizeof(*data)); + GammaCorrectionbatchPDLocalData *data = new GammaCorrectionbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGammaCorrectionbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->gamma = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + refreshGammaCorrectionbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, 
&data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeGammaCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - GammaCorrectionbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + GammaCorrectionbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->gamma); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status GammaCorrectionbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GammaCorrectionbatchPD", - VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD, - processGammaCorrectionbatchPD, - 7, - validateGammaCorrectionbatchPD, - initializeGammaCorrectionbatchPD, - uninitializeGammaCorrectionbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GammaCorrectionbatchPD", + VX_KERNEL_RPP_GAMMACORRECTIONBATCHPD, + processGammaCorrectionbatchPD, + 7, + validateGammaCorrectionbatchPD, + initializeGammaCorrectionbatchPD, + uninitializeGammaCorrectionbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE,
&enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/GammaCorrectionbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/GammaCorrectionbatchPDROID.cpp deleted file mode 100644 index c2494e3b92..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/GammaCorrectionbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct GammaCorrectionbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *gamma; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGammaCorrectionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, GammaCorrectionbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->gamma = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->gamma, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - 
copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGammaCorrectionbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GammaCorrectionbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], 
VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGammaCorrectionbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GammaCorrectionbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshGammaCorrectionbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gamma_correction_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->gamma,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->gamma,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGammaCorrectionbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gamma_correction_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->gamma,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gamma_correction_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->gamma,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGammaCorrectionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GammaCorrectionbatchPDROIDLocalData * data = new GammaCorrectionbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGammaCorrectionbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGammaCorrectionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GammaCorrectionbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status GammaCorrectionbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GammaCorrectionbatchPDROID", - VX_KERNEL_RPP_GAMMACORRECTIONBATCHPDROID, - processGammaCorrectionbatchPDROID, - 11, - validateGammaCorrectionbatchPDROID, - initializeGammaCorrectionbatchPDROID, - uninitializeGammaCorrectionbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, 
VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/GammaCorrectionbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/GammaCorrectionbatchPS.cpp deleted file mode 100644 index e6c84a2855..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/GammaCorrectionbatchPS.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct GammaCorrectionbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f gamma; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGammaCorrectionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, GammaCorrectionbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->gamma)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGammaCorrectionbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - 
vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GammaCorrectionbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGammaCorrectionbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GammaCorrectionbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshGammaCorrectionbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gamma_correction_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->gamma,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // std::cerr<< "........Inside Gamma PS .........\n" ; - rpp_status = rppi_gamma_correction_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->gamma,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGammaCorrectionbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gamma_correction_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->gamma,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gamma_correction_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->gamma,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGammaCorrectionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GammaCorrectionbatchPSLocalData * data = new GammaCorrectionbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGammaCorrectionbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGammaCorrectionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GammaCorrectionbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status GammaCorrectionbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GammaCorrectionbatchPS", - VX_KERNEL_RPP_GAMMACORRECTIONBATCHPS, - processGammaCorrectionbatchPS, - 7, - validateGammaCorrectionbatchPS, - initializeGammaCorrectionbatchPS, - uninitializeGammaCorrectionbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git 
a/amd_openvx_extensions/amd_rpp/source/GaussianFilter.cpp b/amd_openvx_extensions/amd_rpp/source/GaussianFilter.cpp deleted file mode 100644 index 802ae9cde3..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/GaussianFilter.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct GaussianFilterLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f stdDev; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGaussianFilter(vx_node node, const vx_reference *parameters, vx_uint32 num, GaussianFilterLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->stdDev)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGaussianFilter(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], 
VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianFilter: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGaussianFilter(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GaussianFilterLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshGaussianFilter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_filter_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_filter_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGaussianFilter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_filter_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_filter_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGaussianFilter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianFilterLocalData * data = new GaussianFilterLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGaussianFilter(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGaussianFilter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianFilterLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status GaussianFilter_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianFilter", - VX_KERNEL_RPP_GAUSSIANFILTER, - processGaussianFilter, - 5, - validateGaussianFilter, - initializeGaussianFilter, - uninitializeGaussianFilter); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/GaussianFilterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/GaussianFilterbatchPD.cpp index d03e08be2c..6b3b42ed60 100644 --- a/amd_openvx_extensions/amd_rpp/source/GaussianFilterbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/GaussianFilterbatchPD.cpp @@ -22,206 +22,269 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct GaussianFilterbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *stdDev; - vx_uint32 *kernelSize; +struct GaussianFilterbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *stdDev; + vx_uint32 *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshGaussianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, GaussianFilterbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, 
sizeof(vx_float32), data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateGaussianFilterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - 
STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processGaussianFilterbatchPD(vx_node node, const 
vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GaussianFilterbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processGaussianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + GaussianFilterbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshGaussianFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshGaussianFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_gaussian_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_gaussian_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshGaussianFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_gaussian_filter_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_gaussian_filter_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGaussianFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_filter_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_filter_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshGaussianFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_gaussian_filter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_gaussian_filter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeGaussianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeGaussianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - GaussianFilterbatchPDLocalData * data = new GaussianFilterbatchPDLocalData; - memset(data, 0, sizeof(*data)); + GaussianFilterbatchPDLocalData *data = new GaussianFilterbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGaussianFilterbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshGaussianFilterbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + 
rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeGaussianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - GaussianFilterbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + GaussianFilterbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->stdDev); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status GaussianFilterbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianFilterbatchPD", - VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD, - processGaussianFilterbatchPD, - 8, - validateGaussianFilterbatchPD, - initializeGaussianFilterbatchPD, - uninitializeGaussianFilterbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianFilterbatchPD", + VX_KERNEL_RPP_GAUSSIANFILTERBATCHPD, + processGaussianFilterbatchPD, + 8, + validateGaussianFilterbatchPD, + initializeGaussianFilterbatchPD, + 
uninitializeGaussianFilterbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/GaussianFilterbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/GaussianFilterbatchPDROID.cpp deleted file 
mode 100644 index 9ab3856ede..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/GaussianFilterbatchPDROID.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct GaussianFilterbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *stdDev; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGaussianFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, GaussianFilterbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, 
sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGaussianFilterbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianFilterbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - 
output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGaussianFilterbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GaussianFilterbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshGaussianFilterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_filter_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_filter_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGaussianFilterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_filter_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_filter_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGaussianFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianFilterbatchPDROIDLocalData * data = new GaussianFilterbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGaussianFilterbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGaussianFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianFilterbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status GaussianFilterbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianFilterbatchPDROID", - VX_KERNEL_RPP_GAUSSIANFILTERBATCHPDROID, - processGaussianFilterbatchPDROID, - 12, - validateGaussianFilterbatchPDROID, - initializeGaussianFilterbatchPDROID, - uninitializeGaussianFilterbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, 
VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/GaussianFilterbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/GaussianFilterbatchPS.cpp deleted file mode 100644 index 77728ee0db..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/GaussianFilterbatchPS.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct GaussianFilterbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f stdDev; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGaussianFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, GaussianFilterbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->stdDev)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGaussianFilterbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 
type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianFilterbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGaussianFilterbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GaussianFilterbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshGaussianFilterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_filter_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_filter_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGaussianFilterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_filter_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_filter_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGaussianFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianFilterbatchPSLocalData * data = new GaussianFilterbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGaussianFilterbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGaussianFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianFilterbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status GaussianFilterbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianFilterbatchPS", - VX_KERNEL_RPP_GAUSSIANFILTERBATCHPS, - processGaussianFilterbatchPS, - 8, - validateGaussianFilterbatchPS, - initializeGaussianFilterbatchPS, - uninitializeGaussianFilterbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
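Each of these kernels is published only under an "org.rpp.*" name, so client code instantiates them through the generic OpenVX node API rather than a dedicated C wrapper. A sketch against the eight-parameter signature registered here (the batchPS kernel is removed by this patch, but its surviving siblings follow the same shape); the wrapper function itself is an assumption and error handling is omitted:

```cpp
#include <VX/vx.h>

// Sketch: build a GaussianFilterbatchPS node via the generic node API.
// Parameter order mirrors the vxAddParameterToKernel() calls in this register function.
static vx_node createGaussianFilterbatchPSNode(vx_graph graph,
    vx_image src, vx_array srcWidths, vx_array srcHeights, vx_image dst,
    vx_scalar stdDev, vx_scalar kernelSize, vx_scalar nbatchSize, vx_scalar deviceType)
{
    vx_context context = vxGetContext((vx_reference)graph);
    vx_kernel kernel = vxGetKernelByName(context, "org.rpp.GaussianFilterbatchPS");
    vx_node node = vxCreateGenericNode(graph, kernel);
    vx_reference params[8] = {
        (vx_reference)src, (vx_reference)srcWidths, (vx_reference)srcHeights,
        (vx_reference)dst, (vx_reference)stdDev, (vx_reference)kernelSize,
        (vx_reference)nbatchSize, (vx_reference)deviceType};
    for (vx_uint32 i = 0; i < 8; i++)
        vxSetParameterByIndex(node, i, params[i]);  // index i == kernel parameter #i
    vxReleaseKernel(&kernel);
    return node;
}
```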
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/GaussianImagePyramid.cpp b/amd_openvx_extensions/amd_rpp/source/GaussianImagePyramid.cpp deleted file mode 100644 index f794249d49..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/GaussianImagePyramid.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct GaussianImagePyramidLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f stdDev; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGaussianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num, GaussianImagePyramidLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->stdDev)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGaussianImagePyramid(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianImagePyramid: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, 
&width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGaussianImagePyramid(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GaussianImagePyramidLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshGaussianImagePyramid(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_image_pyramid_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGaussianImagePyramid(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_image_pyramid_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGaussianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianImagePyramidLocalData * data = new GaussianImagePyramidLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGaussianImagePyramid(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGaussianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianImagePyramidLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status GaussianImagePyramid_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianImagePyramid", - VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMID, - processGaussianImagePyramid, - 5, - validateGaussianImagePyramid, - initializeGaussianImagePyramid, - uninitializeGaussianImagePyramid); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/GaussianImagePyramidbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/GaussianImagePyramidbatchPD.cpp index 6d9572c4b2..30aa0ff0aa 100644 --- a/amd_openvx_extensions/amd_rpp/source/GaussianImagePyramidbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/GaussianImagePyramidbatchPD.cpp @@ -22,206 
+22,269 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct GaussianImagePyramidbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *stdDev; - vx_uint32 *kernelSize; +struct GaussianImagePyramidbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *stdDev; + vx_uint32 *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshGaussianImagePyramidbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, GaussianImagePyramidbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, 
&data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateGaussianImagePyramidbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianImagePyramidbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for 
output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianImagePyramidbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processGaussianImagePyramidbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GaussianImagePyramidbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - 
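The process hunk that follows is where the new HIP backend shows up: under ENABLE_HIP the device buffers are raw void* pointers queried with VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER instead of cl_mem objects queried with VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, while the rppi_* entry point is identical in both branches. Condensed into one sketch with the callback boilerplate stripped; this is a reading aid, not a drop-in replacement for processGaussianImagePyramidbatchPD:

```cpp
// Exactly one GPU backend is compiled in; only the buffer representation differs.
static vx_status runGpuBranch(GaussianImagePyramidbatchPDLocalData *data)
{
#if ENABLE_OPENCL
    // OpenCL backend: cl_mem handles owned by the OpenVX runtime.
    void *pSrc = (void *)data->cl_pSrc, *pDst = (void *)data->cl_pDst;
#elif ENABLE_HIP
    // HIP backend: raw device pointers.
    void *pSrc = data->hip_pSrc, *pDst = data->hip_pDst;
#endif
    // Same RPP call either way; data->rppHandle was created against the matching
    // cl_command_queue or hipStream_t in the initialize callback.
    RppStatus s = rppi_gaussian_image_pyramid_u8_pkd3_batchPD_gpu(
        pSrc, data->srcDimensions, data->maxSrcDimensions, pDst,
        data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle);
    return (s == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
}
```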
if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processGaussianImagePyramidbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + GaussianImagePyramidbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshGaussianImagePyramidbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_image_pyramid_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshGaussianImagePyramidbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_gaussian_image_pyramid_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshGaussianImagePyramidbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_gaussian_image_pyramid_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGaussianImagePyramidbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_image_pyramid_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshGaussianImagePyramidbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_gaussian_image_pyramid_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeGaussianImagePyramidbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeGaussianImagePyramidbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - GaussianImagePyramidbatchPDLocalData * data = new GaussianImagePyramidbatchPDLocalData; - memset(data, 0, sizeof(*data)); + GaussianImagePyramidbatchPDLocalData *data = new GaussianImagePyramidbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGaussianImagePyramidbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshGaussianImagePyramidbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK 
uninitializeGaussianImagePyramidbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - GaussianImagePyramidbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + GaussianImagePyramidbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->stdDev); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status GaussianImagePyramidbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianImagePyramidbatchPD", - VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD, - processGaussianImagePyramidbatchPD, - 8, - validateGaussianImagePyramidbatchPD, - initializeGaussianImagePyramidbatchPD, - uninitializeGaussianImagePyramidbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianImagePyramidbatchPD", + VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPD, + processGaussianImagePyramidbatchPD, + 8, + validateGaussianImagePyramidbatchPD, + initializeGaussianImagePyramidbatchPD, + uninitializeGaussianImagePyramidbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel,
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/GaussianImagePyramidbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/GaussianImagePyramidbatchPS.cpp deleted file mode 100644 index 1884b05847..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/GaussianImagePyramidbatchPS.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct GaussianImagePyramidbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f stdDev; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshGaussianImagePyramidbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, GaussianImagePyramidbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->stdDev)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - 
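The refresh callback being deleted here preserves the pre-patch allocation pattern worth noticing: srcDimensions and the two width/height staging buffers are malloc'd on every graph execution and never freed. The updated batchPD file earlier in this patch fixes exactly that by allocating once in initialize (nbatchSize is fixed for the node's lifetime), only copying in refresh, and freeing in uninitialize. The shape of that fix in miniature, using the types from these files with illustrative names and assuming the usual stdlib/OpenVX headers:

```cpp
struct BatchStaging {               // illustrative stand-in for the *LocalData structs
    Rpp32u nbatchSize;
    RppiSize *srcDimensions;
    Rpp32u *widths, *heights;
};

static void stagingInit(BatchStaging *d)      // allocate once (was: malloc per refresh)
{
    d->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * d->nbatchSize);
    d->widths  = (Rpp32u *)malloc(sizeof(Rpp32u) * d->nbatchSize);
    d->heights = (Rpp32u *)malloc(sizeof(Rpp32u) * d->nbatchSize);
}

static void stagingRefresh(BatchStaging *d, vx_array wArr, vx_array hArr)
{
    // Copies only: repeated executions can no longer leak.
    vxCopyArrayRange(wArr, 0, d->nbatchSize, sizeof(Rpp32u), d->widths,  VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
    vxCopyArrayRange(hArr, 0, d->nbatchSize, sizeof(Rpp32u), d->heights, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
    for (Rpp32u i = 0; i < d->nbatchSize; i++) {
        d->srcDimensions[i].width  = d->widths[i];
        d->srcDimensions[i].height = d->heights[i];
    }
}

static void stagingRelease(BatchStaging *d)   // was: missing entirely
{
    free(d->srcDimensions);
    free(d->widths);
    free(d->heights);
}
```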
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateGaussianImagePyramidbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: GaussianImagePyramidbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processGaussianImagePyramidbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - GaussianImagePyramidbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - 
refreshGaussianImagePyramidbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_image_pyramid_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshGaussianImagePyramidbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_gaussian_image_pyramid_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_gaussian_image_pyramid_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeGaussianImagePyramidbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianImagePyramidbatchPSLocalData * data = new GaussianImagePyramidbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshGaussianImagePyramidbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeGaussianImagePyramidbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - GaussianImagePyramidbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status GaussianImagePyramidbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.GaussianImagePyramidbatchPS", - VX_KERNEL_RPP_GAUSSIANIMAGEPYRAMIDBATCHPS, - processGaussianImagePyramidbatchPS, - 8, - validateGaussianImagePyramidbatchPS, - initializeGaussianImagePyramidbatchPS, - uninitializeGaussianImagePyramidbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f 
callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/HarrisCornerDetector.cpp b/amd_openvx_extensions/amd_rpp/source/HarrisCornerDetector.cpp index b3c921e1f0..e56b95b2c9 100644 --- a/amd_openvx_extensions/amd_rpp/source/HarrisCornerDetector.cpp +++ b/amd_openvx_extensions/amd_rpp/source/HarrisCornerDetector.cpp @@ -22,205 +22,246 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct HarrisCornerDetectorLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u gaussianKernelSize; - Rpp32f stdDev; - Rpp32u kernelSize; - Rpp32f kValue; - Rpp32f threshold; - Rpp32u nonMaxKernelSize; +struct HarrisCornerDetectorLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + RppiSize srcDimensions; + Rpp32u device_type; + RppPtr_t pSrc; + RppPtr_t pDst; + Rpp32u gaussianKernelSize; + Rpp32f stdDev; + Rpp32u kernelSize; + Rpp32f kValue; + Rpp32f threshold; + Rpp32u nonMaxKernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#endif }; static vx_status VX_CALLBACK refreshHarrisCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num, HarrisCornerDetectorLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->gaussianKernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->stdDev)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->kValue)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->threshold)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nonMaxKernelSize)); - if(data->device_type == 
AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->gaussianKernelSize)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->stdDev)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->kValue)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->threshold)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nonMaxKernelSize)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateHarrisCornerDetector(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return 
ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HarrisCornerDetector: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; +    vx_status status = VX_SUCCESS; +    vx_enum scalar_type; +    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); +    if (scalar_type != VX_TYPE_UINT32) +        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #2 type=%d (must be size)\n", scalar_type); +    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); +    if (scalar_type != VX_TYPE_FLOAT32) +        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be size)\n", scalar_type); +    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); +    if (scalar_type != VX_TYPE_UINT32) +        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type); +    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); +    if (scalar_type != VX_TYPE_FLOAT32) +        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); +    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); +    if (scalar_type != VX_TYPE_FLOAT32) +        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); +    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type,
sizeof(scalar_type))); +    if (scalar_type != VX_TYPE_UINT32) +        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); +    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); +    if (scalar_type != VX_TYPE_UINT32) +        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type); +    // Check for input parameters +    vx_parameter input_param; +    vx_image input; +    vx_df_image df_image; +    input_param = vxGetParameterByIndex(node, 0); +    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); +    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); +    if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) +    { +        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HarrisCornerDetector: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); +    } + +    // Check for output parameters +    vx_image output; +    vx_df_image format; +    vx_parameter output_param; +    vx_uint32 height, width; +    output_param = vxGetParameterByIndex(node, 1); +    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); +    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); +    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); +    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); +    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); +    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); +    vxReleaseImage(&input); +    vxReleaseImage(&output); +    vxReleaseParameter(&output_param); +    vxReleaseParameter(&input_param); +    return status; } -static vx_status VX_CALLBACK processHarrisCornerDetector(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HarrisCornerDetectorLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processHarrisCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ +    RppStatus rpp_status = RPP_SUCCESS; +    vx_status return_status = VX_SUCCESS; +    HarrisCornerDetectorLocalData *data = NULL; +    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +    vx_df_image df_image = VX_DF_IMAGE_VIRT; +    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); +    if (data->device_type == AGO_TARGET_AFFINITY_GPU) +    { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHarrisCornerDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_harris_corner_detector_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->gaussianKernelSize,data->stdDev,data->kernelSize,data->kValue,data->threshold,data->nonMaxKernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_harris_corner_detector_u8_pkd3_gpu((void
*)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->gaussianKernelSize,data->stdDev,data->kernelSize,data->kValue,data->threshold,data->nonMaxKernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshHarrisCornerDetector(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_harris_corner_detector_u8_pln1_gpu((void *)data->cl_pSrc, data->srcDimensions, (void *)data->cl_pDst, data->gaussianKernelSize, data->stdDev, data->kernelSize, data->kValue, data->threshold, data->nonMaxKernelSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_harris_corner_detector_u8_pkd3_gpu((void *)data->cl_pSrc, data->srcDimensions, (void *)data->cl_pDst, data->gaussianKernelSize, data->stdDev, data->kernelSize, data->kValue, data->threshold, data->nonMaxKernelSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHarrisCornerDetector(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_harris_corner_detector_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->gaussianKernelSize,data->stdDev,data->kernelSize,data->kValue,data->threshold,data->nonMaxKernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_harris_corner_detector_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->gaussianKernelSize,data->stdDev,data->kernelSize,data->kValue,data->threshold,data->nonMaxKernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshHarrisCornerDetector(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_harris_corner_detector_u8_pln1_host(data->pSrc, data->srcDimensions, data->pDst, data->gaussianKernelSize, data->stdDev, data->kernelSize, data->kValue, data->threshold, data->nonMaxKernelSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_harris_corner_detector_u8_pkd3_host(data->pSrc, data->srcDimensions, data->pDst, data->gaussianKernelSize, data->stdDev, data->kernelSize, data->kValue, data->threshold, data->nonMaxKernelSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeHarrisCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeHarrisCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) { - HarrisCornerDetectorLocalData * data = new HarrisCornerDetectorLocalData; - memset(data, 0, sizeof(*data)); + HarrisCornerDetectorLocalData *data = new HarrisCornerDetectorLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHarrisCornerDetector(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + refreshHarrisCornerDetector(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeHarrisCornerDetector(vx_node node, const vx_reference *parameters, vx_uint32 num) { - HarrisCornerDetectorLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + HarrisCornerDetectorLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, +                                                 vx_bool use_opencl_1_2, // [input]  false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 +                                                 vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ +    vx_context context = vxGetContext((vx_reference)graph); +    AgoTargetAffinityInfo affinity; +    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) +        supported_target_affinity = AGO_TARGET_AFFINITY_GPU; +    else +        supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL +    supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + +    return VX_SUCCESS; } vx_status HarrisCornerDetector_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HarrisCornerDetector", - VX_KERNEL_RPP_HARRISCORNERDETECTOR, - processHarrisCornerDetector, - 9, - validateHarrisCornerDetector, - initializeHarrisCornerDetector, - uninitializeHarrisCornerDetector); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); +    vx_status status = VX_SUCCESS; +    // Add kernel to the context with callbacks +    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HarrisCornerDetector", +                                       VX_KERNEL_RPP_HARRISCORNERDETECTOR, +                                       processHarrisCornerDetector, +                                       9, +                                       validateHarrisCornerDetector, +                                       initializeHarrisCornerDetector, +                                       uninitializeHarrisCornerDetector); +    ERROR_CHECK_OBJECT(kernel); +    AgoTargetAffinityInfo affinity; +    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); +    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers +    vx_bool enableBufferAccess = vx_true_e; +    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) +        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; +    vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR,
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/Histogram.cpp b/amd_openvx_extensions/amd_rpp/source/Histogram.cpp index 4753b5c356..38e990309a 100644 --- a/amd_openvx_extensions/amd_rpp/source/Histogram.cpp +++ b/amd_openvx_extensions/amd_rpp/source/Histogram.cpp @@ -22,172 +22,232 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct HistogramLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - Rpp32u *outputHistogram; - Rpp32u bins; +struct HistogramLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + RppiSize srcDimensions; + Rpp32u device_type; + RppPtr_t pSrc; + Rpp32u *outputHistogram; + Rpp32u bins; #if ENABLE_OPENCL - cl_mem cl_pSrc; -#endif + cl_mem cl_pSrc; +#elif ENABLE_HIP + void *hip_pSrc; +#endif }; static vx_status VX_CALLBACK refreshHistogram(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->outputHistogram = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u),data->outputHistogram, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->bins)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); + size_t arr_size; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + data->outputHistogram = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u), data->outputHistogram, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->bins)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateHistogram(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return 
ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Histogram: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } +    vx_status status = VX_SUCCESS; +    vx_enum scalar_type; +    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); +    if (scalar_type != VX_TYPE_UINT32) +        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #2 type=%d (must be size)\n", scalar_type); +    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); +    if (scalar_type != VX_TYPE_UINT32) +        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be size)\n", scalar_type); +    // Check for input parameters +    vx_parameter input_param; +    vx_image input; +    vx_df_image df_image; +    input_param = vxGetParameterByIndex(node, 0); +    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); +    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); +    if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) +    { +        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Histogram: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); +    } - vxReleaseImage(&input); - vxReleaseParameter(&input_param); - return status; +    vxReleaseImage(&input); +    vxReleaseParameter(&input_param); +    return status; } -static vx_status VX_CALLBACK processHistogram(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processHistogram(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ +    RppStatus rpp_status = RPP_SUCCESS; +    vx_status return_status = VX_SUCCESS; +    HistogramLocalData *data = NULL; +    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +    vx_df_image df_image = VX_DF_IMAGE_VIRT; +    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); +    if (data->device_type == AGO_TARGET_AFFINITY_GPU) +    { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHistogram(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_histogram_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,data->outputHistogram,data->bins,data->rppHandle); - } - else if(df_image
== VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,data->outputHistogram,data->bins,data->rppHandle); - } - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - vx_status copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u),data->outputHistogram, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshHistogram(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_histogram_u8_pln1_gpu((void *)data->cl_pSrc, data->srcDimensions, data->outputHistogram, data->bins, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_histogram_u8_pkd3_gpu((void *)data->cl_pSrc, data->srcDimensions, data->outputHistogram, data->bins, data->rppHandle); + } + size_t arr_size; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u), data->outputHistogram, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshHistogram(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_histogram_u8_pln1_gpu((void *)data->hip_pSrc, data->srcDimensions, data->outputHistogram, data->bins, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_histogram_u8_pkd3_gpu((void *)data->hip_pSrc, data->srcDimensions, data->outputHistogram, data->bins, data->rppHandle); + } + size_t arr_size; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u), data->outputHistogram, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHistogram(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_histogram_u8_pln1_host(data->pSrc,data->srcDimensions,data->outputHistogram,data->bins,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_u8_pkd3_host(data->pSrc,data->srcDimensions,data->outputHistogram,data->bins,data->rppHandle); - } - size_t arr_size; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - vx_status copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u),data->outputHistogram, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST); - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshHistogram(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_histogram_u8_pln1_host(data->pSrc, data->srcDimensions, data->outputHistogram, data->bins, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_histogram_u8_pkd3_host(data->pSrc, data->srcDimensions, data->outputHistogram, data->bins, data->rppHandle); + } + size_t arr_size; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp32u), data->outputHistogram, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeHistogram(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeHistogram(vx_node node, const vx_reference *parameters, vx_uint32 num) { - HistogramLocalData * data = new HistogramLocalData; - memset(data, 0, sizeof(*data)); + HistogramLocalData *data = new HistogramLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHistogram(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + refreshHistogram(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.hipstream); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeHistogram(vx_node node, const vx_reference *parameters, vx_uint32 num) { - HistogramLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + HistogramLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. 
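+//! Queried by the runtime through VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT during graph verification; the callback reports, via supported_target_affinity, whether this kernel can execute on the CPU, the GPU, or both.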
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, +                                                 vx_bool use_opencl_1_2, // [input]  false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 +                                                 vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ +    vx_context context = vxGetContext((vx_reference)graph); +    AgoTargetAffinityInfo affinity; +    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) +        supported_target_affinity = AGO_TARGET_AFFINITY_GPU; +    else +        supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); +    supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + +    return VX_SUCCESS; } vx_status Histogram_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Histogram", - VX_KERNEL_RPP_HISTOGRAM, - processHistogram, - 4, - validateHistogram, - initializeHistogram, - uninitializeHistogram); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); +    vx_status status = VX_SUCCESS; +    // Add kernel to the context with callbacks +    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Histogram", +                                       VX_KERNEL_RPP_HISTOGRAM, +                                       processHistogram, +                                       4, +                                       validateHistogram, +                                       initializeHistogram, +                                       uninitializeHistogram); +    ERROR_CHECK_OBJECT(kernel); +    AgoTargetAffinityInfo affinity; +    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP +    // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers +    vx_bool enableBufferAccess = vx_true_e; +    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) +        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; +    vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); -
PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/HistogramBalance.cpp b/amd_openvx_extensions/amd_rpp/source/HistogramBalance.cpp deleted file mode 100644 index 585e1f09de..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/HistogramBalance.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct HistogramBalanceLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHistogramBalance(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramBalanceLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHistogramBalance(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramBalance: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHistogramBalance(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus 
rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramBalanceLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHistogramBalance(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_histogram_balance_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_histogram_balance_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHistogramBalance(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_histogram_balance_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_histogram_balance_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHistogramBalance(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramBalanceLocalData * data = new HistogramBalanceLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHistogramBalance(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHistogramBalance(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramBalanceLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status HistogramBalance_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramBalance", - VX_KERNEL_RPP_HISTOGRAMBALANCE, - processHistogramBalance, - 3, - validateHistogramBalance, - initializeHistogramBalance, - uninitializeHistogramBalance); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool 
enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/HistogramBalancebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/HistogramBalancebatchPD.cpp index 48247695e3..7fa5a2c73a 100644 --- a/amd_openvx_extensions/amd_rpp/source/HistogramBalancebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/HistogramBalancebatchPD.cpp @@ -22,196 +22,235 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct HistogramBalancebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; +struct HistogramBalancebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#endif }; static vx_status VX_CALLBACK refreshHistogramBalancebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramBalancebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, 
&data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateHistogramBalancebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramBalancebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramBalancebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + }

-static vx_status VX_CALLBACK processHistogramBalancebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramBalancebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHistogramBalancebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_balance_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_balance_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ?
VX_SUCCESS : VX_FAILURE; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; +} -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHistogramBalancebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_balance_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_balance_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; +static vx_status VX_CALLBACK processHistogramBalancebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + HistogramBalancebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + // #if ENABLE_OPENCL + // refreshHistogramBalancebatchPD(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_histogram_balance_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_histogram_balance_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #endif + return VX_ERROR_NOT_IMPLEMENTED; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshHistogramBalancebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_histogram_balance_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_histogram_balance_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeHistogramBalancebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeHistogramBalancebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - HistogramBalancebatchPDLocalData * data = new HistogramBalancebatchPDLocalData; - memset(data, 0, sizeof(*data)); + HistogramBalancebatchPDLocalData *data = new HistogramBalancebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHistogramBalancebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshHistogramBalancebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeHistogramBalancebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - HistogramBalancebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + HistogramBalancebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status HistogramBalancebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramBalancebatchPD", - VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD, - processHistogramBalancebatchPD, - 6, - validateHistogramBalancebatchPD, - initializeHistogramBalancebatchPD, - uninitializeHistogramBalancebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramBalancebatchPD", + VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPD, + processHistogramBalancebatchPD, + 6, + validateHistogramBalancebatchPD, + initializeHistogramBalancebatchPD, + uninitializeHistogramBalancebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); -
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/HistogramBalancebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/HistogramBalancebatchPDROID.cpp deleted file mode 100644 index 49b7aa29d8..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/HistogramBalancebatchPDROID.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct HistogramBalancebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHistogramBalancebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramBalancebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHistogramBalancebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramBalancebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHistogramBalancebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramBalancebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHistogramBalancebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_balance_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_balance_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHistogramBalancebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_balance_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_balance_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHistogramBalancebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramBalancebatchPDROIDLocalData * data = new HistogramBalancebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHistogramBalancebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHistogramBalancebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramBalancebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status HistogramBalancebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramBalancebatchPDROID", - VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPDROID, - processHistogramBalancebatchPDROID, - 10, - validateHistogramBalancebatchPDROID, - initializeHistogramBalancebatchPDROID, - uninitializeHistogramBalancebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/HistogramBalancebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/HistogramBalancebatchPS.cpp deleted file mode 100644 index f4062a985b..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/HistogramBalancebatchPS.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct HistogramBalancebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHistogramBalancebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramBalancebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHistogramBalancebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != 
VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramBalancebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHistogramBalancebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramBalancebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHistogramBalancebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_balance_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_balance_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHistogramBalancebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_balance_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_balance_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHistogramBalancebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramBalancebatchPSLocalData * data = new HistogramBalancebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHistogramBalancebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHistogramBalancebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramBalancebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status HistogramBalancebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramBalancebatchPS", - VX_KERNEL_RPP_HISTOGRAMBALANCEBATCHPS, - processHistogramBalancebatchPS, - 6, - validateHistogramBalancebatchPS, - initializeHistogramBalancebatchPS, - uninitializeHistogramBalancebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/HistogramEqualize.cpp b/amd_openvx_extensions/amd_rpp/source/HistogramEqualize.cpp deleted file mode 100644 
index 805f17f59b..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/HistogramEqualize.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct HistogramEqualizeLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHistogramEqualize(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramEqualizeLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHistogramEqualize(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != 
VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramEqualize: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHistogramEqualize(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramEqualizeLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHistogramEqualize(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_histogram_equalization_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_histogram_equalization_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHistogramEqualize(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_histogram_equalization_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_histogram_equalization_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHistogramEqualize(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramEqualizeLocalData * data = new HistogramEqualizeLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHistogramEqualize(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHistogramEqualize(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramEqualizeLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status HistogramEqualize_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramEqualize", - VX_KERNEL_RPP_HISTOGRAMEQUALIZE, - processHistogramEqualize, - 3, - validateHistogramEqualize, - initializeHistogramEqualize, - uninitializeHistogramEqualize); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/HistogramEqualizebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/HistogramEqualizebatchPD.cpp index 0150dd5e3e..941f4eda03 100644 --- a/amd_openvx_extensions/amd_rpp/source/HistogramEqualizebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/HistogramEqualizebatchPD.cpp @@ -22,196 +22,255 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct HistogramEqualizebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; +struct HistogramEqualizebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshHistogramEqualizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramEqualizebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, 
sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateHistogramEqualizebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramEqualizebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if 
(scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramEqualizebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } -static vx_status VX_CALLBACK processHistogramEqualizebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramEqualizebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHistogramEqualizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_equalization_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_equalization_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; +} -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHistogramEqualizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_equalization_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_equalization_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; +static vx_status VX_CALLBACK processHistogramEqualizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + HistogramEqualizebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + // #if ENABLE_OPENCL + // refreshHistogramEqualizebatchPD(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_histogram_equalization_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_histogram_equalization_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #elif ENABLE_HIP + // refreshHistogramEqualizebatchPD(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_histogram_equalization_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->nbatchSize,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_histogram_equalization_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->nbatchSize,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + // #endif + return VX_ERROR_NOT_IMPLEMENTED; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshHistogramEqualizebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_histogram_equalization_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_histogram_equalization_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeHistogramEqualizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeHistogramEqualizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - HistogramEqualizebatchPDLocalData * data = new HistogramEqualizebatchPDLocalData; - memset(data, 0, sizeof(*data)); + HistogramEqualizebatchPDLocalData *data = new HistogramEqualizebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHistogramEqualizebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshHistogramEqualizebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeHistogramEqualizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - HistogramEqualizebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + 
HistogramEqualizebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO:: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph. +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status HistogramEqualizebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramEqualizebatchPD", - VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD, - processHistogramEqualizebatchPD, - 6, - validateHistogramEqualizebatchPD, - initializeHistogramEqualizebatchPD, - uninitializeHistogramEqualizebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramEqualizebatchPD", + VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPD, + processHistogramEqualizebatchPD, + 6, + validateHistogramEqualizebatchPD, + initializeHistogramEqualizebatchPD, + uninitializeHistogramEqualizebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, 
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/HistogramEqualizebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/HistogramEqualizebatchPDROID.cpp deleted file mode 100644 index ced0a5a8cc..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/HistogramEqualizebatchPDROID.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct HistogramEqualizebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHistogramEqualizebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramEqualizebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, 
&data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHistogramEqualizebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramEqualizebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHistogramEqualizebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramEqualizebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHistogramEqualizebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_equalization_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_equalization_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHistogramEqualizebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_equalization_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_equalization_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHistogramEqualizebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramEqualizebatchPDROIDLocalData * data = new HistogramEqualizebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHistogramEqualizebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHistogramEqualizebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramEqualizebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status HistogramEqualizebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramEqualizebatchPDROID", - VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPDROID, - processHistogramEqualizebatchPDROID, - 10, - validateHistogramEqualizebatchPDROID, - initializeHistogramEqualizebatchPDROID, - uninitializeHistogramEqualizebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, 
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/HistogramEqualizebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/HistogramEqualizebatchPS.cpp deleted file mode 100644 index 3183262cfc..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/HistogramEqualizebatchPS.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct HistogramEqualizebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHistogramEqualizebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, HistogramEqualizebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHistogramEqualizebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != 
VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HistogramEqualizebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHistogramEqualizebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HistogramEqualizebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHistogramEqualizebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_equalization_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_equalization_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHistogramEqualizebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_histogram_equalization_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_histogram_equalization_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHistogramEqualizebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramEqualizebatchPSLocalData * data = new HistogramEqualizebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHistogramEqualizebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHistogramEqualizebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HistogramEqualizebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status HistogramEqualizebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HistogramEqualizebatchPS", - VX_KERNEL_RPP_HISTOGRAMEQUALIZEBATCHPS, - processHistogramEqualizebatchPS, - 6, - validateHistogramEqualizebatchPS, - initializeHistogramEqualizebatchPS, - uninitializeHistogramEqualizebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Hue.cpp b/amd_openvx_extensions/amd_rpp/source/Hue.cpp deleted file mode 100644 index 
2de69b7e92..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Hue.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct HueLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f hueShift; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHue(vx_node node, const vx_reference *parameters, vx_uint32 num, HueLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->hueShift)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHue(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Hue: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHue(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HueLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHue(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_hueRGB_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->hueShift,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - - rpp_status = rppi_hueRGB_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->hueShift,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHue(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_hueRGB_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->hueShift,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - std::cout << "coming till mivisionx call- HueRGB" << std::endl; - - rpp_status = rppi_hueRGB_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->hueShift,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHue(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HueLocalData * data = new HueLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHue(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHue(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HueLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Hue_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Hue", - VX_KERNEL_RPP_HUE, - processHue, - 4, - validateHue, - initializeHue, - uninitializeHue); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/HuebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/HuebatchPD.cpp index 168a83207f..75713fdbfd 100644 --- a/amd_openvx_extensions/amd_rpp/source/HuebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/HuebatchPD.cpp @@ -22,220 +22,263 @@ THE SOFTWARE. 
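+// Usage sketch (illustrative only; object names are hypothetical, and the
+// 7-parameter layout is inferred from the refresh/validate callbacks below):
+// "org.rpp.HuebatchPD" is instantiated the same way as the other batchPD
+// kernels, via vxGetKernelByName() and vxCreateGenericNode():
+//   vxSetParameterByIndex(n, 0, (vx_reference)srcBatch);   // input batch image, U008/RGB2
+//   vxSetParameterByIndex(n, 1, (vx_reference)srcWidths);  // vx_array of per-image widths (Rpp32u)
+//   vxSetParameterByIndex(n, 2, (vx_reference)srcHeights); // vx_array of per-image heights (Rpp32u)
+//   vxSetParameterByIndex(n, 3, (vx_reference)dstBatch);   // output batch image
+//   vxSetParameterByIndex(n, 4, (vx_reference)hueShifts);  // vx_array of vx_float32 hue-shift values
+//   vxSetParameterByIndex(n, 5, (vx_reference)batchSize);  // VX_TYPE_UINT32 scalar (nbatchSize)
+//   vxSetParameterByIndex(n, 6, (vx_reference)deviceType); // VX_TYPE_UINT32 scalar (CPU/GPU affinity)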
#include "internal_publishKernels.h" -struct HuebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *hueShift; +struct HuebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *hueShift; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshHuebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, HuebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->hueShift = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->hueShift, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->hueShift, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + 
data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateHuebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HuebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, 
sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HuebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processHuebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HuebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processHuebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + HuebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; +
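// Aside: a condensed, standalone model of the dispatch that processHuebatchPD
// performs: the device_type scalar picks host vs. GPU, and the image format
// picks the planar (pln1, U008) vs. packed (pkd3, RGB) RPP variant. Plain C++;
// the enums and function bodies are illustrative stand-ins, not the real RPP API.

#include <cstdio>

enum Affinity { AFFINITY_CPU, AFFINITY_GPU };
enum Format { FORMAT_U8, FORMAT_RGB };

// Stand-ins for the four rppi_hueRGB_* entry points used by this kernel.
static void hue_pln1_host() { std::puts("rppi_hueRGB_u8_pln1_batchPD_host"); }
static void hue_pkd3_host() { std::puts("rppi_hueRGB_u8_pkd3_batchPD_host"); }
static void hue_pln1_gpu() { std::puts("rppi_hueRGB_u8_pln1_batchPD_gpu"); }
static void hue_pkd3_gpu() { std::puts("rppi_hueRGB_u8_pkd3_batchPD_gpu"); }

// Two-axis dispatch: affinity chooses the backend, format chooses the layout.
static void process(Affinity device_type, Format df_image)
{
    if (device_type == AFFINITY_GPU)
        df_image == FORMAT_U8 ? hue_pln1_gpu() : hue_pkd3_gpu();
    else
        df_image == FORMAT_U8 ? hue_pln1_host() : hue_pkd3_host();
}

int main() { process(AFFINITY_GPU, FORMAT_RGB); }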
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHuebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_hueRGB_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->hueShift,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_hueRGB_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->hueShift,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshHuebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_hueRGB_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->hueShift, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_hueRGB_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->hueShift, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshHuebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_hueRGB_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->hueShift,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_hueRGB_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->hueShift,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshHuebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_hueRGB_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->hueShift, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_hueRGB_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->hueShift, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHuebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_hueRGB_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->hueShift,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_hueRGB_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->hueShift,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshHuebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_hueRGB_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->hueShift, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_hueRGB_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->hueShift, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeHuebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeHuebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - HuebatchPDLocalData * data = new HuebatchPDLocalData; - memset(data, 0, sizeof(*data)); + HuebatchPDLocalData *data = new HuebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHuebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->hueShift = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + refreshHuebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeHuebatchPD(vx_node node, 
const vx_reference *parameters, vx_uint32 num) { - HuebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + HuebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->hueShift); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status HuebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HuebatchPD", - VX_KERNEL_RPP_HUEBATCHPD, - processHuebatchPD, - 7, - validateHuebatchPD, - initializeHuebatchPD, - uninitializeHuebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HuebatchPD", + VX_KERNEL_RPP_HUEBATCHPD, + processHuebatchPD, + 7, + validateHuebatchPD, + initializeHuebatchPD, + uninitializeHuebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) +
STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/HuebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/HuebatchPDROID.cpp deleted file mode 100644 index 9f0b255ddb..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/HuebatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct HuebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *hueShift; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHuebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, HuebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->hueShift = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->hueShift, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - 
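// Aside: the four parallel vx_arrays read just above (x, y, roiWidth,
// roiHeight) are zipped into one RppiROI per image. A standalone model of
// that step, with an illustrative Roi struct mirroring RppiROI rather than
// the real RPP header:

#include <cstdint>
#include <vector>

struct Roi { uint32_t x, y, roiWidth, roiHeight; };

// One rectangle per image in the batch, built from per-image coordinate arrays.
static std::vector<Roi> zipRois(const std::vector<uint32_t> &xs,
                                const std::vector<uint32_t> &ys,
                                const std::vector<uint32_t> &ws,
                                const std::vector<uint32_t> &hs)
{
    std::vector<Roi> rois(xs.size());
    for (size_t i = 0; i < xs.size(); i++)
        rois[i] = {xs[i], ys[i], ws[i], hs[i]};
    return rois;
}

int main() { return (int)zipRois({0}, {0}, {64}, {64}).size() - 1; }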
if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHuebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HuebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHuebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HuebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHuebatchPDROID(node, 
parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_hueRGB_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->hueShift,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_hueRGB_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->hueShift,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHuebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_hueRGB_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->hueShift,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_hueRGB_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->hueShift,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHuebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HuebatchPDROIDLocalData * data = new HuebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHuebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHuebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HuebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status HuebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HuebatchPDROID", - VX_KERNEL_RPP_HUEBATCHPDROID, - processHuebatchPDROID, - 11, - validateHuebatchPDROID, - initializeHuebatchPDROID, - uninitializeHuebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, 
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/HuebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/HuebatchPS.cpp deleted file mode 100644 index 29a1635370..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/HuebatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct HuebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f hueShift; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshHuebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, HuebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->hueShift)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateHuebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - 
input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: HuebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processHuebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - HuebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshHuebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_hueRGB_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->hueShift,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_hueRGB_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->hueShift,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshHuebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_hueRGB_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->hueShift,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_hueRGB_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->hueShift,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeHuebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HuebatchPSLocalData * data = new HuebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshHuebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeHuebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - HuebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status HuebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.HuebatchPS", - VX_KERNEL_RPP_HUEBATCHPS, - processHuebatchPS, - 7, - validateHuebatchPS, - initializeHuebatchPS, - uninitializeHuebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/InclusiveOR.cpp b/amd_openvx_extensions/amd_rpp/source/InclusiveOR.cpp deleted file mode 100644 index eca26bfecb..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/InclusiveOR.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct InclusiveORLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshInclusiveOR(vx_node node, const vx_reference *parameters, vx_uint32 num, InclusiveORLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateInclusiveOR(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - 
STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveOR: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveOR: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processInclusiveOR(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - InclusiveORLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshInclusiveOR(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_inclusive_OR_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_inclusive_OR_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshInclusiveOR(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_inclusive_OR_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_inclusive_OR_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeInclusiveOR(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - InclusiveORLocalData * data = new InclusiveORLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshInclusiveOR(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeInclusiveOR(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - InclusiveORLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status InclusiveOR_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.InclusiveOR", - VX_KERNEL_RPP_INCLUSIVEOR, - processInclusiveOR, - 4, - validateInclusiveOR, - initializeInclusiveOR, - uninitializeInclusiveOR); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/InclusiveORbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/InclusiveORbatchPD.cpp index 302e635841..064488cdc7 100644 --- a/amd_openvx_extensions/amd_rpp/source/InclusiveORbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/InclusiveORbatchPD.cpp @@ -22,209 +22,274 @@ THE SOFTWARE. 
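The InclusiveORbatchPD hunk below relies on the same batched-image convention as the Hue kernel above: the whole batch travels as a single vx_image, so refresh derives the per-slot maximum by dividing the image height by nbatchSize, while srcDimensions[i] records each image's real size. A minimal sketch of the slot arithmetic this implies (plain C++; it assumes tightly packed rows purely for illustration, since the actual stride rules live in the OpenVX allocator):

#include <cstddef>
#include <cstdint>

struct Size { uint32_t width, height; };

// Byte offset of image i inside the vertically stacked batch buffer: every
// image occupies a full max-sized slot even when its own dimensions are smaller.
static size_t slotOffset(Size maxSrc, uint32_t channels, uint32_t i)
{
    return (size_t)maxSrc.width * maxSrc.height * channels * i;
}

// Example: slotOffset({224, 224}, 3, 2) is the base of the third RGB image.
int main() { return slotOffset({224, 224}, 3, 2) == 301056u ? 0 : 1; }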
#include "internal_publishKernels.h" -struct InclusiveORbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct InclusiveORbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshInclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, InclusiveORbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateInclusiveORbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveORbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return 
ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveORbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveORbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveORbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + 
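/* Editor's note: the validator copies the output image's width, height and
 * format into metas[4] on purpose; that meta-format is what AGO uses to
 * resolve virtual output images at graph-verify time, so downstream nodes can
 * consume this kernel's result without a user-allocated image. */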
vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processInclusiveORbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - InclusiveORbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processInclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + InclusiveORbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshInclusiveORbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_inclusive_OR_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshInclusiveORbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_inclusive_OR_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshInclusiveORbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_inclusive_OR_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
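/* Editor's note: the OpenCL and HIP branches call the same
 * rppi_inclusive_OR_u8_*_batchPD_gpu entry points; RPP is built for exactly
 * one GPU backend, so the void * buffer arguments carry a cl_mem in one build
 * and a hipMalloc'd pointer in the other. The call shape, as a sketch only
 * (srcBuf1/srcBuf2/dstBuf stand in for the backend-specific handles):
 *
 *     rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_gpu(
 *         srcBuf1, srcBuf2,          // device buffers, backend-specific
 *         data->srcDimensions,       // true per-frame sizes
 *         data->maxSrcDimensions,    // stacked-slot size
 *         dstBuf, data->nbatchSize, data->rppHandle);
 */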
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshInclusiveORbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_inclusive_OR_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshInclusiveORbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_inclusive_OR_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeInclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeInclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - InclusiveORbatchPDLocalData * data = new InclusiveORbatchPDLocalData; - memset(data, 0, sizeof(*data)); + InclusiveORbatchPDLocalData *data = new InclusiveORbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshInclusiveORbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshInclusiveORbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + 
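/* Editor's note: moving the vxReadScalarValue of nbatchSize (parameter 5, the
 * batch-size scalar that validate checks as VX_TYPE_UINT32) and the three
 * mallocs out of refresh and into initialize is the substantive fix in this
 * hunk: refresh fires on every graph execution, so the old placement leaked
 * one set of allocations per frame. The intended lifetime, sketched:
 *
 *     initialize:   read nbatchSize once; malloc srcDimensions,
 *                   srcBatch_width, srcBatch_height
 *     refresh:      only vxCopyArrayRange into the preallocated buffers
 *     uninitialize: free all three, then destroy the RPP handle
 */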
rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeInclusiveORbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - InclusiveORbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + InclusiveORbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status InclusiveORbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.InclusiveORbatchPD", - VX_KERNEL_RPP_INCLUSIVEORBATCHPD, - processInclusiveORbatchPD, - 7, - validateInclusiveORbatchPD, - initializeInclusiveORbatchPD, - uninitializeInclusiveORbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.InclusiveORbatchPD", + VX_KERNEL_RPP_INCLUSIVEORBATCHPD, + processInclusiveORbatchPD, + 7, + validateInclusiveORbatchPD, + initializeInclusiveORbatchPD, + uninitializeInclusiveORbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - 
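/* Editor's note: query_target_support is new in this patch. Reporting GPU
 * affinity lets AGO schedule the node on the GPU for the HIP backend, while
 * the #if ENABLE_OPENCL override pins OpenCL builds back to CPU affinity
 * because amd_rpp nodes ship no OpenCL codegen callback and vxVerifyGraph
 * would otherwise fail. The callback is attached in the Register function
 * below via vxSetKernelAttribute with
 * VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT. */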
if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/InclusiveORbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/InclusiveORbatchPDROID.cpp deleted file mode 100644 index b631d1a415..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/InclusiveORbatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct InclusiveORbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshInclusiveORbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, InclusiveORbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = 
vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateInclusiveORbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveORbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveORbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processInclusiveORbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - InclusiveORbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshInclusiveORbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_inclusive_OR_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshInclusiveORbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_inclusive_OR_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_inclusive_OR_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeInclusiveORbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - InclusiveORbatchPDROIDLocalData * data = new InclusiveORbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshInclusiveORbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeInclusiveORbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - InclusiveORbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status InclusiveORbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.InclusiveORbatchPDROID", - VX_KERNEL_RPP_INCLUSIVEORBATCHPDROID, - processInclusiveORbatchPDROID, - 11, - validateInclusiveORbatchPDROID, - initializeInclusiveORbatchPDROID, - uninitializeInclusiveORbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); 
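/* Editor's note: the ROID registration above binds eleven parameters; the four
 * extra vx_arrays at indices 5-8 carry per-image ROI x, y, width and height,
 * which refresh packs into RppiROI records before the _ROID_ RPP calls. The
 * file is removed outright: after this patch only the batchPD variant of
 * InclusiveOR remains, and the ROID and PS flavors go away together with
 * their kernel registrations. */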
- PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/InclusiveORbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/InclusiveORbatchPS.cpp deleted file mode 100644 index edf50889da..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/InclusiveORbatchPS.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct InclusiveORbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshInclusiveORbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, InclusiveORbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateInclusiveORbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveORbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: InclusiveORbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processInclusiveORbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - InclusiveORbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshInclusiveORbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_inclusive_OR_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_inclusive_OR_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshInclusiveORbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_inclusive_OR_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_inclusive_OR_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeInclusiveORbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - InclusiveORbatchPSLocalData * data = new InclusiveORbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshInclusiveORbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeInclusiveORbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - InclusiveORbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status InclusiveORbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.InclusiveORbatchPS", - VX_KERNEL_RPP_INCLUSIVEORBATCHPS, - processInclusiveORbatchPS, - 7, - validateInclusiveORbatchPS, - initializeInclusiveORbatchPS, - uninitializeInclusiveORbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Jitter.cpp b/amd_openvx_extensions/amd_rpp/source/Jitter.cpp deleted file mode 100644 index cd25873151..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Jitter.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct JitterLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshJitter(vx_node node, const vx_reference *parameters, vx_uint32 num, JitterLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateJitter(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Jitter: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, 
&df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processJitter(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - JitterLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshJitter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_jitter_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_jitter_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshJitter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_jitter_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_jitter_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeJitter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - JitterLocalData * data = new JitterLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshJitter(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeJitter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - JitterLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Jitter_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Jitter", - VX_KERNEL_RPP_JITTER, - processJitter, - 4, - validateJitter, - initializeJitter, - uninitializeJitter); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, 
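/* Editor's note: the single-image Jitter kernel deleted here created one RPP
 * handle per image (rppCreateWithStream on GPU, rppCreateWithBatchSize with a
 * batch of 1 on CPU) and carried a plain RppiSize plus a scalar kernelSize;
 * the batched JitterbatchPD below is the only surviving entry point, sized
 * once for the whole batch. */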
sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/JitterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/JitterbatchPD.cpp index 456bab7e17..85708959ca 100644 --- a/amd_openvx_extensions/amd_rpp/source/JitterbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/JitterbatchPD.cpp @@ -22,220 +22,264 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct JitterbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; +struct JitterbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshJitterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, JitterbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, 
VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateJitterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format 
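/* Editor's note: JitterbatchPD adds one twist over the InclusiveOR kernel
 * above: parameter 4 is a vx_array of per-image kernel sizes rather than a
 * scalar, so refresh copies nbatchSize vx_uint32 values into the preallocated
 * data->kernelSize buffer with vxCopyArrayRange (the batch size is read first
 * so it can size that copy), instead of querying VX_ARRAY_ATTRIBUTE_NUMITEMS
 * and malloc'ing a fresh buffer on every call as the old code did. */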
metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: JitterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: JitterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output,
VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processJitterbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - JitterbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processJitterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + JitterbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshJitterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_jitter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_jitter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshJitterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_jitter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_jitter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshJitterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_jitter_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_jitter_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshJitterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_jitter_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_jitter_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshJitterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_jitter_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_jitter_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshJitterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_jitter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_jitter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeJitterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeJitterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - JitterbatchPDLocalData * data = new JitterbatchPDLocalData; - memset(data, 0, sizeof(*data)); + JitterbatchPDLocalData *data = new JitterbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshJitterbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshJitterbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeJitterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - JitterbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + JitterbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; 
+ if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status JitterbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.JitterbatchPD", - VX_KERNEL_RPP_JITTERBATCHPD, - processJitterbatchPD, - 7, - validateJitterbatchPD, - initializeJitterbatchPD, - uninitializeJitterbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.JitterbatchPD", + VX_KERNEL_RPP_JITTERBATCHPD, + processJitterbatchPD, + 7, + validateJitterbatchPD, + initializeJitterbatchPD, + uninitializeJitterbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); -
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/JitterbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/JitterbatchPDROID.cpp deleted file mode 100644 index 197bfbe95c..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/JitterbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
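For reference, the reworked org.rpp.JitterbatchPD kernel above now takes seven parameters: the stacked input batch image, per-image width and height arrays, the output batch image, a per-image jitter kernel-size array, the nbatchSize scalar, and the device_type scalar. A minimal host-side sketch of instantiating it through the generic OpenVX user-kernel API follows; the helper name and its arguments are illustrative, and only the OpenVX entry points are real:

    #include <VX/vx.h>

    // Hypothetical helper: builds an org.rpp.JitterbatchPD node with the seven
    // parameters registered above (indices match the vxAddParameterToKernel calls).
    vx_node createJitterbatchPDNode(vx_graph graph, vx_image srcBatch, vx_array srcWidths,
                                    vx_array srcHeights, vx_image dstBatch,
                                    vx_array kernelSizes, vx_uint32 batchSize,
                                    vx_uint32 deviceType)
    {
        vx_context context = vxGetContext((vx_reference)graph);
        vx_scalar nbatch = vxCreateScalar(context, VX_TYPE_UINT32, &batchSize);
        vx_scalar device = vxCreateScalar(context, VX_TYPE_UINT32, &deviceType);
        vx_kernel kernel = vxGetKernelByName(context, "org.rpp.JitterbatchPD");
        vx_node node = vxCreateGenericNode(graph, kernel);
        vxSetParameterByIndex(node, 0, (vx_reference)srcBatch);    // input batch image
        vxSetParameterByIndex(node, 1, (vx_reference)srcWidths);   // per-image widths
        vxSetParameterByIndex(node, 2, (vx_reference)srcHeights);  // per-image heights
        vxSetParameterByIndex(node, 3, (vx_reference)dstBatch);    // output batch image
        vxSetParameterByIndex(node, 4, (vx_reference)kernelSizes); // per-image kernel sizes
        vxSetParameterByIndex(node, 5, (vx_reference)nbatch);      // nbatchSize
        vxSetParameterByIndex(node, 6, (vx_reference)device);      // device_type
        vxReleaseKernel(&kernel);
        vxReleaseScalar(&nbatch);
        vxReleaseScalar(&device);
        return node;
    }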
-*/ - -#include "internal_publishKernels.h" - -struct JitterbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshJitterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, JitterbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateJitterbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: JitterbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processJitterbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - JitterbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshJitterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_jitter_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_jitter_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshJitterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_jitter_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_jitter_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeJitterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - JitterbatchPDROIDLocalData * data = new JitterbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshJitterbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeJitterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - JitterbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status JitterbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.JitterbatchPDROID", - VX_KERNEL_RPP_JITTERBATCHPDROID, - processJitterbatchPDROID, - 11, - validateJitterbatchPDROID, - initializeJitterbatchPDROID, - uninitializeJitterbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/JitterbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/JitterbatchPS.cpp deleted file mode 100644 index d80d2e5b15..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/JitterbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
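A note on the batch layout the refreshJitter* callbacks in these files assume: each callback divides the batch image's VX_IMAGE_HEIGHT by nbatchSize, which implies the batch travels as one tall vx_image of vertically stacked, max-size slots, while the real per-image sizes come from the width/height arrays. A hedged sketch of the addressing that layout implies, assuming each slot is padded to maxWidth x maxHeight (all names illustrative):

    #include <cstddef>
    #include <cstdint>

    // Hypothetical: pointer to slot i inside the stacked batch buffer, assuming
    // every slot occupies maxWidth * maxHeight * bytesPerPixel bytes.
    static inline uint8_t *batchSlotPtr(uint8_t *batchBase, size_t maxWidth,
                                        size_t maxHeight, size_t bytesPerPixel,
                                        size_t i)
    {
        size_t slotStride = maxWidth * maxHeight * bytesPerPixel;
        return batchBase + i * slotStride;
    }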
-*/ - -#include "internal_publishKernels.h" - -struct JitterbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshJitterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, JitterbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateJitterbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - 
vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: JitterbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processJitterbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - JitterbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshJitterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_jitter_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_jitter_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshJitterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_jitter_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_jitter_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeJitterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - JitterbatchPSLocalData * data = new JitterbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshJitterbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeJitterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - JitterbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status JitterbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.JitterbatchPS", - VX_KERNEL_RPP_JITTERBATCHPS, - processJitterbatchPS, - 7, - validateJitterbatchPS, - initializeJitterbatchPS, - uninitializeJitterbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/LaplacianImagePyramid.cpp b/amd_openvx_extensions/amd_rpp/source/LaplacianImagePyramid.cpp index 288a7e9e53..7350c2fb1b 100644 
--- a/amd_openvx_extensions/amd_rpp/source/LaplacianImagePyramid.cpp +++ b/amd_openvx_extensions/amd_rpp/source/LaplacianImagePyramid.cpp @@ -22,185 +22,243 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct LaplacianImagePyramidLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f stdDev; - Rpp32u kernelSize; +struct LaplacianImagePyramidLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + RppiSize srcDimensions; + Rpp32u device_type; + RppPtr_t pSrc; + RppPtr_t pDst; + Rpp32f stdDev; + Rpp32u kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshLaplacianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num, LaplacianImagePyramidLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->stdDev)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->stdDev)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->kernelSize)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); 
+ } + return status; } static vx_status VX_CALLBACK validateLaplacianImagePyramid(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LaplacianImagePyramid: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_FLOAT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #2 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LaplacianImagePyramid: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); -
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processLaplacianImagePyramid(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LaplacianImagePyramidLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processLaplacianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + LaplacianImagePyramidLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLaplacianImagePyramid(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_laplacian_image_pyramid_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_laplacian_image_pyramid_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - + refreshLaplacianImagePyramid(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_laplacian_image_pyramid_u8_pln1_gpu((void *)data->cl_pSrc, data->srcDimensions, (void *)data->cl_pDst, data->stdDev, data->kernelSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_laplacian_image_pyramid_u8_pkd3_gpu((void *)data->cl_pSrc, data->srcDimensions, (void *)data->cl_pDst, data->stdDev, data->kernelSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshLaplacianImagePyramid(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_laplacian_image_pyramid_u8_pln1_gpu((void *)data->hip_pSrc, data->srcDimensions, (void *)data->hip_pDst, data->stdDev, data->kernelSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_laplacian_image_pyramid_u8_pkd3_gpu((void *)data->hip_pSrc, data->srcDimensions, (void *)data->hip_pDst, data->stdDev, data->kernelSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLaplacianImagePyramid(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_laplacian_image_pyramid_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_laplacian_image_pyramid_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->stdDev,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshLaplacianImagePyramid(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_laplacian_image_pyramid_u8_pln1_host(data->pSrc, data->srcDimensions, data->pDst, data->stdDev, data->kernelSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_laplacian_image_pyramid_u8_pkd3_host(data->pSrc, data->srcDimensions, data->pDst, data->stdDev, data->kernelSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeLaplacianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeLaplacianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num) { - LaplacianImagePyramidLocalData * data = new LaplacianImagePyramidLocalData; - memset(data, 0, sizeof(*data)); + LaplacianImagePyramidLocalData *data = new LaplacianImagePyramidLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLaplacianImagePyramid(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + refreshLaplacianImagePyramid(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.hipstream); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeLaplacianImagePyramid(vx_node node, const vx_reference *parameters, vx_uint32 num) { - LaplacianImagePyramidLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + LaplacianImagePyramidLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
+//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status LaplacianImagePyramid_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LaplacianImagePyramid", - VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID, - processLaplacianImagePyramid, - 5, - validateLaplacianImagePyramid, - initializeLaplacianImagePyramid, - uninitializeLaplacianImagePyramid); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LaplacianImagePyramid", + VX_KERNEL_RPP_LAPLACIANIMAGEPYRAMID, + processLaplacianImagePyramid, + 5, + validateLaplacianImagePyramid, + initializeLaplacianImagePyramid, + uninitializeLaplacianImagePyramid); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 
3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; }
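A note on the error handling in these Register functions: status only receives a failure code inside PARAM_ERROR_CHECK, which evidently jumps to the exit: label on failure, so the trailing if (status != VX_SUCCESS) block is the goto target rather than a fall-through path. The macro definitions live in the extension's internal headers and are not part of this diff; the following is only a plausible reconstruction to make the control flow legible, not the project's actual code:

// Hypothetical reconstruction -- the real macros are in internal headers and may differ.
#define STATUS_ERROR_CHECK(call)                        \
    {                                                   \
        vx_status status_ = (call);                     \
        if (status_ != VX_SUCCESS)                      \
            return status_; /* bail out immediately */  \
    }
#define PARAM_ERROR_CHECK(call)                         \
    {                                                   \
        status = (call);                                \
        if (status != VX_SUCCESS)                       \
            goto exit; /* unwind to vxRemoveKernel(kernel) */ \
    }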
diff --git a/amd_openvx_extensions/amd_rpp/source/LensCorrection.cpp b/amd_openvx_extensions/amd_rpp/source/LensCorrection.cpp deleted file mode 100644 index c97aa1815d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/LensCorrection.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct LensCorrectionLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f strength; - Rpp32f zoom; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLensCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num, LensCorrectionLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->strength)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->zoom)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLensCorrection(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LensCorrection: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLensCorrection(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LensCorrectionLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLensCorrection(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_lens_correction_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->strength,data->zoom,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_lens_correction_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->strength,data->zoom,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLensCorrection(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_lens_correction_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->strength,data->zoom,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_lens_correction_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->strength,data->zoom,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLensCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LensCorrectionLocalData * data = new LensCorrectionLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLensCorrection(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLensCorrection(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LensCorrectionLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status LensCorrection_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LensCorrection", - VX_KERNEL_RPP_LENSCORRECTION, - processLensCorrection, - 5, - validateLensCorrection, - initializeLensCorrection, - uninitializeLensCorrection); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/LensCorrectionbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/LensCorrectionbatchPD.cpp index 3691c33f47..31179d2ceb 100644 --- a/amd_openvx_extensions/amd_rpp/source/LensCorrectionbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/LensCorrectionbatchPD.cpp @@ -22,225 +22,268 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct LensCorrectionbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *strength; - vx_float32 *zoom; +struct LensCorrectionbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *strength; + vx_float32 *zoom; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshLensCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, LensCorrectionbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->strength = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->strength, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->zoom = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_float32),data->zoom, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->strength, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_float32), data->zoom, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, 
sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; }
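refreshLensCorrectionbatchPD treats parameter 0 as the whole batch packed into a single vx_image: the queried height is the per-image slot height times nbatchSize (hence the division above), while the true per-image sizes arrive through the width/height arrays. A tiny standalone C sketch of the implied layout arithmetic, with hypothetical numbers:

#include <stdio.h>

int main(void)
{
    // Hypothetical batch: 4 images packed vertically into one 1920x4320 vx_image.
    unsigned int nbatchSize = 4;
    unsigned int queriedWidth = 1920, queriedHeight = 4320;
    unsigned int maxHeight = queriedHeight / nbatchSize; // mirrors maxSrcDimensions.height above
    for (unsigned int i = 0; i < nbatchSize; i++)
        printf("image %u: rows [%u, %u) of the packed buffer, slot %ux%u\n",
               i, i * maxHeight, (i + 1) * maxHeight, queriedWidth, maxHeight);
    // srcDimensions[i] (filled from the width/height arrays) gives the valid region
    // inside each slot; the remainder is padding up to the max dimensions.
    return 0;
}

This packing is why every batchPD call passes both srcDimensions (per-image) and maxSrcDimensions (slot size) down to RPP.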
static vx_status VX_CALLBACK validateLensCorrectionbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LensCorrectionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LensCorrectionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processLensCorrectionbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LensCorrectionbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, 
VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processLensCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + LensCorrectionbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLensCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_lens_correction_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->strength,data->zoom,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_lens_correction_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->strength,data->zoom,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshLensCorrectionbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_lens_correction_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->strength, data->zoom, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_lens_correction_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->strength, data->zoom, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshLensCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_lens_correction_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->strength,data->zoom,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_lens_correction_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->strength,data->zoom,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshLensCorrectionbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_lens_correction_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->strength, data->zoom, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_lens_correction_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->strength, data->zoom, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLensCorrectionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_lens_correction_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->strength,data->zoom,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_lens_correction_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->strength,data->zoom,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshLensCorrectionbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_lens_correction_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->strength, data->zoom, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_lens_correction_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->strength, data->zoom, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeLensCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeLensCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - LensCorrectionbatchPDLocalData * data = new LensCorrectionbatchPDLocalData; - memset(data, 0, sizeof(*data)); + LensCorrectionbatchPDLocalData *data = new LensCorrectionbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLensCorrectionbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); + data->strength = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->zoom = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshLensCorrectionbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, 
data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeLensCorrectionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - LensCorrectionbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + LensCorrectionbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + free(data->strength); + free(data->zoom); + delete (data); + return VX_SUCCESS; +} +
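With these callbacks in place, an application drives the kernel through the generic OpenVX node API; the parameter order matches the refresh and validate code above (0: packed input batch, 1: per-image widths, 2: per-image heights, 3: packed output batch, 4: strength values, 5: zoom values, 6: batch size, 7: device type). A hedged usage sketch; the object names are hypothetical and creation of the context, graph, images, arrays, and scalars is assumed to have happened already:

// Sketch only: error handling omitted; assumes the amd_rpp module is loaded into the context.
vx_kernel kernel = vxGetKernelByName(context, "org.rpp.LensCorrectionbatchPD");
vx_node node = vxCreateGenericNode(graph, kernel);
vxSetParameterByIndex(node, 0, (vx_reference)srcBatchImage);    // packed input batch (U8 or RGB)
vxSetParameterByIndex(node, 1, (vx_reference)srcWidthArray);    // Rpp32u width per image
vxSetParameterByIndex(node, 2, (vx_reference)srcHeightArray);   // Rpp32u height per image
vxSetParameterByIndex(node, 3, (vx_reference)dstBatchImage);    // packed output batch
vxSetParameterByIndex(node, 4, (vx_reference)strengthArray);    // vx_float32 strength per image
vxSetParameterByIndex(node, 5, (vx_reference)zoomArray);        // vx_float32 zoom per image
vxSetParameterByIndex(node, 6, (vx_reference)nbatchSizeScalar); // VX_TYPE_UINT32 batch size
vxSetParameterByIndex(node, 7, (vx_reference)deviceTypeScalar); // AGO_TARGET_AFFINITY_CPU or _GPU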
+//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status LensCorrectionbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LensCorrectionbatchPD", - VX_KERNEL_RPP_LENSCORRECTIONBATCHPD, - processLensCorrectionbatchPD, - 8, - validateLensCorrectionbatchPD, - initializeLensCorrectionbatchPD, - uninitializeLensCorrectionbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LensCorrectionbatchPD", + VX_KERNEL_RPP_LENSCORRECTIONBATCHPD, + processLensCorrectionbatchPD, + 8, + validateLensCorrectionbatchPD, + initializeLensCorrectionbatchPD, + uninitializeLensCorrectionbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, 
VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/LensCorrectionbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/LensCorrectionbatchPDROID.cpp deleted file mode 100644 index 899ecef7f8..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/LensCorrectionbatchPDROID.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct LensCorrectionbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *strength; - vx_float32 *zoom; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLensCorrectionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, LensCorrectionbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->strength = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->strength, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->zoom = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_float32),data->zoom, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = 
batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLensCorrectionbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LensCorrectionbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLensCorrectionbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LensCorrectionbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, 
sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLensCorrectionbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_lens_correction_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->strength,data->zoom,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_lens_correction_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->strength,data->zoom,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLensCorrectionbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_lens_correction_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->strength,data->zoom,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_lens_correction_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->strength,data->zoom,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLensCorrectionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LensCorrectionbatchPDROIDLocalData * data = new LensCorrectionbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLensCorrectionbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLensCorrectionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LensCorrectionbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status LensCorrectionbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LensCorrectionbatchPDROID", - VX_KERNEL_RPP_LENSCORRECTIONBATCHPDROID, - processLensCorrectionbatchPDROID, - 12, - validateLensCorrectionbatchPDROID, - initializeLensCorrectionbatchPDROID, - uninitializeLensCorrectionbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - 
vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/LensCorrectionbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/LensCorrectionbatchPS.cpp deleted file mode 100644 index b104b3d74a..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/LensCorrectionbatchPS.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct LensCorrectionbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f strength; - Rpp32f zoom; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLensCorrectionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, LensCorrectionbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->strength)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->zoom)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLensCorrectionbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 
type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LensCorrectionbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLensCorrectionbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LensCorrectionbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLensCorrectionbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_lens_correction_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->strength,data->zoom,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_lens_correction_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->strength,data->zoom,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLensCorrectionbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_lens_correction_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->strength,data->zoom,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_lens_correction_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->strength,data->zoom,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLensCorrectionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LensCorrectionbatchPSLocalData * data = new LensCorrectionbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLensCorrectionbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLensCorrectionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LensCorrectionbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status LensCorrectionbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LensCorrectionbatchPS", - VX_KERNEL_RPP_LENSCORRECTIONBATCHPS, - processLensCorrectionbatchPS, - 8, - validateLensCorrectionbatchPS, - initializeLensCorrectionbatchPS, - uninitializeLensCorrectionbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/LocalBinaryPattern.cpp b/amd_openvx_extensions/amd_rpp/source/LocalBinaryPattern.cpp deleted file mode 100644 index e7a367d54d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/LocalBinaryPattern.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct LocalBinaryPatternLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLocalBinaryPattern(vx_node node, const vx_reference *parameters, vx_uint32 num, LocalBinaryPatternLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLocalBinaryPattern(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LocalBinaryPattern: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLocalBinaryPattern(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - 
RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LocalBinaryPatternLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLocalBinaryPattern(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_local_binary_pattern_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_local_binary_pattern_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLocalBinaryPattern(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_local_binary_pattern_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_local_binary_pattern_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLocalBinaryPattern(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LocalBinaryPatternLocalData * data = new LocalBinaryPatternLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLocalBinaryPattern(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLocalBinaryPattern(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LocalBinaryPatternLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status LocalBinaryPattern_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LocalBinaryPattern", - VX_KERNEL_RPP_LOCALBINARYPATTERN, - processLocalBinaryPattern, - 3, - validateLocalBinaryPattern, - initializeLocalBinaryPattern, - uninitializeLocalBinaryPattern); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL 
buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/LocalBinaryPatternbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/LocalBinaryPatternbatchPD.cpp index 262cb5d55c..120ea2708c 100644 --- a/amd_openvx_extensions/amd_rpp/source/LocalBinaryPatternbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/LocalBinaryPatternbatchPD.cpp @@ -22,196 +22,259 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct LocalBinaryPatternbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; +struct LocalBinaryPatternbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshLocalBinaryPatternbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, LocalBinaryPatternbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, 
sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateLocalBinaryPatternbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: 
LocalBinaryPatternbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LocalBinaryPatternbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processLocalBinaryPatternbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LocalBinaryPatternbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processLocalBinaryPatternbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + LocalBinaryPatternbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLocalBinaryPatternbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_local_binary_pattern_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshLocalBinaryPatternbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_local_binary_pattern_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshLocalBinaryPatternbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_local_binary_pattern_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLocalBinaryPatternbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_local_binary_pattern_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshLocalBinaryPatternbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_local_binary_pattern_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeLocalBinaryPatternbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeLocalBinaryPatternbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - LocalBinaryPatternbatchPDLocalData * data = new LocalBinaryPatternbatchPDLocalData; - memset(data, 0, sizeof(*data)); + LocalBinaryPatternbatchPDLocalData *data = new LocalBinaryPatternbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLocalBinaryPatternbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshLocalBinaryPatternbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeLocalBinaryPatternbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - LocalBinaryPatternbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + 
LocalBinaryPatternbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status LocalBinaryPatternbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LocalBinaryPatternbatchPD", - VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD, - processLocalBinaryPatternbatchPD, - 6, - validateLocalBinaryPatternbatchPD, - initializeLocalBinaryPatternbatchPD, - uninitializeLocalBinaryPatternbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LocalBinaryPatternbatchPD", + VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPD, + processLocalBinaryPatternbatchPD, + 6, + validateLocalBinaryPatternbatchPD, + initializeLocalBinaryPatternbatchPD, + uninitializeLocalBinaryPatternbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + 
STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/LocalBinaryPatternbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/LocalBinaryPatternbatchPDROID.cpp deleted file mode 100644 index 6eb9824484..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/LocalBinaryPatternbatchPDROID.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct LocalBinaryPatternbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLocalBinaryPatternbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, LocalBinaryPatternbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, 
&data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLocalBinaryPatternbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LocalBinaryPatternbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLocalBinaryPatternbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LocalBinaryPatternbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLocalBinaryPatternbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_local_binary_pattern_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLocalBinaryPatternbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_local_binary_pattern_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLocalBinaryPatternbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LocalBinaryPatternbatchPDROIDLocalData * data = new LocalBinaryPatternbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLocalBinaryPatternbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLocalBinaryPatternbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LocalBinaryPatternbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status LocalBinaryPatternbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LocalBinaryPatternbatchPDROID", - VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPDROID, - processLocalBinaryPatternbatchPDROID, - 10, - validateLocalBinaryPatternbatchPDROID, - initializeLocalBinaryPatternbatchPDROID, - uninitializeLocalBinaryPatternbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, 
&enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/LocalBinaryPatternbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/LocalBinaryPatternbatchPS.cpp deleted file mode 100644 index 144a88e40c..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/LocalBinaryPatternbatchPS.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct LocalBinaryPatternbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLocalBinaryPatternbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, LocalBinaryPatternbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLocalBinaryPatternbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != 
VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LocalBinaryPatternbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLocalBinaryPatternbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LocalBinaryPatternbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLocalBinaryPatternbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_local_binary_pattern_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLocalBinaryPatternbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_local_binary_pattern_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_local_binary_pattern_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLocalBinaryPatternbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LocalBinaryPatternbatchPSLocalData * data = new LocalBinaryPatternbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLocalBinaryPatternbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLocalBinaryPatternbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LocalBinaryPatternbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status LocalBinaryPatternbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LocalBinaryPatternbatchPS", - VX_KERNEL_RPP_LOCALBINARYPATTERNBATCHPS, - processLocalBinaryPatternbatchPS, - 6, - validateLocalBinaryPatternbatchPS, - initializeLocalBinaryPatternbatchPS, - uninitializeLocalBinaryPatternbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/LookUpTable.cpp b/amd_openvx_extensions/amd_rpp/source/LookUpTable.cpp deleted 
file mode 100644 index d116ce53cb..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/LookUpTable.cpp +++ /dev/null @@ -1,203 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct LookUpTableLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp8u *lutPtr; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLookUpTable(vx_node node, const vx_reference *parameters, vx_uint32 num, LookUpTableLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->lutPtr = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u),data->lutPtr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLookUpTable(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - 
vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LookUpTable: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLookUpTable(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LookUpTableLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLookUpTable(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_look_up_table_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->lutPtr,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_look_up_table_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->lutPtr,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLookUpTable(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_look_up_table_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->lutPtr,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_look_up_table_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->lutPtr,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLookUpTable(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LookUpTableLocalData * data = new LookUpTableLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLookUpTable(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLookUpTable(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LookUpTableLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status LookUpTable_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LookUpTable", - VX_KERNEL_RPP_LOOKUPTABLE, - processLookUpTable, - 4, - validateLookUpTable, - initializeLookUpTable, - uninitializeLookUpTable); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/LookUpTablebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/LookUpTablebatchPD.cpp index b34de204ab..aa323da64b 100644 --- a/amd_openvx_extensions/amd_rpp/source/LookUpTablebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/LookUpTablebatchPD.cpp @@ -22,201 +22,266 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct LookUpTablebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_array *lutPtr; +struct LookUpTablebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + Rpp8u *lutPtr; + size_t arr_size; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshLookUpTablebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, LookUpTablebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->lutPtr = (vx_array *)malloc(sizeof(vx_array) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_array),data->lutPtr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->arr_size, sizeof(Rpp8u), data->lutPtr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + 
data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateLookUpTablebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LookUpTablebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - 
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LookUpTablebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processLookUpTablebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LookUpTablebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processLookUpTablebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + LookUpTablebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if 
ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLookUpTablebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_look_up_table_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->lutPtr,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_look_up_table_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->lutPtr,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshLookUpTablebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_look_up_table_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->lutPtr, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_look_up_table_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->lutPtr, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshLookUpTablebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_look_up_table_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->lutPtr, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_look_up_table_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->lutPtr, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLookUpTablebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_look_up_table_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->lutPtr,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_look_up_table_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->lutPtr,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshLookUpTablebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_look_up_table_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->lutPtr, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_look_up_table_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->lutPtr, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeLookUpTablebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeLookUpTablebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - LookUpTablebatchPDLocalData * data = new LookUpTablebatchPDLocalData; - memset(data, 0, sizeof(*data)); + LookUpTablebatchPDLocalData *data = new LookUpTablebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLookUpTablebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->arr_size = 256 * data->nbatchSize; + data->lutPtr = (Rpp8u *)malloc(sizeof(Rpp8u) * data->arr_size); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshLookUpTablebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeLookUpTablebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - LookUpTablebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + LookUpTablebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->lutPtr); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +
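+// Note on the staging buffers allocated in initializeLookUpTablebatchPD above: the LUT array (parameter #4) +// is assumed to hold one 256-entry Rpp8u table per image in the batch, which is why arr_size is 256 * nbatchSize +// and why the batch size must be read from parameter #5 before any of these buffers are sized. +// refreshLookUpTablebatchPD then re-copies the vx_array contents into these buffers before every process call, +// and uninitializeLookUpTablebatchPD frees them along with the RPP handle.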
+//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status LookUpTablebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LookUpTablebatchPD", - VX_KERNEL_RPP_LOOKUPTABLEBATCHPD, - processLookUpTablebatchPD, - 7, - validateLookUpTablebatchPD, - initializeLookUpTablebatchPD, - uninitializeLookUpTablebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LookUpTablebatchPD", + VX_KERNEL_RPP_LOOKUPTABLEBATCHPD, + processLookUpTablebatchPD, + 7, + validateLookUpTablebatchPD, + initializeLookUpTablebatchPD, + uninitializeLookUpTablebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/LookUpTablebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/LookUpTablebatchPDROID.cpp deleted file mode 100644 index 102a44b4e1..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/LookUpTablebatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct LookUpTablebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_array *lutPtr; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLookUpTablebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, LookUpTablebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->lutPtr = (vx_array *)malloc(sizeof(vx_array) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_array),data->lutPtr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLookUpTablebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LookUpTablebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLookUpTablebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LookUpTablebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLookUpTablebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_look_up_table_u8_pln1_batchPD_ROID_gpu((void 
*)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->lutPtr,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_look_up_table_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->lutPtr,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLookUpTablebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_look_up_table_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->lutPtr,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_look_up_table_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->lutPtr,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLookUpTablebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LookUpTablebatchPDROIDLocalData * data = new LookUpTablebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLookUpTablebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLookUpTablebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LookUpTablebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status LookUpTablebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LookUpTablebatchPDROID", - VX_KERNEL_RPP_LOOKUPTABLEBATCHPDROID, - processLookUpTablebatchPDROID, - 11, - validateLookUpTablebatchPDROID, - initializeLookUpTablebatchPDROID, - uninitializeLookUpTablebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, 
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/LookUpTablebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/LookUpTablebatchPS.cpp deleted file mode 100644 index 623bbdc63c..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/LookUpTablebatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct LookUpTablebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_array *lutPtr; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshLookUpTablebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, LookUpTablebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->lutPtr = (vx_array *)malloc(sizeof(vx_array) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_array),data->lutPtr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateLookUpTablebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - 
vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: LookUpTablebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processLookUpTablebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - LookUpTablebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshLookUpTablebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_look_up_table_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->lutPtr,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_look_up_table_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->lutPtr,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshLookUpTablebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_look_up_table_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->lutPtr,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_look_up_table_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->lutPtr,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeLookUpTablebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LookUpTablebatchPSLocalData * data = new LookUpTablebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshLookUpTablebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeLookUpTablebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - LookUpTablebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status LookUpTablebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.LookUpTablebatchPS", - VX_KERNEL_RPP_LOOKUPTABLEBATCHPS, - processLookUpTablebatchPS, - 7, - validateLookUpTablebatchPS, - initializeLookUpTablebatchPS, - uninitializeLookUpTablebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Magnitude.cpp 
b/amd_openvx_extensions/amd_rpp/source/Magnitude.cpp deleted file mode 100644 index 34ea4f8871..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Magnitude.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MagnitudeLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMagnitude(vx_node node, const vx_reference *parameters, vx_uint32 num, MagnitudeLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMagnitude(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter 
input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Magnitude: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Magnitude: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMagnitude(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MagnitudeLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMagnitude(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_magnitude_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_magnitude_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMagnitude(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_magnitude_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_magnitude_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMagnitude(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MagnitudeLocalData * data = new MagnitudeLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMagnitude(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMagnitude(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MagnitudeLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Magnitude_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Magnitude", - VX_KERNEL_RPP_MAGNITUDE, - processMagnitude, - 4, - validateMagnitude, - initializeMagnitude, - uninitializeMagnitude); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MagnitudebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/MagnitudebatchPD.cpp index 553091b569..c5858efdae 100644 --- a/amd_openvx_extensions/amd_rpp/source/MagnitudebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/MagnitudebatchPD.cpp @@ -22,209 +22,274 @@ THE SOFTWARE. 
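The hunk below rewrites MagnitudebatchPD.cpp into the dual-backend layout used throughout this patch: the per-batch width/height arrays become members of the node's local data, their allocation is hoisted out of the per-frame refresh callback into initialize, a HIP path is added alongside the OpenCL one behind compile-time guards, and a query_target_support callback is registered. A minimal sketch of the backend-guard pattern follows; LocalData and refreshDeviceBuffers are hypothetical stand-ins for the real struct and callback, while the ENABLE_* macros and VX_IMAGE_ATTRIBUTE_AMD_* attributes are the ones used in this file:

struct LocalData
{
#if ENABLE_OPENCL
    cl_mem cl_pSrc;  // device handle owned by the OpenVX OpenCL allocator
#elif ENABLE_HIP
    void *hip_pSrc;  // raw device pointer owned by the OpenVX HIP allocator
#endif
    RppPtr_t pSrc;   // host buffer, used when the node affinity is CPU
};

static vx_status refreshDeviceBuffers(vx_image img, LocalData *data)
{
#if ENABLE_OPENCL
    // OpenCL backend: the image exposes its backing store as a cl_mem
    return vxQueryImage(img, VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc));
#elif ENABLE_HIP
    // HIP backend: the same kind of query yields a plain device pointer
    return vxQueryImage(img, VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc));
#else
    // CPU-only build: fall back to the host-accessible buffer
    return vxQueryImage(img, VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8));
#endif
}

Because ENABLE_OPENCL and ENABLE_HIP are mutually exclusive at build time, only one GPU branch is ever compiled; the refresh, process, and initialize callbacks below use the same #elif chain, and uninitialize only needs to know that some GPU backend exists (#if ENABLE_OPENCL || ENABLE_HIP).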
#include "internal_publishKernels.h" -struct MagnitudebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct MagnitudebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshMagnitudebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, MagnitudebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateMagnitudebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MagnitudebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return 
ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MagnitudebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MagnitudebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MagnitudebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); 
+ vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processMagnitudebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MagnitudebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processMagnitudebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + MagnitudebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMagnitudebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_magnitude_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_magnitude_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshMagnitudebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_magnitude_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_magnitude_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshMagnitudebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_magnitude_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_magnitude_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMagnitudebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_magnitude_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_magnitude_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshMagnitudebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_magnitude_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_magnitude_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeMagnitudebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeMagnitudebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MagnitudebatchPDLocalData * data = new MagnitudebatchPDLocalData; - memset(data, 0, sizeof(*data)); + MagnitudebatchPDLocalData *data = new MagnitudebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMagnitudebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshMagnitudebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 
data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeMagnitudebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MagnitudebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + MagnitudebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status MagnitudebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MagnitudebatchPD", - VX_KERNEL_RPP_MAGNITUDEBATCHPD, - processMagnitudebatchPD, - 7, - validateMagnitudebatchPD, - initializeMagnitudebatchPD, - uninitializeMagnitudebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MagnitudebatchPD", + VX_KERNEL_RPP_MAGNITUDEBATCHPD, + processMagnitudebatchPD, + 7, + validateMagnitudebatchPD, + initializeMagnitudebatchPD, + uninitializeMagnitudebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - 
STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/MagnitudebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/MagnitudebatchPDROID.cpp deleted file mode 100644 index cca603dff9..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/MagnitudebatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MagnitudebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMagnitudebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, MagnitudebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = 
vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMagnitudebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MagnitudebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MagnitudebatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMagnitudebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MagnitudebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMagnitudebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_magnitude_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_magnitude_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMagnitudebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_magnitude_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_magnitude_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMagnitudebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MagnitudebatchPDROIDLocalData * data = new MagnitudebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMagnitudebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMagnitudebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MagnitudebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MagnitudebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MagnitudebatchPDROID", - VX_KERNEL_RPP_MAGNITUDEBATCHPDROID, - processMagnitudebatchPDROID, - 11, - validateMagnitudebatchPDROID, - initializeMagnitudebatchPDROID, - uninitializeMagnitudebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MagnitudebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/MagnitudebatchPS.cpp deleted file mode 100644 index 1ceb0802cc..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/MagnitudebatchPS.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct MagnitudebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMagnitudebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, MagnitudebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMagnitudebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MagnitudebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MagnitudebatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMagnitudebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MagnitudebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMagnitudebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_magnitude_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_magnitude_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMagnitudebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_magnitude_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_magnitude_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMagnitudebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MagnitudebatchPSLocalData * data = new MagnitudebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMagnitudebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMagnitudebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MagnitudebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MagnitudebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MagnitudebatchPS", - VX_KERNEL_RPP_MAGNITUDEBATCHPS, - processMagnitudebatchPS, - 7, - validateMagnitudebatchPS, - initializeMagnitudebatchPS, - uninitializeMagnitudebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Max.cpp b/amd_openvx_extensions/amd_rpp/source/Max.cpp deleted file mode 100644 index bd5972beb1..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Max.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct MaxLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMax(vx_node node, const vx_reference *parameters, vx_uint32 num, MaxLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMax(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Max: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Max: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMax(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MaxLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMax(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_max_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_max_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMax(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_max_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_max_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMax(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MaxLocalData * data = new MaxLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMax(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMax(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MaxLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Max_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Max", - VX_KERNEL_RPP_MAX, - processMax, - 4, - validateMax, - initializeMax, - uninitializeMax); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MaxbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/MaxbatchPD.cpp index 98a3ea5385..8f316798be 100644 --- a/amd_openvx_extensions/amd_rpp/source/MaxbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/MaxbatchPD.cpp @@ -22,209 +22,275 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct MaxbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct MaxbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshMaxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, MaxbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], 
VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateMaxbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MaxbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MaxbatchPD: image: #1 format=%4.4s (must be RGB2 or 
U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MaxbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MaxbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status 
VX_CALLBACK processMaxbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MaxbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processMaxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + MaxbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMaxbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_max_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_max_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + cl_command_queue handle = data->handle.cmdq; + refreshMaxbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_max_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_max_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshMaxbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_max_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_max_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMaxbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_max_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_max_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshMaxbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_max_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_max_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeMaxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeMaxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MaxbatchPDLocalData * data = new MaxbatchPDLocalData; - memset(data, 0, sizeof(*data)); + MaxbatchPDLocalData *data = new MaxbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMaxbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshMaxbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeMaxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MaxbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + MaxbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + 
rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status MaxbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MaxbatchPD", - VX_KERNEL_RPP_MAXBATCHPD, - processMaxbatchPD, - 7, - validateMaxbatchPD, - initializeMaxbatchPD, - uninitializeMaxbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MaxbatchPD", + VX_KERNEL_RPP_MAXBATCHPD, + processMaxbatchPD, + 7, + validateMaxbatchPD, + initializeMaxbatchPD, + uninitializeMaxbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/MaxbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/MaxbatchPDROID.cpp deleted file mode 100644 index 4327e37899..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/MaxbatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct MaxbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMaxbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, MaxbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMaxbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MaxbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MaxbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMaxbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MaxbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, 
&df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMaxbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_max_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_max_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMaxbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_max_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_max_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMaxbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MaxbatchPDROIDLocalData * data = new MaxbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMaxbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMaxbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MaxbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MaxbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MaxbatchPDROID", - VX_KERNEL_RPP_MAXBATCHPDROID, - processMaxbatchPDROID, - 11, - validateMaxbatchPDROID, - initializeMaxbatchPDROID, - uninitializeMaxbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool 
enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MaxbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/MaxbatchPS.cpp deleted file mode 100644 index b59e5fde7f..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/MaxbatchPS.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct MaxbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMaxbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, MaxbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMaxbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - 
STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MaxbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MaxbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMaxbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MaxbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMaxbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_max_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_max_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMaxbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_max_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_max_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMaxbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MaxbatchPSLocalData * data = new MaxbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMaxbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMaxbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MaxbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MaxbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MaxbatchPS", - VX_KERNEL_RPP_MAXBATCHPS, - processMaxbatchPS, - 7, - validateMaxbatchPS, - initializeMaxbatchPS, - uninitializeMaxbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MeanStddev.cpp b/amd_openvx_extensions/amd_rpp/source/MeanStddev.cpp index 3af18e3426..33ae24b737 100644 --- a/amd_openvx_extensions/amd_rpp/source/MeanStddev.cpp +++ b/amd_openvx_extensions/amd_rpp/source/MeanStddev.cpp @@ -22,168 +22,225 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct MeanStddevLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - Rpp32f mean; - Rpp32f stdDev; +struct MeanStddevLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + RppiSize srcDimensions; + Rpp32u device_type; + RppPtr_t pSrc; + Rpp32f mean; + Rpp32f stdDev; #if ENABLE_OPENCL - cl_mem cl_pSrc; -#endif + cl_mem cl_pSrc; +#elif ENABLE_HIP + void *hip_pSrc; +#endif }; static vx_status VX_CALLBACK refreshMeanStddev(vx_node node, const vx_reference *parameters, vx_uint32 num, MeanStddevLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[1], &data->mean)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->stdDev)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[1], &data->mean)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->stdDev)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateMeanStddev(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[1], VX_SCALAR_TYPE, &scalar_type, 
sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #1 type=%d (must be size)\n", scalar_type);
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type);
-    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
-    if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type);
-    // Check for input parameters
-    vx_parameter input_param;
-    vx_image input;
-    vx_df_image df_image;
-    input_param = vxGetParameterByIndex(node,0);
-    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
-    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
-    {
-        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MeanStddev: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
-    }
+    vx_status status = VX_SUCCESS;
+    vx_enum scalar_type;
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[1], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_FLOAT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #1 type=%d (must be VX_TYPE_FLOAT32)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_FLOAT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #2 type=%d (must be VX_TYPE_FLOAT32)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_UINT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+    // Check for input parameters
+    vx_parameter input_param;
+    vx_image input;
+    vx_df_image df_image;
+    input_param = vxGetParameterByIndex(node, 0);
+    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
+    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
+    {
+        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MeanStddev: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
+    }

-    vxReleaseImage(&input);
-    vxReleaseParameter(&input_param);
-    return status;
+    vxReleaseImage(&input);
+    vxReleaseParameter(&input_param);
+    return status;
 }

-static vx_status VX_CALLBACK processMeanStddev(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
-    RppStatus rpp_status = RPP_SUCCESS;
-    vx_status return_status = VX_SUCCESS;
-    MeanStddevLocalData * data = NULL;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    vx_df_image df_image = VX_DF_IMAGE_VIRT;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
-#if ENABLE_OPENCL
-        cl_command_queue handle = data->handle.cmdq;
-        refreshMeanStddev(node, parameters, num, data);
-        if (df_image == VX_DF_IMAGE_U8 ){
-            rpp_status = rppi_mean_stddev_u8_pln1_gpu((void
*)data->cl_pSrc,data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_mean_stddev_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); - } - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->mean)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->stdDev)); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMeanStddev(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_mean_stddev_u8_pln1_host(data->pSrc,data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_mean_stddev_u8_pkd3_host(data->pSrc,data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); - } - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->mean)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->stdDev)); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } - return return_status; +static vx_status VX_CALLBACK processMeanStddev(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + MeanStddevLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + // #if ENABLE_OPENCL + // refreshMeanStddev(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_mean_stddev_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_mean_stddev_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); + // } + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->mean)); + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->stdDev)); + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #elif ENABLE_HIP + // refreshMeanStddev(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_mean_stddev_u8_pln1_gpu((void *)data->hip_pSrc,data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_mean_stddev_u8_pkd3_gpu((void *)data->hip_pSrc,data->srcDimensions,&data->mean,&data->stdDev,data->rppHandle); + // } + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->mean)); + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->stdDev)); + // return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE;
+        // #endif
+        // The OpenCL/HIP paths above are intentionally commented out until a GPU
+        // mean-stddev implementation is re-enabled in RPP, so report not-implemented.
+        return VX_ERROR_NOT_IMPLEMENTED;
+    }
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+    {
+        refreshMeanStddev(node, parameters, num, data);
+        if (df_image == VX_DF_IMAGE_U8)
+        {
+            rpp_status = rppi_mean_stddev_u8_pln1_host(data->pSrc, data->srcDimensions, &data->mean, &data->stdDev, data->rppHandle);
+        }
+        else if (df_image == VX_DF_IMAGE_RGB)
+        {
+            rpp_status = rppi_mean_stddev_u8_pkd3_host(data->pSrc, data->srcDimensions, &data->mean, &data->stdDev, data->rppHandle);
+        }
+        STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->mean));
+        STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->stdDev));
+        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+    }
+    return return_status;
 }

-static vx_status VX_CALLBACK initializeMeanStddev(vx_node node, const vx_reference *parameters, vx_uint32 num)
+static vx_status VX_CALLBACK initializeMeanStddev(vx_node node, const vx_reference *parameters, vx_uint32 num)
 {
-    MeanStddevLocalData * data = new MeanStddevLocalData;
-    memset(data, 0, sizeof(*data));
+    MeanStddevLocalData *data = new MeanStddevLocalData;
+    memset(data, 0, sizeof(*data));
 #if ENABLE_OPENCL
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
+#elif ENABLE_HIP
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream)));
 #endif
-    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-    refreshMeanStddev(node, parameters, num, data);
+    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    refreshMeanStddev(node, parameters, num, data);
 #if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppCreateWithStream(&data->rppHandle, data->handle.cmdq);
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+        rppCreateWithStream(&data->rppHandle, data->handle.cmdq);
+#elif ENABLE_HIP
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+        rppCreateWithStream(&data->rppHandle, data->handle.hipstream);
 #endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppCreateWithBatchSize(&data->rppHandle, 1);
-    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    return VX_SUCCESS;
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+        rppCreateWithBatchSize(&data->rppHandle, 1);
+    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    return VX_SUCCESS;
 }

 static vx_status VX_CALLBACK uninitializeMeanStddev(vx_node node, const vx_reference *parameters, vx_uint32 num)
 {
-    MeanStddevLocalData * data;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    MeanStddevLocalData *data;
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+#if ENABLE_OPENCL || ENABLE_HIP
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+        rppDestroyGPU(data->rppHandle);
+#endif
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+        rppDestroyHost(data->rppHandle);
+    delete (data);
+    return VX_SUCCESS;
+}
+
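+// A minimal host-side sketch (illustrative only; `context` is the application's
+// vx_context) of how the affinity reported by the callback below is usually
+// driven. It assumes the AMD affinity attribute is also writable on the context:
+//
+//     AgoTargetAffinityInfo affinity = {};
+//     affinity.device_type = AGO_TARGET_AFFINITY_CPU; // or AGO_TARGET_AFFINITY_GPU
+//     vxSetContextAttribute(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,
+//                           &affinity, sizeof(affinity));
+//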
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+                                                  vx_bool use_opencl_1_2,              // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+                                                  vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+)
+{
+    vx_context context = vxGetContext((vx_reference)graph);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+    else
+        supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes
 #if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppDestroyGPU(data->rppHandle);
+    supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
 #endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppDestroyHost(data->rppHandle);
-    delete(data);
-    return VX_SUCCESS;
+
+    return VX_SUCCESS;
 }

 vx_status MeanStddev_Register(vx_context context)
 {
-    vx_status status = VX_SUCCESS;
-    // Add kernel to the context with callbacks
-    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MeanStddev",
-        VX_KERNEL_RPP_MEANSTDDEV,
-        processMeanStddev,
-        4,
-        validateMeanStddev,
-        initializeMeanStddev,
-        uninitializeMeanStddev);
-    ERROR_CHECK_OBJECT(kernel);
-    AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
-#if ENABLE_OPENCL
-    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
-    vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+    vx_status status = VX_SUCCESS;
+    // Add kernel to the context with callbacks
+    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MeanStddev",
+                                       VX_KERNEL_RPP_MEANSTDDEV,
+                                       processMeanStddev,
+                                       4,
+                                       validateMeanStddev,
+                                       initializeMeanStddev,
+                                       uninitializeMeanStddev);
+    ERROR_CHECK_OBJECT(kernel);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_OPENCL || ENABLE_HIP
+    // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers
+    vx_bool enableBufferAccess = vx_true_e;
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
 #else
-    vx_bool enableBufferAccess = vx_false_e;
+    vx_bool enableBufferAccess = vx_false_e;
 #endif
-    if (kernel)
-    {
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
-    }
-    if (status != VX_SUCCESS)
-    {
-        exit: vxRemoveKernel(kernel); return VX_FAILURE;
-    }
-    return status;
+    amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+    if (kernel)
+    {
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+    }
+    if (status != VX_SUCCESS)
+    {
+    exit:
+        vxRemoveKernel(kernel);
+        return VX_FAILURE;
+    }
+    return status;
 }
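Note: because the kernel above is published by name only, a host application can instantiate it with core OpenVX calls alone; no extension header is required. A minimal sketch (variable names are illustrative, error checks omitted), matching the four parameters registered above:

    vx_kernel k = vxGetKernelByName(context, "org.rpp.MeanStddev");
    vx_node n = vxCreateGenericNode(graph, k);
    vxSetParameterByIndex(n, 0, (vx_reference)srcImage);          // VX_DF_IMAGE_U8 or VX_DF_IMAGE_RGB input
    vxSetParameterByIndex(n, 1, (vx_reference)meanScalar);        // VX_TYPE_FLOAT32, bidirectional
    vxSetParameterByIndex(n, 2, (vx_reference)stdDevScalar);      // VX_TYPE_FLOAT32, bidirectional
    vxSetParameterByIndex(n, 3, (vx_reference)deviceTypeScalar);  // VX_TYPE_UINT32 CPU/GPU affinity selector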
diff --git a/amd_openvx_extensions/amd_rpp/source/MedianFilter.cpp b/amd_openvx_extensions/amd_rpp/source/MedianFilter.cpp
deleted file mode 100644
index fa4032851b..0000000000
--- a/amd_openvx_extensions/amd_rpp/source/MedianFilter.cpp
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
-Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/ - -#include "internal_publishKernels.h" - -struct MedianFilterLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMedianFilter(vx_node node, const vx_reference *parameters, vx_uint32 num, MedianFilterLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMedianFilter(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MedianFilter: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], 
VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMedianFilter(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MedianFilterLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMedianFilter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_median_filter_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_median_filter_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMedianFilter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_median_filter_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_median_filter_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMedianFilter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MedianFilterLocalData * data = new MedianFilterLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMedianFilter(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMedianFilter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MedianFilterLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MedianFilter_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MedianFilter", - VX_KERNEL_RPP_MEDIANFILTER, - processMedianFilter, - 4, - validateMedianFilter, - initializeMedianFilter, - uninitializeMedianFilter); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MedianFilterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/MedianFilterbatchPD.cpp index 7953039d87..2c19e0909e 100644 --- a/amd_openvx_extensions/amd_rpp/source/MedianFilterbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/MedianFilterbatchPD.cpp @@ -22,201 +22,264 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct MedianFilterbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; +struct MedianFilterbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshMedianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, MedianFilterbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + 
data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateMedianFilterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MedianFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - 
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
-    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    vxReleaseImage(&input);
-    vxReleaseImage(&output);
-    vxReleaseParameter(&output_param);
-    vxReleaseParameter(&input_param);
-    return status;
+    vx_status status = VX_SUCCESS;
+    vx_enum scalar_type;
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_UINT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_UINT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+    // Check for input parameters
+    vx_parameter input_param;
+    vx_image input;
+    vx_df_image df_image;
+    input_param = vxGetParameterByIndex(node, 0);
+    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
+    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
+    {
+        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MedianFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
+    }
+
+    // Check for output parameters
+    vx_image output;
+    vx_df_image format;
+    vx_parameter output_param;
+    vx_uint32 height, width;
+    output_param = vxGetParameterByIndex(node, 3);
+    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
+    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
+    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    vxReleaseImage(&input);
+    vxReleaseImage(&output);
+    vxReleaseParameter(&output_param);
+    vxReleaseParameter(&input_param);
+    return status;
 }

-static vx_status VX_CALLBACK processMedianFilterbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num)
-{
-    RppStatus rpp_status = RPP_SUCCESS;
-    vx_status return_status = VX_SUCCESS;
-    MedianFilterbatchPDLocalData * data = NULL;
-    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
-    vx_df_image df_image = VX_DF_IMAGE_VIRT;
-    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU) {
+static vx_status VX_CALLBACK processMedianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    RppStatus rpp_status = RPP_SUCCESS;
+    vx_status return_status = VX_SUCCESS;
+    MedianFilterbatchPDLocalData *data = NULL;
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    vx_df_image df_image = VX_DF_IMAGE_VIRT;
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+    {
 #if
ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMedianFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_median_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_median_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshMedianFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_median_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_median_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshMedianFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_median_filter_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_median_filter_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMedianFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_median_filter_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_median_filter_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshMedianFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_median_filter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_median_filter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeMedianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeMedianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MedianFilterbatchPDLocalData * data = new MedianFilterbatchPDLocalData; - memset(data, 0, sizeof(*data)); + MedianFilterbatchPDLocalData *data = new MedianFilterbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMedianFilterbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshMedianFilterbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeMedianFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MedianFilterbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + MedianFilterbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+                                                  vx_bool use_opencl_1_2,              // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+                                                  vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+)
+{
+    vx_context context = vxGetContext((vx_reference)graph);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+    else
+        supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes
 #if ENABLE_OPENCL
-    if(data->device_type == AGO_TARGET_AFFINITY_GPU)
-        rppDestroyGPU(data->rppHandle);
+    supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
 #endif
-    if(data->device_type == AGO_TARGET_AFFINITY_CPU)
-        rppDestroyHost(data->rppHandle);
-    delete(data);
-    return VX_SUCCESS;
+
+    return VX_SUCCESS;
 }

 vx_status MedianFilterbatchPD_Register(vx_context context)
 {
-    vx_status status = VX_SUCCESS;
-    // Add kernel to the context with callbacks
-    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MedianFilterbatchPD",
-        VX_KERNEL_RPP_MEDIANFILTERBATCHPD,
-        processMedianFilterbatchPD,
-        7,
-        validateMedianFilterbatchPD,
-        initializeMedianFilterbatchPD,
-        uninitializeMedianFilterbatchPD);
-    ERROR_CHECK_OBJECT(kernel);
-    AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
+    vx_status status = VX_SUCCESS;
+    // Add kernel to the context with callbacks
+    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MedianFilterbatchPD",
+                                       VX_KERNEL_RPP_MEDIANFILTERBATCHPD,
+                                       processMedianFilterbatchPD,
+                                       7,
+                                       validateMedianFilterbatchPD,
+                                       initializeMedianFilterbatchPD,
+                                       uninitializeMedianFilterbatchPD);
+    ERROR_CHECK_OBJECT(kernel);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
 #if ENABLE_OPENCL
-    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
-    vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
+    vx_bool enableBufferAccess = vx_true_e;
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
 #else
-    vx_bool enableBufferAccess = vx_false_e;
+    vx_bool enableBufferAccess = vx_false_e;
 #endif
-    if (kernel)
-    {
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
-    }
-    if (status != VX_SUCCESS)
-    {
-        exit: vxRemoveKernel(kernel); return VX_FAILURE;
-    }
-    return status;
+    amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+    if (kernel)
+    {
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+    }
+    if (status != VX_SUCCESS)
+    {
+    exit:
+        vxRemoveKernel(kernel);
+        return VX_FAILURE;
+    }
+    return status;
 }
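For the batchPD variant just registered, the host packs the whole batch into one tall image and passes per-image metadata as vx_arrays. A minimal sketch of the expected shapes for a batch of N images (all names and sizes illustrative, error checks omitted):

    enum { N = 4 };
    const vx_uint32 maxW = 224, maxH = 224;
    // The batch is stacked vertically: image height = N * maxH (parameters 0 and 3).
    vx_image src = vxCreateImage(context, maxW, maxH * N, VX_DF_IMAGE_RGB);
    vx_image dst = vxCreateImage(context, maxW, maxH * N, VX_DF_IMAGE_RGB);
    // Per-image widths/heights (parameters 1 and 2) and median window sizes (parameter 4).
    vx_uint32 widths[N]  = {224, 200, 224, 180};
    vx_uint32 heights[N] = {224, 224, 160, 224};
    vx_uint32 ksizes[N]  = {3, 5, 3, 7}; // odd window sizes
    vx_array wArr = vxCreateArray(context, VX_TYPE_UINT32, N);
    vx_array hArr = vxCreateArray(context, VX_TYPE_UINT32, N);
    vx_array kArr = vxCreateArray(context, VX_TYPE_UINT32, N);
    vxAddArrayItems(wArr, N, widths,  sizeof(vx_uint32));
    vxAddArrayItems(hArr, N, heights, sizeof(vx_uint32));
    vxAddArrayItems(kArr, N, ksizes,  sizeof(vx_uint32));
    // Parameter 5 is the batch size scalar, parameter 6 the CPU/GPU affinity selector.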
diff --git a/amd_openvx_extensions/amd_rpp/source/MedianFilterbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/MedianFilterbatchPDROID.cpp
deleted file mode 100644
index 629c75ac6c..0000000000
--- a/amd_openvx_extensions/amd_rpp/source/MedianFilterbatchPDROID.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
-Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/ - -#include "internal_publishKernels.h" - -struct MedianFilterbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMedianFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, MedianFilterbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMedianFilterbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MedianFilterbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMedianFilterbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MedianFilterbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMedianFilterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_median_filter_u8_pln1_batchPD_ROID_gpu((void 
*)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_median_filter_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMedianFilterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_median_filter_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_median_filter_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMedianFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MedianFilterbatchPDROIDLocalData * data = new MedianFilterbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMedianFilterbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMedianFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MedianFilterbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MedianFilterbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MedianFilterbatchPDROID", - VX_KERNEL_RPP_MEDIANFILTERBATCHPDROID, - processMedianFilterbatchPDROID, - 11, - validateMedianFilterbatchPDROID, - initializeMedianFilterbatchPDROID, - uninitializeMedianFilterbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, 
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MedianFilterbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/MedianFilterbatchPS.cpp deleted file mode 100644 index 62794c0848..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/MedianFilterbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct MedianFilterbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMedianFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, MedianFilterbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMedianFilterbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - 
vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MedianFilterbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMedianFilterbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MedianFilterbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMedianFilterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_median_filter_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_median_filter_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMedianFilterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_median_filter_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_median_filter_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMedianFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MedianFilterbatchPSLocalData * data = new MedianFilterbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMedianFilterbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMedianFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MedianFilterbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MedianFilterbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MedianFilterbatchPS", - VX_KERNEL_RPP_MEDIANFILTERBATCHPS, - processMedianFilterbatchPS, - 7, - validateMedianFilterbatchPS, - initializeMedianFilterbatchPS, - uninitializeMedianFilterbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Min.cpp 
b/amd_openvx_extensions/amd_rpp/source/Min.cpp deleted file mode 100644 index e11a8ac553..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Min.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MinLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMin(vx_node node, const vx_reference *parameters, vx_uint32 num, MinLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMin(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - 
vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Min: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Min: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMin(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MinLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMin(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_min_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_min_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMin(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_min_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_min_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMin(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MinLocalData * data = new MinLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMin(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMin(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MinLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Min_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Min", - VX_KERNEL_RPP_MIN, - processMin, - 4, - validateMin, - initializeMin, - uninitializeMin); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MinMaxLoc.cpp b/amd_openvx_extensions/amd_rpp/source/MinMaxLoc.cpp index 2ed7dd8b5a..dfe54dd6e8 100644 --- a/amd_openvx_extensions/amd_rpp/source/MinMaxLoc.cpp +++ b/amd_openvx_extensions/amd_rpp/source/MinMaxLoc.cpp @@ -22,182 +22,243 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct MinMaxLocLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - Rpp8u min; - Rpp8u max; - Rpp32u minLoc; - Rpp32u maxLoc; +struct MinMaxLocLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + RppiSize srcDimensions; + Rpp32u device_type; + RppPtr_t pSrc; + Rpp8u min; + Rpp8u max; + Rpp32u minLoc; + Rpp32u maxLoc; #if ENABLE_OPENCL - cl_mem cl_pSrc; -#endif + cl_mem cl_pSrc; +#elif ENABLE_HIP + void *hip_pSrc; +#endif }; static vx_status VX_CALLBACK refreshMinMaxLoc(vx_node node, const vx_reference *parameters, vx_uint32 num, MinMaxLocLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[1], &data->min)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->max)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->minLoc)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->maxLoc)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[1], &data->min)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->max)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->minLoc)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->maxLoc)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateMinMaxLoc(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[1], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT8) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #1 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT8) return 
ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinMaxLoc: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[1], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT8) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #1 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT8) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinMaxLoc: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - vxReleaseImage(&input); - vxReleaseParameter(&input_param); - return status; + vxReleaseImage(&input); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processMinMaxLoc(vx_node node, const vx_reference * 
parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MinMaxLocLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMinMaxLoc(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_min_max_loc_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_min_max_loc_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); - } - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->min)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->max)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[3], &data->minLoc)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[4], &data->maxLoc)); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMinMaxLoc(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_min_max_loc_u8_pln1_host(data->pSrc,data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_min_max_loc_u8_pkd3_host(data->pSrc,data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); - } - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->min)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->max)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[3], &data->minLoc)); - STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[4], &data->maxLoc)); - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - } - return return_status; +static vx_status VX_CALLBACK processMinMaxLoc(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + MinMaxLocLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + // #if ENABLE_OPENCL + // refreshMinMaxLoc(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_min_max_loc_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_min_max_loc_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); + // } + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->min)); + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->max)); + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[3], &data->minLoc)); + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[4], &data->maxLoc)); + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #elif ENABLE_HIP + // refreshMinMaxLoc(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_min_max_loc_u8_pln1_gpu((void *)data->hip_pSrc,data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_min_max_loc_u8_pkd3_gpu((void *)data->hip_pSrc,data->srcDimensions,&data->min,&data->max,&data->minLoc,&data->maxLoc,data->rppHandle); + // } + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->min)); + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->max)); + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[3], &data->minLoc)); + // STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[4], &data->maxLoc)); + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #endif + return VX_ERROR_NOT_IMPLEMENTED; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshMinMaxLoc(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_min_max_loc_u8_pln1_host(data->pSrc, data->srcDimensions, &data->min, &data->max, &data->minLoc, &data->maxLoc, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_min_max_loc_u8_pkd3_host(data->pSrc, data->srcDimensions, &data->min, &data->max, &data->minLoc, &data->maxLoc, data->rppHandle); + } + STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[1], &data->min)); + STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[2], &data->max)); + STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[3], &data->minLoc)); + STATUS_ERROR_CHECK(vxWriteScalarValue((vx_scalar)parameters[4], &data->maxLoc)); + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeMinMaxLoc(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeMinMaxLoc(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MinMaxLocLocalData * data = new MinMaxLocLocalData; - memset(data, 0, sizeof(*data)); + MinMaxLocLocalData *data = new MinMaxLocLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMinMaxLoc(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + refreshMinMaxLoc(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.hipstream); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeMinMaxLoc(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MinMaxLocLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + MinMaxLocLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hubrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status MinMaxLoc_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MinMaxLoc", - VX_KERNEL_RPP_MINMAXLOC, - processMinMaxLoc, - 6, - validateMinMaxLoc, - initializeMinMaxLoc, - uninitializeMinMaxLoc); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MinMaxLoc", + VX_KERNEL_RPP_MINMAXLOC, + processMinMaxLoc, + 6, + validateMinMaxLoc, + initializeMinMaxLoc, + uninitializeMinMaxLoc); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_BIDIRECTIONAL, 
VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_BIDIRECTIONAL, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/MinbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/MinbatchPD.cpp index 2af2c7de35..2282f0649f 100644 --- a/amd_openvx_extensions/amd_rpp/source/MinbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/MinbatchPD.cpp @@ -22,209 +22,274 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct MinbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct MinbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshMinbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, MinbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, 
VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, 
&data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateMinbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); +
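// Note on the batch layout these refresh callbacks assume: parameter 0 is one
// vertically stacked batch, so the queried VX_IMAGE_HEIGHT is divided by
// nbatchSize, and the true per-image widths/heights come from the two
// VX_TYPE_UINT32 arrays (parameters 2 and 3). A minimal host-side sketch of
// that layout, assuming U8 images; the helper name is illustrative, not part
// of this patch:
//
// #include <VX/vx.h>
// #include <vector>
//
// // Builds one tall vx_image holding widths.size() images stacked vertically,
// // plus the width/height arrays the kernel expects as parameters 2 and 3.
// static vx_image createBatchedInput(vx_context ctx, vx_uint32 maxW, vx_uint32 maxH,
//                                    const std::vector<vx_uint32> &widths,
//                                    const std::vector<vx_uint32> &heights,
//                                    vx_array *wArr, vx_array *hArr)
// {
//     vx_size n = widths.size();
//     vx_image img = vxCreateImage(ctx, maxW, maxH * (vx_uint32)n, VX_DF_IMAGE_U8);
//     *wArr = vxCreateArray(ctx, VX_TYPE_UINT32, n);
//     *hArr = vxCreateArray(ctx, VX_TYPE_UINT32, n);
//     vxAddArrayItems(*wArr, n, widths.data(), sizeof(vx_uint32));
//     vxAddArrayItems(*hArr, n, heights.data(), sizeof(vx_uint32));
//     return img; // image i occupies rows [i * maxH, i * maxH + heights[i])
// }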
STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processMinbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MinbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processMinbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + MinbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMinbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_min_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_min_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshMinbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_min_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_min_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshMinbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_min_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_min_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMinbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_min_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_min_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshMinbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_min_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_min_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeMinbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeMinbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MinbatchPDLocalData * data = new MinbatchPDLocalData; - memset(data, 0, sizeof(*data)); + MinbatchPDLocalData *data = new MinbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMinbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshMinbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeMinbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MinbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + MinbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status MinbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MinbatchPD", - VX_KERNEL_RPP_MINBATCHPD, - processMinbatchPD, - 7, - validateMinbatchPD, - initializeMinbatchPD, - uninitializeMinbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MinbatchPD", + VX_KERNEL_RPP_MINBATCHPD, + processMinbatchPD, + 7, + validateMinbatchPD, + initializeMinbatchPD, + uninitializeMinbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
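// Note: query_target_support above simply mirrors whatever affinity the
// application set on the context (with the OpenCL backend pinned to CPU until
// a codegen callback exists for amd_rpp nodes). A hedged application-side
// sketch of selecting that affinity; the extension header name is assumed:
//
// #include <VX/vx.h>
// #include <vx_ext_amd.h> // AMD OpenVX extension: AgoTargetAffinityInfo, etc.
//
// int main()
// {
//     vx_context context = vxCreateContext();
//     AgoTargetAffinityInfo affinity = {};
//     affinity.device_type = AGO_TARGET_AFFINITY_GPU; // or AGO_TARGET_AFFINITY_CPU
//     vxSetContextAttribute(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,
//                           &affinity, sizeof(affinity));
//     // Graphs built on this context will now report GPU support from
//     // query_target_support (CPU under the OpenCL backend, per the #if above).
//     vxReleaseContext(&context);
//     return 0;
// }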
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/MinbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/MinbatchPDROID.cpp deleted file mode 100644 index 1a2933f4db..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/MinbatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct MinbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMinbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, MinbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMinbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMinbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MinbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, 
&df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMinbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_min_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_min_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMinbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_min_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_min_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMinbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MinbatchPDROIDLocalData * data = new MinbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMinbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMinbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MinbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MinbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MinbatchPDROID", - VX_KERNEL_RPP_MINBATCHPDROID, - processMinbatchPDROID, - 11, - validateMinbatchPDROID, - initializeMinbatchPDROID, - uninitializeMinbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool 
enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MinbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/MinbatchPS.cpp deleted file mode 100644 index 96dd4fb30d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/MinbatchPS.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct MinbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMinbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, MinbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMinbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - 
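// Note: the "(must be RGB2 or U008)" strings in these validators are FourCC
// spellings — a vx_df_image packs four characters into 32 bits, so applying
// "%4.4s" to the address of the value prints VX_DF_IMAGE_U8 as U008 and
// VX_DF_IMAGE_RGB as RGB2 on a little-endian host. A tiny self-contained
// illustration:
//
// #include <VX/vx.h>
// #include <cstdio>
//
// int main()
// {
//     vx_df_image fmt = VX_DF_IMAGE_RGB; // FourCC 'R','G','B','2'
//     std::printf("format=%4.4s\n", (char *)&fmt); // "format=RGB2" (little-endian)
//     return 0;
// }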
STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MinbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMinbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MinbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMinbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_min_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_min_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMinbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_min_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_min_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMinbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MinbatchPSLocalData * data = new MinbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMinbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMinbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MinbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MinbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MinbatchPS", - VX_KERNEL_RPP_MINBATCHPS, - processMinbatchPS, - 7, - validateMinbatchPS, - initializeMinbatchPS, - uninitializeMinbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
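// Note: every *_Register function in this set publishes its kernel under an
// "org.rpp.*" name with a fixed parameter order, so an application binds it by
// name rather than through a C wrapper. A hedged sketch for the retained
// MinbatchPD kernel (the caller creates the images, arrays, and scalars;
// parameter order follows the vxAddParameterToKernel calls):
//
// #include <VX/vx.h>
//
// static vx_node addMinbatchPDNode(vx_context context, vx_graph graph,
//                                  vx_image src1, vx_image src2,
//                                  vx_array widths, vx_array heights,
//                                  vx_image dst, vx_scalar nbatchSize,
//                                  vx_scalar deviceType)
// {
//     vx_kernel kernel = vxGetKernelByName(context, "org.rpp.MinbatchPD");
//     vx_node node = vxCreateGenericNode(graph, kernel);
//     vxSetParameterByIndex(node, 0, (vx_reference)src1);       // input batch 1
//     vxSetParameterByIndex(node, 1, (vx_reference)src2);       // input batch 2
//     vxSetParameterByIndex(node, 2, (vx_reference)widths);     // per-image widths
//     vxSetParameterByIndex(node, 3, (vx_reference)heights);    // per-image heights
//     vxSetParameterByIndex(node, 4, (vx_reference)dst);        // output batch
//     vxSetParameterByIndex(node, 5, (vx_reference)nbatchSize); // VX_TYPE_UINT32
//     vxSetParameterByIndex(node, 6, (vx_reference)deviceType); // CPU/GPU affinity
//     return node;
// }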
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Multiply.cpp b/amd_openvx_extensions/amd_rpp/source/Multiply.cpp deleted file mode 100644 index b78a010352..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Multiply.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MultiplyLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num, MultiplyLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMultiply(vx_node node, 
const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Multiply: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Multiply: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMultiply(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MultiplyLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMultiply(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_multiply_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_multiply_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMultiply(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_multiply_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_multiply_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MultiplyLocalData * data = new MultiplyLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMultiply(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MultiplyLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Multiply_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Multiply", - VX_KERNEL_RPP_MULTIPLY, - processMultiply, - 4, - validateMultiply, - initializeMultiply, - uninitializeMultiply); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MultiplybatchPD.cpp 
b/amd_openvx_extensions/amd_rpp/source/MultiplybatchPD.cpp index b98f5e93fd..1685733864 100644 --- a/amd_openvx_extensions/amd_rpp/source/MultiplybatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/MultiplybatchPD.cpp @@ -22,209 +22,274 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct MultiplybatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct MultiplybatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshMultiplybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, MultiplybatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], 
VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateMultiplybatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MultiplybatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, 
sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MultiplybatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MultiplybatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MultiplybatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, 
&height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processMultiplybatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MultiplybatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processMultiplybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + MultiplybatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMultiplybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_multiply_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_multiply_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshMultiplybatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_multiply_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_multiply_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshMultiplybatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_multiply_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_multiply_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMultiplybatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_multiply_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_multiply_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshMultiplybatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_multiply_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_multiply_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeMultiplybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeMultiplybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MultiplybatchPDLocalData * data = new MultiplybatchPDLocalData; - memset(data, 0, sizeof(*data)); + MultiplybatchPDLocalData *data = new MultiplybatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMultiplybatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshMultiplybatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - 
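A note on the initialize/uninitialize restructuring above: every per-batch host buffer now has a single owner. A minimal sketch of the contract the new callbacks follow, assuming the same internal_publishKernels.h types these files use (LocalData, initOnce, and teardown are illustrative names, not part of the patch):

    #include <cstdlib>
    #include "internal_publishKernels.h" // brings in RppiSize / Rpp32u

    struct LocalData {               // stand-in for MultiplybatchPDLocalData
        RppiSize *srcDimensions;     // written on every refresh, allocated exactly once
        Rpp32u *srcBatch_width;
        Rpp32u *srcBatch_height;
    };

    static void initOnce(LocalData *d, Rpp32u nbatchSize) {
        // initialize: one malloc per buffer for the node's whole lifetime;
        // the refresh callback only copies into these, it never allocates
        d->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * nbatchSize);
        d->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * nbatchSize);
        d->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * nbatchSize);
    }

    static void teardown(LocalData *d) {
        // uninitialize: release in reverse order of allocation
        free(d->srcBatch_height);
        free(d->srcBatch_width);
        free(d->srcDimensions);
    }

The old refresh path malloc'd srcDimensions plus two scratch width/height arrays on every frame and never freed them; hoisting the allocations into initialize fixes that leak and keeps the per-frame refresh path cheap.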
STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeMultiplybatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - MultiplybatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + MultiplybatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status MultiplybatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MultiplybatchPD", - VX_KERNEL_RPP_MULTIPLYBATCHPD, - processMultiplybatchPD, - 7, - validateMultiplybatchPD, - initializeMultiplybatchPD, - uninitializeMultiplybatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MultiplybatchPD", + VX_KERNEL_RPP_MULTIPLYBATCHPD, + processMultiplybatchPD, + 7, + validateMultiplybatchPD, + initializeMultiplybatchPD, + uninitializeMultiplybatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, 
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/MultiplybatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/MultiplybatchPDROID.cpp deleted file mode 100644 index e4c27c3289..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/MultiplybatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct MultiplybatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMultiplybatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, MultiplybatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = 
vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMultiplybatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MultiplybatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MultiplybatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMultiplybatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MultiplybatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMultiplybatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_multiply_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_multiply_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMultiplybatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_multiply_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_multiply_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMultiplybatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MultiplybatchPDROIDLocalData * data = new MultiplybatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMultiplybatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMultiplybatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MultiplybatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MultiplybatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MultiplybatchPDROID", - VX_KERNEL_RPP_MULTIPLYBATCHPDROID, - processMultiplybatchPDROID, - 11, - validateMultiplybatchPDROID, - initializeMultiplybatchPDROID, - uninitializeMultiplybatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
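For context on the deletion in progress here: the batchPDROID variant consumed four parallel VX_TYPE_UINT32 arrays (parameters 5 through 8: x, y, roiWidth, roiHeight) that its refresh callback packed into RppiROI entries. A hypothetical caller-side sketch of how those arrays were populated, using only standard OpenVX calls (makeRoiArrays is an illustrative name):

    #include <VX/vx.h>

    static vx_status makeRoiArrays(vx_context context, vx_uint32 batchSize,
                                   const vx_uint32 *x, const vx_uint32 *y,
                                   const vx_uint32 *w, const vx_uint32 *h,
                                   vx_array roi[4]) {
        const vx_uint32 *src[4] = { x, y, w, h }; // one entry per batch image
        for (int i = 0; i < 4; i++) {
            roi[i] = vxCreateArray(context, VX_TYPE_UINT32, batchSize);
            vx_status status = vxAddArrayItems(roi[i], batchSize, src[i], sizeof(vx_uint32));
            if (status != VX_SUCCESS)
                return status;
        }
        return VX_SUCCESS;
    }

With the ROID files gone, per-image ROIs appear no longer expressible through this extension's Multiply path; callers that depended on them would presumably crop before invoking the batchPD kernel.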
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/MultiplybatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/MultiplybatchPS.cpp deleted file mode 100644 index 658c9103a9..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/MultiplybatchPS.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
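Worth noting before the next deleted file: the batchPS variant below is structurally identical to batchPD; only the rppi_* entry points differ (rppi_multiply_u8_*_batchPS_* versus _batchPD_*). As a refactoring sketch (an assumption, not code from this patch), the repeated U8/RGB dispatch in every process callback could be collapsed behind a function pointer whose typedef mirrors the argument list used at these call sites:

    #include "internal_publishKernels.h" // RppStatus, RppPtr_t, RppiSize, rppHandle_t, VX types

    typedef RppStatus (*HostFn)(RppPtr_t, RppPtr_t, RppiSize *, RppiSize,
                                RppPtr_t, Rpp32u, rppHandle_t);

    // validate already restricts inputs to VX_DF_IMAGE_U8 or VX_DF_IMAGE_RGB,
    // so a two-way select is sufficient here
    static RppStatus dispatchByFormat(vx_df_image df_image, HostFn pln1, HostFn pkd3,
                                      RppPtr_t src1, RppPtr_t src2, RppiSize *dims,
                                      RppiSize maxDims, RppPtr_t dst,
                                      Rpp32u nbatchSize, rppHandle_t handle) {
        HostFn fn = (df_image == VX_DF_IMAGE_U8) ? pln1 : pkd3;
        return fn(src1, src2, dims, maxDims, dst, nbatchSize, handle);
    }

A call site would then read, e.g., dispatchByFormat(df_image, rppi_multiply_u8_pln1_batchPD_host, rppi_multiply_u8_pkd3_batchPD_host, ...), shrinking each process callback to one line per backend.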
-*/ - -#include "internal_publishKernels.h" - -struct MultiplybatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshMultiplybatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, MultiplybatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateMultiplybatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MultiplybatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: MultiplybatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processMultiplybatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - MultiplybatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshMultiplybatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_multiply_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_multiply_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshMultiplybatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_multiply_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_multiply_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeMultiplybatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MultiplybatchPSLocalData * data = new MultiplybatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshMultiplybatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeMultiplybatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - MultiplybatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status MultiplybatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MultiplybatchPS", - VX_KERNEL_RPP_MULTIPLYBATCHPS, - processMultiplybatchPS, - 7, - validateMultiplybatchPS, - initializeMultiplybatchPS, - uninitializeMultiplybatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
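A hypothetical usage sketch for the surviving kernel: wiring org.rpp.MultiplybatchPD into a graph through the generic-node API, with the seven parameters in the order registered above (object creation and error handling elided; makeMultiplyBatchPDNode is an illustrative name):

    #include <VX/vx.h>

    vx_node makeMultiplyBatchPDNode(vx_graph graph, vx_context context,
                                    vx_image src1, vx_image src2,
                                    vx_array widths, vx_array heights,
                                    vx_image dst, vx_scalar batchSize,
                                    vx_scalar deviceType) {
        vx_kernel kernel = vxGetKernelByName(context, "org.rpp.MultiplybatchPD");
        vx_node node = vxCreateGenericNode(graph, kernel);
        vx_reference params[7] = {
            (vx_reference)src1, (vx_reference)src2,            // 0, 1: input images
            (vx_reference)widths, (vx_reference)heights,       // 2, 3: per-image dims
            (vx_reference)dst,                                 // 4: output image
            (vx_reference)batchSize, (vx_reference)deviceType  // 5, 6: scalars
        };
        for (vx_uint32 i = 0; i < 7; i++)
            vxSetParameterByIndex(node, i, params[i]);
        vxReleaseKernel(&kernel);
        return node;
    }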
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Noise.cpp b/amd_openvx_extensions/amd_rpp/source/Noise.cpp deleted file mode 100644 index f1546b1de8..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Noise.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
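The single-image Noise kernel deleted below follows the same pattern as the Multiply deletions: the batchPD variant with nbatchSize == 1 covers the same ground. A sketch of the batch-of-one setup, under that assumption (fillBatchOfOne is an illustrative name):

    #include <VX/vx.h>

    static void fillBatchOfOne(vx_array widths, vx_array heights, vx_array probs,
                               vx_uint32 w, vx_uint32 h, vx_float32 noiseProbability) {
        // with nbatchSize == 1, each per-image array carries exactly one element
        vxAddArrayItems(widths, 1, &w, sizeof(w));
        vxAddArrayItems(heights, 1, &h, sizeof(h));
        vxAddArrayItems(probs, 1, &noiseProbability, sizeof(noiseProbability));
    }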
-*/ - -#include "internal_publishKernels.h" - -struct NoiseLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f noiseProbability; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNoise(vx_node node, const vx_reference *parameters, vx_uint32 num, NoiseLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->noiseProbability)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNoise(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Noise: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], 
VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNoise(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NoiseLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNoise(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_noise_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->noiseProbability,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_noise_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->noiseProbability,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNoise(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_noise_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->noiseProbability,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_noise_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->noiseProbability,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNoise(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NoiseLocalData * data = new NoiseLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNoise(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNoise(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NoiseLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Noise_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Noise", - VX_KERNEL_RPP_NOISE, - processNoise, - 4, - validateNoise, - initializeNoise, - uninitializeNoise); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, 
VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/NoisebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/NoisebatchPD.cpp index 0a1cd0f5ce..75cb3e267f 100644 --- a/amd_openvx_extensions/amd_rpp/source/NoisebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/NoisebatchPD.cpp @@ -22,222 +22,265 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct NoisebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *noiseProbability; +struct NoisebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *noiseProbability; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshNoisebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, NoisebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->noiseProbability = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->noiseProbability, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 
0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->noiseProbability, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateNoisebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - 
vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NoisebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NoisebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output,
VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processNoisebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NoisebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processNoisebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + NoisebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNoisebatchPD(node, parameters, num, data); - data->noiseProbability[0] = 0.01; - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_noise_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->noiseProbability,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_noise_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->noiseProbability,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshNoisebatchPD(node, parameters, num, data); + data->noiseProbability[0] = 0.01; + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_noise_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->noiseProbability, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_noise_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->noiseProbability, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshNoisebatchPD(node, parameters, num, data); - data->noiseProbability[0] = 0.01; - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_noise_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->noiseProbability,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_noise_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->noiseProbability,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshNoisebatchPD(node, parameters, num, data); + data->noiseProbability[0] = 0.01; + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_noise_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->noiseProbability, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_noise_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->noiseProbability, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNoisebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_noise_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->noiseProbability,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_noise_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->noiseProbability,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshNoisebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_noise_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->noiseProbability, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_noise_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->noiseProbability, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeNoisebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeNoisebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - NoisebatchPDLocalData * data = new NoisebatchPDLocalData; - memset(data, 0, sizeof(*data)); + NoisebatchPDLocalData *data = new NoisebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNoisebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->noiseProbability = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + refreshNoisebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeNoisebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - NoisebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + NoisebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + 
if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->noiseProbability); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status NoisebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NoisebatchPD", - VX_KERNEL_RPP_NOISEBATCHPD, - processNoisebatchPD, - 7, - validateNoisebatchPD, - initializeNoisebatchPD, - uninitializeNoisebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NoisebatchPD", + VX_KERNEL_RPP_NOISEBATCHPD, + processNoisebatchPD, + 7, + validateNoisebatchPD, + initializeNoisebatchPD, + uninitializeNoisebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); -
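// For reference: the refactor in this hunk moves every per-batch scratch allocation
// (srcDimensions, srcBatch_width, srcBatch_height, noiseProbability) out of
// refreshNoisebatchPD, which runs on every execution and previously leaked the
// buffers it malloc'd, into initializeNoisebatchPD, with matching free() calls in
// uninitializeNoisebatchPD above. A minimal sketch of that allocate-once lifetime
// pattern follows; names are hypothetical, error handling is trimmed, and it
// assumes the RPP types from rpp.h and the OpenVX API from VX/vx.h.

struct BatchScratch
{
    Rpp32u batchSize = 0;     // read once from the nbatchSize scalar parameter
    RppiSize *dims = nullptr; // per-image dimensions, repopulated on every refresh
};

static vx_status scratchInit(BatchScratch *s, vx_scalar nbatchSizeScalar)
{
    // initialize-time: read the batch size once and size all buffers from it
    if (vxCopyScalar(nbatchSizeScalar, &s->batchSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST) != VX_SUCCESS)
        return VX_FAILURE;
    s->dims = (RppiSize *)malloc(sizeof(RppiSize) * s->batchSize);
    return s->dims ? VX_SUCCESS : VX_ERROR_NO_MEMORY;
}

static void scratchRelease(BatchScratch *s)
{
    // uninitialize-time: one matching free, so the per-frame refresh path never allocates
    free(s->dims);
    s->dims = nullptr;
}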
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/NoisebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/NoisebatchPDROID.cpp deleted file mode 100644 index c169a702e2..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/NoisebatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct NoisebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *noiseProbability; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNoisebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, NoisebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->noiseProbability = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->noiseProbability, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNoisebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NoisebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNoisebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NoisebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNoisebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_noise_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->noiseProbability,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_noise_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->noiseProbability,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNoisebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_noise_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->noiseProbability,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_noise_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->noiseProbability,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNoisebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NoisebatchPDROIDLocalData * data = new NoisebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNoisebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNoisebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NoisebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status NoisebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NoisebatchPDROID", - VX_KERNEL_RPP_NOISEBATCHPDROID, - processNoisebatchPDROID, - 11, - validateNoisebatchPDROID, - initializeNoisebatchPDROID, - uninitializeNoisebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/NoisebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/NoisebatchPS.cpp deleted file mode 100644 index 6859836ec1..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/NoisebatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct NoisebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f noiseProbability; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNoisebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, NoisebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->noiseProbability)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNoisebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - 
vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NoisebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNoisebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NoisebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNoisebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_noise_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->noiseProbability,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_noise_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->noiseProbability,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNoisebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_noise_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->noiseProbability,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_noise_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->noiseProbability,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNoisebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NoisebatchPSLocalData * data = new NoisebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNoisebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNoisebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NoisebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status NoisebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NoisebatchPS", - VX_KERNEL_RPP_NOISEBATCHPS, - processNoisebatchPS, - 7, - validateNoisebatchPS, - initializeNoisebatchPS, - uninitializeNoisebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/NonLinearFilter.cpp b/amd_openvx_extensions/amd_rpp/source/NonLinearFilter.cpp deleted file mode 100644 index 7df72597ed..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/NonLinearFilter.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct NonLinearFilterLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNonLinearFilter(vx_node node, const vx_reference *parameters, vx_uint32 num, NonLinearFilterLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNonLinearFilter(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - 
vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonLinearFilter: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNonLinearFilter(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NonLinearFilterLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNonLinearFilter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_nonlinear_filter_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_nonlinear_filter_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNonLinearFilter(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_nonlinear_filter_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_nonlinear_filter_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNonLinearFilter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonLinearFilterLocalData * data = new NonLinearFilterLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNonLinearFilter(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNonLinearFilter(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonLinearFilterLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status NonLinearFilter_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonLinearFilter", - VX_KERNEL_RPP_NONLINEARFILTER, - processNonLinearFilter, - 4, - validateNonLinearFilter, - initializeNonLinearFilter, - uninitializeNonLinearFilter); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/NonLinearFilterbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/NonLinearFilterbatchPD.cpp index aa02d497c0..7d87c14eec 100644 --- a/amd_openvx_extensions/amd_rpp/source/NonLinearFilterbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/NonLinearFilterbatchPD.cpp @@ -22,201 +22,264 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct NonLinearFilterbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; +struct NonLinearFilterbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshNonLinearFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, NonLinearFilterbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = 
data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateNonLinearFilterbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonLinearFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - 
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonLinearFilterbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processNonLinearFilterbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NonLinearFilterbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processNonLinearFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + NonLinearFilterbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == 
AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNonLinearFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_nonlinear_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshNonLinearFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_nonlinear_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshNonLinearFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_nonlinear_filter_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNonLinearFilterbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_nonlinear_filter_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshNonLinearFilterbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_nonlinear_filter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeNonLinearFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeNonLinearFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - NonLinearFilterbatchPDLocalData * data = new NonLinearFilterbatchPDLocalData; - memset(data, 0, sizeof(*data)); + NonLinearFilterbatchPDLocalData *data = new NonLinearFilterbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNonLinearFilterbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshNonLinearFilterbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeNonLinearFilterbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - NonLinearFilterbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + NonLinearFilterbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status NonLinearFilterbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonLinearFilterbatchPD", - VX_KERNEL_RPP_NONLINEARFILTERBATCHPD, - processNonLinearFilterbatchPD, - 7, - validateNonLinearFilterbatchPD, - initializeNonLinearFilterbatchPD, - uninitializeNonLinearFilterbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonLinearFilterbatchPD", + VX_KERNEL_RPP_NONLINEARFILTERBATCHPD, + processNonLinearFilterbatchPD, + 7, + validateNonLinearFilterbatchPD, + initializeNonLinearFilterbatchPD, + uninitializeNonLinearFilterbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, 
VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/NonLinearFilterbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/NonLinearFilterbatchPDROID.cpp deleted file mode 100644 index b0817ed02f..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/NonLinearFilterbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct NonLinearFilterbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNonLinearFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, NonLinearFilterbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNonLinearFilterbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonLinearFilterbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNonLinearFilterbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NonLinearFilterbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNonLinearFilterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_nonlinear_filter_u8_pln1_batchPD_ROID_gpu((void 
*)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNonLinearFilterbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_nonlinear_filter_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNonLinearFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonLinearFilterbatchPDROIDLocalData * data = new NonLinearFilterbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNonLinearFilterbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNonLinearFilterbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonLinearFilterbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status NonLinearFilterbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonLinearFilterbatchPDROID", - VX_KERNEL_RPP_NONLINEARFILTERBATCHPDROID, - processNonLinearFilterbatchPDROID, - 11, - validateNonLinearFilterbatchPDROID, - initializeNonLinearFilterbatchPDROID, - uninitializeNonLinearFilterbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - 
STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/NonLinearFilterbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/NonLinearFilterbatchPS.cpp deleted file mode 100644 index dc5e1f92ff..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/NonLinearFilterbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct NonLinearFilterbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNonLinearFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, NonLinearFilterbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNonLinearFilterbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter 
input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonLinearFilterbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNonLinearFilterbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NonLinearFilterbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNonLinearFilterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_nonlinear_filter_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNonLinearFilterbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_nonlinear_filter_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_nonlinear_filter_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNonLinearFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonLinearFilterbatchPSLocalData * data = new NonLinearFilterbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNonLinearFilterbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNonLinearFilterbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonLinearFilterbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status NonLinearFilterbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonLinearFilterbatchPS", - VX_KERNEL_RPP_NONLINEARFILTERBATCHPS, - processNonLinearFilterbatchPS, - 7, - validateNonLinearFilterbatchPS, - initializeNonLinearFilterbatchPS, - uninitializeNonLinearFilterbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git 
a/amd_openvx_extensions/amd_rpp/source/NonMaxSupression.cpp b/amd_openvx_extensions/amd_rpp/source/NonMaxSupression.cpp deleted file mode 100644 index 73d2d4e6bf..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/NonMaxSupression.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct NonMaxSupressionLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNonMaxSupression(vx_node node, const vx_reference *parameters, vx_uint32 num, NonMaxSupressionLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->kernelSize)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNonMaxSupression(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return 
ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonMaxSupression: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNonMaxSupression(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NonMaxSupressionLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNonMaxSupression(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_non_max_suppression_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_non_max_suppression_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNonMaxSupression(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_non_max_suppression_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_non_max_suppression_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->kernelSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNonMaxSupression(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonMaxSupressionLocalData * data = new NonMaxSupressionLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNonMaxSupression(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNonMaxSupression(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonMaxSupressionLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status NonMaxSupression_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonMaxSupression", - VX_KERNEL_RPP_NONMAXSUPRESSION, - processNonMaxSupression, - 4, - validateNonMaxSupression, - initializeNonMaxSupression, - uninitializeNonMaxSupression); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/NonMaxSupressionbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/NonMaxSupressionbatchPD.cpp index 25978f239f..2e73a17ffb 100644 --- a/amd_openvx_extensions/amd_rpp/source/NonMaxSupressionbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/NonMaxSupressionbatchPD.cpp @@ -22,201 +22,264 @@ THE SOFTWARE. 
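NonMaxSupressionbatchPD below gets the same rework as NonLinearFilterbatchPD above: srcBatch_width/srcBatch_height become preallocated struct members, and hip_pSrc/hip_pDst buffers are added behind #elif ENABLE_HIP so the GPU branch of the process callback can hand the rppi_*_gpu entry points a HIP device pointer instead of a cl_mem. The sketch below condenses that compile-time backend split into a single typedef (the diff itself duplicates the call under each #if branch with the matching buffer member); Buffer and runKernel are made-up stand-ins, not RPP names.

#include <cstdio>

// Build flags mirror the diff; exactly one backend survives preprocessing.
#define ENABLE_OPENCL 1
#define ENABLE_HIP 0

#if ENABLE_OPENCL
struct OpaqueClMem;                 // stand-in for the opaque cl_mem handle
typedef OpaqueClMem *Buffer;
#elif ENABLE_HIP
typedef void *Buffer;               // HIP hands the kernels raw device pointers
#endif

// Stand-in for an rppi_*_batchPD_gpu entry point.
static int runKernel(Buffer src, Buffer dst) {
    (void)src;
    (void)dst;                      // a real kernel would enqueue device work here
    return 0;
}

int main() {
    Buffer src = nullptr, dst = nullptr;
    std::printf("status=%d\n", runKernel(src, dst));
    return 0;
}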
#include "internal_publishKernels.h" -struct NonMaxSupressionbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; +struct NonMaxSupressionbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *kernelSize; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshNonMaxSupressionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, NonMaxSupressionbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = 
data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateNonMaxSupressionbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonMaxSupressionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); 
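One convention in the refresh callback above is easy to miss: the VX image passed as parameters[0] carries the whole batch stacked vertically, so the queried height is the sum over all batch entries, and dividing by nbatchSize recovers the per-image maximum. A tiny numeric check of that convention (values are illustrative):

#include <cassert>

int main()
{
    // e.g. a batch of 4 images, each at most 480 rows tall, stored as one
    // 1920-row VX image: maxSrcDimensions.height = 1920 / 4 = 480.
    unsigned int totalHeight = 1920, nbatchSize = 4;
    assert(totalHeight / nbatchSize == 480);
    return 0;
}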
- STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonMaxSupressionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processNonMaxSupressionbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NonMaxSupressionbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processNonMaxSupressionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + NonMaxSupressionbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type ==
AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNonMaxSupressionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_non_max_suppression_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_non_max_suppression_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshNonMaxSupressionbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_non_max_suppression_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_non_max_suppression_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshNonMaxSupressionbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_non_max_suppression_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_non_max_suppression_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNonMaxSupressionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_non_max_suppression_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_non_max_suppression_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshNonMaxSupressionbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_non_max_suppression_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_non_max_suppression_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->kernelSize, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeNonMaxSupressionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeNonMaxSupressionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - NonMaxSupressionbatchPDLocalData * data = new NonMaxSupressionbatchPDLocalData; - memset(data, 0, sizeof(*data)); + NonMaxSupressionbatchPDLocalData *data = new NonMaxSupressionbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNonMaxSupressionbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshNonMaxSupressionbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeNonMaxSupressionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - NonMaxSupressionbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + NonMaxSupressionbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->kernelSize); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status NonMaxSupressionbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonMaxSupressionbatchPD", - VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD, - processNonMaxSupressionbatchPD, - 7, - validateNonMaxSupressionbatchPD, - initializeNonMaxSupressionbatchPD, - uninitializeNonMaxSupressionbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonMaxSupressionbatchPD", + VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPD, + processNonMaxSupressionbatchPD, + 7, + validateNonMaxSupressionbatchPD, + initializeNonMaxSupressionbatchPD, + uninitializeNonMaxSupressionbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3,
VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/NonMaxSupressionbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/NonMaxSupressionbatchPDROID.cpp deleted file mode 100644 index 7c5892bf40..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/NonMaxSupressionbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct NonMaxSupressionbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNonMaxSupressionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, NonMaxSupressionbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->kernelSize = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->kernelSize, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNonMaxSupressionbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonMaxSupressionbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNonMaxSupressionbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NonMaxSupressionbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNonMaxSupressionbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_non_max_suppression_u8_pln1_batchPD_ROID_gpu((void 
*)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_non_max_suppression_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNonMaxSupressionbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_non_max_suppression_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_non_max_suppression_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNonMaxSupressionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonMaxSupressionbatchPDROIDLocalData * data = new NonMaxSupressionbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNonMaxSupressionbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNonMaxSupressionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonMaxSupressionbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status NonMaxSupressionbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonMaxSupressionbatchPDROID", - VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPDROID, - processNonMaxSupressionbatchPDROID, - 11, - validateNonMaxSupressionbatchPDROID, - initializeNonMaxSupressionbatchPDROID, - uninitializeNonMaxSupressionbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == 
AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/NonMaxSupressionbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/NonMaxSupressionbatchPS.cpp deleted file mode 100644 index 893158ce37..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/NonMaxSupressionbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct NonMaxSupressionbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u kernelSize; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshNonMaxSupressionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, NonMaxSupressionbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->kernelSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateNonMaxSupressionbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter 
input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: NonMaxSupressionbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processNonMaxSupressionbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - NonMaxSupressionbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshNonMaxSupressionbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_non_max_suppression_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_non_max_suppression_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshNonMaxSupressionbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_non_max_suppression_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_non_max_suppression_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->kernelSize,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeNonMaxSupressionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonMaxSupressionbatchPSLocalData * data = new NonMaxSupressionbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshNonMaxSupressionbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeNonMaxSupressionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - NonMaxSupressionbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status NonMaxSupressionbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.NonMaxSupressionbatchPS", - VX_KERNEL_RPP_NONMAXSUPRESSIONBATCHPS, - processNonMaxSupressionbatchPS, - 7, - validateNonMaxSupressionbatchPS, - initializeNonMaxSupressionbatchPS, - uninitializeNonMaxSupressionbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git 
a/amd_openvx_extensions/amd_rpp/source/Occlusion.cpp b/amd_openvx_extensions/amd_rpp/source/Occlusion.cpp deleted file mode 100644 index 049bd6dbf5..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Occlusion.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct OcclusionLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - RppiSize dstDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - Rpp32u src1x1; - Rpp32u src1y1; - Rpp32u src1x2; - Rpp32u src1y2; - Rpp32u src2x1; - Rpp32u src2y1; - Rpp32u src2x2; - Rpp32u src2y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshOcclusion(vx_node node, const vx_reference *parameters, vx_uint32 num, OcclusionLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->dstDimensions.height, sizeof(data->dstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->dstDimensions.width, sizeof(data->dstDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->src1x1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->src1y1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->src1x2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->src1y2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->src2x1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->src2y1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->src2x2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->src2y2)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, 
&data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateOcclusion(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return 
ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Occlusion: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Occlusion: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processOcclusion(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - OcclusionLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshOcclusion(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_occlusion_u8_pln1_gpu((void *)data->cl_pSrc1,data->srcDimensions,(void *)data->cl_pSrc2,data->dstDimensions,(void *)data->cl_pDst,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_occlusion_u8_pkd3_gpu((void *)data->cl_pSrc1,data->srcDimensions,(void *)data->cl_pSrc2,data->dstDimensions,(void *)data->cl_pDst,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshOcclusion(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_occlusion_u8_pln1_host(data->pSrc1,data->srcDimensions,data->pSrc2,data->dstDimensions,data->pDst,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_occlusion_u8_pkd3_host(data->pSrc1,data->srcDimensions,data->pSrc2,data->dstDimensions,data->pDst,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeOcclusion(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - OcclusionLocalData * data = new OcclusionLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshOcclusion(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeOcclusion(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - OcclusionLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Occlusion_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Occlusion", - VX_KERNEL_RPP_OCCLUSION, - processOcclusion, - 12, - validateOcclusion, - initializeOcclusion, - uninitializeOcclusion); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/OcclusionbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/OcclusionbatchPD.cpp deleted file mode 100644 index bacfcc8249..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/OcclusionbatchPD.cpp +++ /dev/null @@ -1,282 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct OcclusionbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - vx_uint32 *src1x1; - vx_uint32 *src1y1; - vx_uint32 *src1x2; - vx_uint32 *src1y2; - vx_uint32 *src2x1; - vx_uint32 *src2y1; - vx_uint32 *src2x2; - vx_uint32 *src2y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshOcclusionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, OcclusionbatchPDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src1x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, sizeof(vx_uint32),data->src1x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[8], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src1y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, arr_size, sizeof(vx_uint32),data->src1y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[9], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src1x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, arr_size, sizeof(vx_uint32),data->src1x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[10], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src1y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, arr_size, sizeof(vx_uint32),data->src1y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[11], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src2x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[11], 0, arr_size, sizeof(vx_uint32),data->src2x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[12], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src2y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[12], 0, arr_size, sizeof(vx_uint32),data->src2y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[13], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src2x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[13], 0, arr_size, sizeof(vx_uint32),data->src2x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[14], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src2y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[14], 0, arr_size, sizeof(vx_uint32),data->src2y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[15], &data->nbatchSize)); - 
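// The eight coordinate blocks above repeat a single query-size / allocate /
// copy idiom, one vx_array at a time. A minimal sketch of that idiom as a
// reusable helper; the helper name copyUint32Array is hypothetical, and the
// extension's internal_publishKernels.h is assumed to supply the VX types.
// The caller owns, and must eventually free(), the returned buffer.
static vx_uint32 *copyUint32Array(vx_array arr, vx_status *status)
{
    size_t numItems = 0;
    *status = vxQueryArray(arr, VX_ARRAY_ATTRIBUTE_NUMITEMS, &numItems, sizeof(numItems));
    if (*status != VX_SUCCESS)
        return NULL;
    vx_uint32 *buf = (vx_uint32 *)malloc(sizeof(vx_uint32) * numItems);
    if (buf == NULL) {
        *status = VX_ERROR_NO_MEMORY;
        return NULL;
    }
    // The stride passed to vxCopyArrayRange must match the array item size.
    *status = vxCopyArrayRange(arr, 0, numItems, sizeof(vx_uint32),
                               buf, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
    if (*status != VX_SUCCESS) {
        free(buf);
        return NULL;
    }
    return buf;
}
// With such a helper each block above collapses to a one-liner, e.g.
// data->src1x1 = copyUint32Array((vx_array)parameters[7], &status);
// Note that refreshOcclusionbatchPD mallocs on every invocation without
// freeing the previous buffers, so repeated refreshes leak this memory.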
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateOcclusionbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[15], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #15 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[16], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #16 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - 
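// The vxQueryImage height/width calls in the refresh callback above rely on
// the amd_rpp batching convention: a batch of N images travels as a single
// vx_image whose height is N times the per-image maximum height, so the
// per-image maximum falls out by dividing the queried height by nbatchSize.
// A sketch of that computation under the same headers; the function name
// maxDimsFromBatchImage is hypothetical:
static vx_status maxDimsFromBatchImage(vx_image img, Rpp32u nbatchSize, RppiSize *maxDims)
{
    vx_uint32 width = 0, height = 0;
    vx_status status = vxQueryImage(img, VX_IMAGE_WIDTH, &width, sizeof(width));
    if (status == VX_SUCCESS)
        status = vxQueryImage(img, VX_IMAGE_HEIGHT, &height, sizeof(height));
    if (status != VX_SUCCESS || nbatchSize == 0)
        return VX_FAILURE;
    maxDims->width = width;
    maxDims->height = height / nbatchSize; // vertically stacked batch
    return VX_SUCCESS;
}
// Measured against this sketch, the destination branch above looks like a
// copy/paste slip: the parameters[1] height lands in maxSrcDimensions.height
// rather than maxDstDimensions.height, and dstBatch_width/dstBatch_height
// are consumed by the dimension loop without ever being filled from an array.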
vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: OcclusionbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: OcclusionbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processOcclusionbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - OcclusionbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshOcclusionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_occlusion_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_occlusion_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshOcclusionbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_occlusion_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_occlusion_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeOcclusionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - OcclusionbatchPDLocalData * data = new OcclusionbatchPDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[16], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshOcclusionbatchPD(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeOcclusionbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - OcclusionbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status OcclusionbatchPD_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.OcclusionbatchPD", - VX_KERNEL_RPP_OCCLUSIONBATCHPD, - processOcclusionbatchPD, - 17, - validateOcclusionbatchPD, - initializeOcclusionbatchPD, - uninitializeOcclusionbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
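// Every kernel in this extension pairs its initialize/uninitialize callbacks
// the same way as above: allocate a local-data struct, read the device
// affinity from the trailing scalar parameter, create an RPP handle sized to
// the batch (bound to the OpenCL queue for GPU affinity), stash the pointer
// in VX_NODE_LOCAL_DATA_PTR, and tear it all down in reverse at uninit.
// A condensed sketch of that pairing; GenericLocalData and the callback
// names are illustrative, and the real kernels fill nbatchSize from a
// parameter (via their refresh callback) before creating the handle:
struct GenericLocalData
{
    RPPCommonHandle handle;
    rppHandle_t rppHandle;
    Rpp32u device_type;
    Rpp32u nbatchSize;
};

static vx_status VX_CALLBACK initializeGeneric(vx_node node, const vx_reference *parameters, vx_uint32 num)
{
    GenericLocalData *data = new GenericLocalData;
    memset(data, 0, sizeof(*data));
    data->nbatchSize = 1; // placeholder; normally read from a scalar parameter
    // By convention the device-affinity scalar is the last kernel parameter.
    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[num - 1], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
#if ENABLE_OPENCL
    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
#endif
    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
        rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize);
    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
    return VX_SUCCESS;
}

static vx_status VX_CALLBACK uninitializeGeneric(vx_node node, const vx_reference *parameters, vx_uint32 num)
{
    GenericLocalData *data = NULL;
    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
#if ENABLE_OPENCL
    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
        rppDestroyGPU(data->rppHandle); // must match the *WithStream* create
#endif
    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
        rppDestroyHost(data->rppHandle);
    delete data;
    return VX_SUCCESS;
}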
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 13, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 14, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 15, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 16, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/OcclusionbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/OcclusionbatchPDROID.cpp deleted file mode 100644 index f3d991ded6..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/OcclusionbatchPDROID.cpp +++ /dev/null @@ -1,306 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct OcclusionbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - vx_uint32 *src1x1; - vx_uint32 *src1y1; - vx_uint32 *src1x2; - vx_uint32 *src1y2; - vx_uint32 *src2x1; - vx_uint32 *src2y1; - vx_uint32 *src2x2; - vx_uint32 *src2y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshOcclusionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, OcclusionbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src1x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, sizeof(vx_uint32),data->src1x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[8], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src1y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, arr_size, sizeof(vx_uint32),data->src1y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[9], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src1x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, arr_size, sizeof(vx_uint32),data->src1x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[10], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src1y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, arr_size, sizeof(vx_uint32),data->src1y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[11], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src2x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[11], 0, arr_size, sizeof(vx_uint32),data->src2x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[12], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src2y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[12], 0, arr_size, sizeof(vx_uint32),data->src2y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[13], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src2x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[13], 0, arr_size, sizeof(vx_uint32),data->src2x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[14], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->src2y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[14], 0, arr_size, sizeof(vx_uint32),data->src2y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[15], 
&data->nbatchSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[20], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[16], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[17], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[18], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[19], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, 
&data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateOcclusionbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[15], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #15 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[20], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #20 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[21], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #21 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: OcclusionbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: OcclusionbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processOcclusionbatchPDROID(vx_node node, 
const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - OcclusionbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshOcclusionbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_occlusion_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_occlusion_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshOcclusionbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_occlusion_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_occlusion_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeOcclusionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - OcclusionbatchPDROIDLocalData * data = new OcclusionbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[21], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshOcclusionbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeOcclusionbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - OcclusionbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status OcclusionbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.OcclusionbatchPDROID", - VX_KERNEL_RPP_OCCLUSIONBATCHPDROID, - processOcclusionbatchPDROID, - 22, - validateOcclusionbatchPDROID, - initializeOcclusionbatchPDROID, - uninitializeOcclusionbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
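// The ROID variant being removed differs from plain batchPD only in the four
// extra ROI arrays (parameters 16..19) and the loop that zips them into
// per-image RppiROI records. A sketch of that zip step; buildRoiPoints is a
// hypothetical helper, with the RppiROI field names exactly as used above.
// The caller owns, and must free(), the returned buffer:
static RppiROI *buildRoiPoints(const vx_reference *parameters, Rpp32u nbatchSize)
{
    Rpp32u *x = (Rpp32u *)malloc(sizeof(Rpp32u) * nbatchSize);
    Rpp32u *y = (Rpp32u *)malloc(sizeof(Rpp32u) * nbatchSize);
    Rpp32u *w = (Rpp32u *)malloc(sizeof(Rpp32u) * nbatchSize);
    Rpp32u *h = (Rpp32u *)malloc(sizeof(Rpp32u) * nbatchSize);
    RppiROI *roi = (RppiROI *)malloc(sizeof(RppiROI) * nbatchSize);
    bool ok = x && y && w && h && roi;
    Rpp32u *cols[4] = { x, y, w, h };
    for (int p = 0; ok && p < 4; p++) {
        // parameters[16]..[19] hold roiX, roiY, roiWidth, roiHeight in order
        ok = vxCopyArrayRange((vx_array)parameters[16 + p], 0, nbatchSize,
                              sizeof(Rpp32u), cols[p], VX_READ_ONLY,
                              VX_MEMORY_TYPE_HOST) == VX_SUCCESS;
    }
    if (ok) {
        for (Rpp32u i = 0; i < nbatchSize; i++) {
            roi[i].x = x[i];
            roi[i].y = y[i];
            roi[i].roiWidth = w[i];
            roi[i].roiHeight = h[i];
        }
    } else {
        free(roi);
        roi = NULL;
    }
    free(x); free(y); free(w); free(h); // scratch only needed for the copy
    return roi;
}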
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 13, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 14, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 15, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 16, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 17, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 18, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 19, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 20, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 21, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/OcclusionbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/OcclusionbatchPS.cpp deleted file mode 100644 index 0d803e8132..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/OcclusionbatchPS.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct OcclusionbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; - Rpp32u src1x1; - Rpp32u src1y1; - Rpp32u src1x2; - Rpp32u src1y2; - Rpp32u src2x1; - Rpp32u src2y1; - Rpp32u src2x2; - Rpp32u src2y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshOcclusionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, OcclusionbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->src1x1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->src1y1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->src1x2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->src1y2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->src2x1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->src2y1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->src2x2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[12], &data->src2y2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[13], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateOcclusionbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[13], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #13 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[14], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #14 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != 
VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: OcclusionbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: OcclusionbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processOcclusionbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - OcclusionbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshOcclusionbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_occlusion_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_occlusion_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshOcclusionbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_occlusion_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_occlusion_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->src1x1,data->src1y1,data->src1x2,data->src1y2,data->src2x1,data->src2y1,data->src2x2,data->src2y2,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeOcclusionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - OcclusionbatchPSLocalData * data = new OcclusionbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[14], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshOcclusionbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeOcclusionbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - OcclusionbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status OcclusionbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.OcclusionbatchPS", - VX_KERNEL_RPP_OCCLUSIONBATCHPS, - processOcclusionbatchPS, - 15, - validateOcclusionbatchPS, - initializeOcclusionbatchPS, - uninitializeOcclusionbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
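// In the batchPS ("single parameter set") flavor above, the occlusion
// coordinates arrive as eight scalars shared by the whole batch rather than
// eight per-image arrays. A sketch of that read pattern; the struct and
// function names are hypothetical, while the parameter indices (5..12)
// match the scalar slots registered below:
struct OcclusionCoords
{
    Rpp32u src1x1, src1y1, src1x2, src1y2;
    Rpp32u src2x1, src2y1, src2x2, src2y2;
};

static vx_status readOcclusionCoords(const vx_reference *parameters, OcclusionCoords *c)
{
    Rpp32u *fields[8] = { &c->src1x1, &c->src1y1, &c->src1x2, &c->src1y2,
                          &c->src2x1, &c->src2y1, &c->src2x2, &c->src2y2 };
    for (int i = 0; i < 8; i++) {
        // parameter 5 is src1x1; the remaining coordinates follow in order
        vx_status status = vxReadScalarValue((vx_scalar)parameters[5 + i], fields[i]);
        if (status != VX_SUCCESS)
            return status;
    }
    return VX_SUCCESS;
}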
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 13, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 14, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Phase.cpp b/amd_openvx_extensions/amd_rpp/source/Phase.cpp deleted file mode 100644 index 4e33bc51fa..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Phase.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct PhaseLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshPhase(vx_node node, const vx_reference *parameters, vx_uint32 num, PhaseLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validatePhase(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Phase: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Phase: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, 
sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processPhase(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - PhaseLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshPhase(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_phase_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_phase_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshPhase(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_phase_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_phase_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializePhase(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PhaseLocalData * data = new PhaseLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshPhase(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializePhase(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PhaseLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Phase_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Phase", - VX_KERNEL_RPP_PHASE, - processPhase, - 4, - validatePhase, - initializePhase, - uninitializePhase); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/PhasebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/PhasebatchPD.cpp index d2da756eaa..c2740f46c5 100644 --- a/amd_openvx_extensions/amd_rpp/source/PhasebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/PhasebatchPD.cpp @@ -22,209 +22,273 @@ THE SOFTWARE. 
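The PhasebatchPD.cpp hunk below follows the pattern applied to the other batchPD kernels in this PR: the per-call malloc()s for srcDimensions and the width/height staging arrays move out of the refresh callback into initializePhasebatchPD (and are freed once in uninitializePhasebatchPD), the local data gains HIP device-buffer fields alongside the OpenCL ones, and a query_target_support callback is registered with the kernel. A minimal sketch of the compile-time backend selection the new code relies on; ExampleLocalData, dev_pSrc, and refreshExample are hypothetical names, and ENABLE_OPENCL/ENABLE_HIP are assumed to be mutually exclusive build flags:

// Sketch only: per-backend device-buffer plumbing, as assumed from this hunk.
// ExampleLocalData, dev_pSrc, and refreshExample are illustrative names.
struct ExampleLocalData
{
#if ENABLE_OPENCL
    cl_mem dev_pSrc;  // OpenCL backend: AGO attaches cl_mem buffer objects to each vx_image
#elif ENABLE_HIP
    void *dev_pSrc;   // HIP backend: AGO attaches raw device pointers instead
#endif
};

static void refreshExample(vx_image img, ExampleLocalData *data)
{
#if ENABLE_OPENCL
    // fetch the backing OpenCL buffer for the image
    vxQueryImage(img, VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->dev_pSrc, sizeof(data->dev_pSrc));
#elif ENABLE_HIP
    // same query shape, but the HIP attribute returns a device pointer
    vxQueryImage(img, VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->dev_pSrc, sizeof(data->dev_pSrc));
#endif
}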
#include "internal_publishKernels.h" -struct PhasebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct PhasebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshPhasebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, PhasebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], 
VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validatePhasebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PhasebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PhasebatchPD: image: #1 format=%4.4s (must be 
RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PhasebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PhasebatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; }
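For orientation, the seven parameters validated above correspond, in order, to the arguments a graph author would bind when instantiating this kernel. A hypothetical usage sketch follows; context, graph, src1, src2, widthArray, heightArray, dst, nbatchScalar, and deviceTypeScalar are illustrative handles that are not part of this change:

// Hypothetical graph-side construction of an org.rpp.PhasebatchPD node.
vx_kernel kernel = vxGetKernelByName(context, "org.rpp.PhasebatchPD");
vx_node node = vxCreateGenericNode(graph, kernel);
vxSetParameterByIndex(node, 0, (vx_reference)src1);             // batched input image 1
vxSetParameterByIndex(node, 1, (vx_reference)src2);             // batched input image 2
vxSetParameterByIndex(node, 2, (vx_reference)widthArray);       // per-image widths (vx_array of Rpp32u)
vxSetParameterByIndex(node, 3, (vx_reference)heightArray);      // per-image heights (vx_array of Rpp32u)
vxSetParameterByIndex(node, 4, (vx_reference)dst);              // batched output image
vxSetParameterByIndex(node, 5, (vx_reference)nbatchScalar);     // VX_TYPE_UINT32 batch size
vxSetParameterByIndex(node, 6, (vx_reference)deviceTypeScalar); // VX_TYPE_UINT32 CPU/GPU affinity

-static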
vx_status VX_CALLBACK processPhasebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - PhasebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processPhasebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + PhasebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshPhasebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_phase_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_phase_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - + refreshPhasebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_phase_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_phase_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshPhasebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_phase_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_phase_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshPhasebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_phase_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_phase_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshPhasebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_phase_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_phase_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializePhasebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializePhasebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - PhasebatchPDLocalData * data = new PhasebatchPDLocalData; - memset(data, 0, sizeof(*data)); + PhasebatchPDLocalData *data = new PhasebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshPhasebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshPhasebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializePhasebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - PhasebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + PhasebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == 
AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node sets the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status PhasebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.PhasebatchPD", - VX_KERNEL_RPP_PHASEBATCHPD, - processPhasebatchPD, - 7, - validatePhasebatchPD, - initializePhasebatchPD, - uninitializePhasebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.PhasebatchPD", + VX_KERNEL_RPP_PHASEBATCHPD, + processPhasebatchPD, + 7, + validatePhasebatchPD, + initializePhasebatchPD, + uninitializePhasebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif
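Note on the registration below: the new code installs query_target_support through the VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT kernel attribute before adding the seven parameters. With the OpenCL backend the callback deliberately reports CPU-only affinity (the hardcoded assignment above), which keeps VerifyGraph from failing, since amd_rpp provides no OpenCL codegen callback for these nodes.

- if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); -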
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/PhasebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/PhasebatchPDROID.cpp deleted file mode 100644 index d939e74f4a..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/PhasebatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct PhasebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshPhasebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, PhasebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validatePhasebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PhasebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PhasebatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processPhasebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - PhasebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], 
VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshPhasebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_phase_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_phase_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshPhasebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_phase_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_phase_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializePhasebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PhasebatchPDROIDLocalData * data = new PhasebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshPhasebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializePhasebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PhasebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status PhasebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.PhasebatchPDROID", - VX_KERNEL_RPP_PHASEBATCHPDROID, - processPhasebatchPDROID, - 11, - validatePhasebatchPDROID, - initializePhasebatchPDROID, - uninitializePhasebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL 
buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/PhasebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/PhasebatchPS.cpp deleted file mode 100644 index 41ce9097f2..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/PhasebatchPS.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct PhasebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshPhasebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, PhasebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validatePhasebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PhasebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PhasebatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processPhasebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - PhasebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshPhasebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_phase_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_phase_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshPhasebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_phase_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_phase_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializePhasebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PhasebatchPSLocalData * data = new PhasebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshPhasebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializePhasebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PhasebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status PhasebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.PhasebatchPS", - VX_KERNEL_RPP_PHASEBATCHPS, - processPhasebatchPS, - 7, - validatePhasebatchPS, - initializePhasebatchPS, - uninitializePhasebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Pixelate.cpp b/amd_openvx_extensions/amd_rpp/source/Pixelate.cpp deleted file mode 100644 index efd0431fdd..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Pixelate.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct PixelateLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshPixelate(vx_node node, const vx_reference *parameters, vx_uint32 num, PixelateLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validatePixelate(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Pixelate: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processPixelate(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status 
return_status = VX_SUCCESS; - PixelateLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshPixelate(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_pixelate_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_pixelate_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshPixelate(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_pixelate_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_pixelate_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializePixelate(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PixelateLocalData * data = new PixelateLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshPixelate(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializePixelate(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PixelateLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Pixelate_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Pixelate", - VX_KERNEL_RPP_PIXELATE, - processPixelate, - 3, - validatePixelate, - initializePixelate, - uninitializePixelate); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, 
&enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/PixelatebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/PixelatebatchPD.cpp index 3f5bd28676..1cbf52d8a1 100644 --- a/amd_openvx_extensions/amd_rpp/source/PixelatebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/PixelatebatchPD.cpp @@ -22,215 +22,258 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct PixelatebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; +struct PixelatebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshPixelatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, PixelatebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + 
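// Layout note: batchPD packs the whole batch into one stacked vx_image, so the division above leaves maxSrcDimensions describing a single frame slot; frame i begins i * maxSrcDimensions.height rows into the buffer. + // Hypothetical addressing sketch (assumes a packed 8-bit layout whose row pitch equals the slot width; 'channels' is illustrative, not a field of this struct): + //     vx_uint8 *frame_i = (vx_uint8 *)data->pSrc + i * data->maxSrcDimensions.height * data->maxSrcDimensions.width * channels; + // The per-frame width/height arrays below are read into srcBatch_width/srcBatch_height, which initializePixelatebatchPD now allocates once up front; the old refresh path malloc'd fresh buffers on every call and never freed them. +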
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validatePixelatebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PixelatebatchPD: 
image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PixelatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processPixelatebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - PixelatebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0],
VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processPixelatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + PixelatebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshPixelatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_pixelate_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_pixelate_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshPixelatebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_pixelate_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_pixelate_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshPixelatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_pixelate_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_pixelate_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshPixelatebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_pixelate_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_pixelate_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshPixelatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_pixelate_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_pixelate_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshPixelatebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_pixelate_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_pixelate_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializePixelatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializePixelatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - PixelatebatchPDLocalData * data = new PixelatebatchPDLocalData; - memset(data, 0, sizeof(*data)); + PixelatebatchPDLocalData *data = new PixelatebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshPixelatebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshPixelatebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializePixelatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - 
PixelatebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + PixelatebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for the OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status PixelatebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.PixelatebatchPD", - VX_KERNEL_RPP_PIXELATEBATCHPD, - processPixelatebatchPD, - 6, - validatePixelatebatchPD, - initializePixelatebatchPD, - uninitializePixelatebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.PixelatebatchPD", + VX_KERNEL_RPP_PIXELATEBATCHPD, + processPixelatebatchPD, + 6, + validatePixelatebatchPD, + initializePixelatebatchPD, + uninitializePixelatebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) +
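// Setting VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE here means the process callback is handed device memory directly (the cl_mem / HIP pointers that refreshPixelatebatchPD queries via VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER and VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER) rather than host-accessible staging copies. +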
STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/PixelatebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/PixelatebatchPDROID.cpp deleted file mode 100644 index ca0e26f84b..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/PixelatebatchPDROID.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct PixelatebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshPixelatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, PixelatebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, 
sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validatePixelatebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PixelatebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processPixelatebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - PixelatebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshPixelatebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_pixelate_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - 
rpp_status = rppi_pixelate_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshPixelatebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_pixelate_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_pixelate_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializePixelatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PixelatebatchPDROIDLocalData * data = new PixelatebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshPixelatebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializePixelatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PixelatebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status PixelatebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.PixelatebatchPDROID", - VX_KERNEL_RPP_PIXELATEBATCHPDROID, - processPixelatebatchPDROID, - 10, - validatePixelatebatchPDROID, - initializePixelatebatchPDROID, - uninitializePixelatebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 
1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/PixelatebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/PixelatebatchPS.cpp deleted file mode 100644 index a6b17ac616..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/PixelatebatchPS.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct PixelatebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshPixelatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, PixelatebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validatePixelatebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return 
ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: PixelatebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processPixelatebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - PixelatebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshPixelatebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_pixelate_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_pixelate_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshPixelatebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_pixelate_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_pixelate_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializePixelatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PixelatebatchPSLocalData * data = new PixelatebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshPixelatebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializePixelatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - PixelatebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status PixelatebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.PixelatebatchPS", - VX_KERNEL_RPP_PIXELATEBATCHPS, - processPixelatebatchPS, - 6, - validatePixelatebatchPS, - initializePixelatebatchPS, - uninitializePixelatebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Rain.cpp b/amd_openvx_extensions/amd_rpp/source/Rain.cpp deleted file mode 100644 index fe7a9007d4..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Rain.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* -Copyright 
(c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct RainLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f rainValue; - Rpp32u rainWidth; - Rpp32u rainHeight; - Rpp32f rainTransperancy; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRain(vx_node node, const vx_reference *parameters, vx_uint32 num, RainLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->rainValue)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->rainWidth)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->rainHeight)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->rainTransperancy)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRain(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return 
ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Rain: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRain(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RainLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRain(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rain_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - - rpp_status = rppi_rain_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRain(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rain_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - - rpp_status = rppi_rain_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRain(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RainLocalData * data = new RainLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRain(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRain(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RainLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Rain_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Rain", - VX_KERNEL_RPP_RAIN, - processRain, - 7, - validateRain, - initializeRain, - uninitializeRain); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/RainbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/RainbatchPD.cpp index 214244d934..56f9502cfa 100644 --- a/amd_openvx_extensions/amd_rpp/source/RainbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/RainbatchPD.cpp @@ -22,235 +22,278 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct RainbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *rainValue; - vx_uint32 *rainWidth; - vx_uint32 *rainHeight; - vx_float32 *rainTransperancy; +struct RainbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *rainValue; + vx_uint32 *rainWidth; + vx_uint32 *rainHeight; + vx_float32 *rainTransperancy; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshRainbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, RainbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->rainValue = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->rainValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->rainWidth = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->rainWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->rainHeight = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32),data->rainHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->rainTransperancy = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, sizeof(vx_float32),data->rainTransperancy, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / 
data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->rainValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->rainWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->rainHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_float32), data->rainTransperancy, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == 
AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateRainbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RainbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RainbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); -
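Every validate callback in this patch follows the same OpenVX recipe: check the scalar parameter types, reject input images that are not U8 or RGB, then copy the output image's width, height, and format into the vx_meta_format objects so graph verification can proceed. A minimal sketch of that last step follows; it is not part of the patch, only stock OpenVX 1.x calls, and the helper name (checkOutputMeta) and direct cast of parameters[index] are illustrative.

#include <VX/vx.h>

// Sketch: propagate an output image's shape and format into the
// meta-format object handed to a user-kernel validator.
static vx_status checkOutputMeta(const vx_reference parameters[],
                                 vx_meta_format metas[], vx_uint32 index)
{
    vx_uint32 width = 0, height = 0;
    vx_df_image format = VX_DF_IMAGE_VIRT;
    vx_image output = (vx_image)parameters[index];
    // Query the concrete image the application bound to this parameter.
    vx_status status = vxQueryImage(output, VX_IMAGE_WIDTH, &width, sizeof(width));
    if (status == VX_SUCCESS)
        status = vxQueryImage(output, VX_IMAGE_HEIGHT, &height, sizeof(height));
    if (status == VX_SUCCESS)
        status = vxQueryImage(output, VX_IMAGE_FORMAT, &format, sizeof(format));
    // Declare what the kernel will produce at this output index.
    if (status == VX_SUCCESS)
        status = vxSetMetaFormatAttribute(metas[index], VX_IMAGE_WIDTH, &width, sizeof(width));
    if (status == VX_SUCCESS)
        status = vxSetMetaFormatAttribute(metas[index], VX_IMAGE_HEIGHT, &height, sizeof(height));
    if (status == VX_SUCCESS)
        status = vxSetMetaFormatAttribute(metas[index], VX_IMAGE_FORMAT, &format, sizeof(format));
    return status;
}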
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processRainbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RainbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processRainbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + RainbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRainbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rain_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_rain_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshRainbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_rain_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_rain_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshRainbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rain_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_rain_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshRainbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_rain_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_rain_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRainbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rain_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_rain_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshRainbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_rain_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_rain_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->rainValue, data->rainWidth, data->rainHeight, data->rainTransperancy, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeRainbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeRainbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RainbatchPDLocalData * data = new RainbatchPDLocalData; - memset(data, 0, sizeof(*data)); + RainbatchPDLocalData *data = new RainbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRainbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); + data->rainValue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->rainWidth = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->rainHeight = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->rainTransperancy = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshRainbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == 
AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeRainbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RainbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + RainbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->rainHeight); + free(data->rainWidth); + free(data->rainTransperancy); + free(data->rainValue); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node sets the same affinity as the context. This needs to change when we have hybrid modes in the same graph. +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for the OpenCL backend to avoid a VerifyGraph failure, since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status RainbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RainbatchPD", - VX_KERNEL_RPP_RAINBATCHPD, - processRainbatchPD, - 10, - validateRainbatchPD, - initializeRainbatchPD, - uninitializeRainbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RainbatchPD", + VX_KERNEL_RPP_RAINBATCHPD, + processRainbatchPD, + 10, + validateRainbatchPD, + initializeRainbatchPD, + uninitializeRainbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool
enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git 
a/amd_openvx_extensions/amd_rpp/source/RainbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/RainbatchPDROID.cpp deleted file mode 100644 index e9b2580f3d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/RainbatchPDROID.cpp +++ /dev/null @@ -1,257 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct RainbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *rainValue; - vx_uint32 *rainWidth; - vx_uint32 *rainHeight; - vx_float32 *rainTransperancy; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRainbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, RainbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->rainValue = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->rainValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->rainWidth = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->rainWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->rainHeight = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32),data->rainHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->rainTransperancy = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, sizeof(vx_float32),data->rainTransperancy, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[12], &data->nbatchSize)); - 
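The batchPD kernels in this patch take the per-image source widths and heights as plain vx_array parameters (#1 and #2) and read them back inside the refresh callbacks with vxCopyArrayRange. For orientation, a hedged sketch of the host-side setup such a node expects follows; the helper name, batch size, and dimension values are hypothetical, while vxCreateArray and vxAddArrayItems are stock OpenVX.

#include <VX/vx.h>

// Sketch: build the per-image width/height arrays a batchPD node reads.
// Assumes a batch of 4 images; the dimensions are illustrative.
static vx_status makeBatchDims(vx_context context, vx_array *widths, vx_array *heights)
{
    enum { NBATCH = 4 };
    vx_uint32 w[NBATCH] = {640, 1280, 224, 800};
    vx_uint32 h[NBATCH] = {480, 720, 224, 600};
    *widths = vxCreateArray(context, VX_TYPE_UINT32, NBATCH);
    *heights = vxCreateArray(context, VX_TYPE_UINT32, NBATCH);
    // One item per image; the stride is the size of a single element.
    vx_status status = vxAddArrayItems(*widths, NBATCH, w, sizeof(vx_uint32));
    if (status == VX_SUCCESS)
        status = vxAddArrayItems(*heights, NBATCH, h, sizeof(vx_uint32));
    return status;
}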
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[11], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRainbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[13], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != 
VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #13 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RainbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRainbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RainbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRainbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_rain_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_rain_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRainbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_rain_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_rain_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRainbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RainbatchPDROIDLocalData * data = new RainbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[13], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRainbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRainbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RainbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status RainbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RainbatchPDROID", - VX_KERNEL_RPP_RAINBATCHPDROID, - processRainbatchPDROID, - 14, - validateRainbatchPDROID, - initializeRainbatchPDROID, - uninitializeRainbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 13, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/RainbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/RainbatchPS.cpp deleted file mode 100644 index 9552c3f221..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/RainbatchPS.cpp +++ /dev/null @@ -1,238 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct RainbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f rainValue; - Rpp32u rainWidth; - Rpp32u rainHeight; - Rpp32f rainTransperancy; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRainbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, RainbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->rainValue)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->rainWidth)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->rainHeight)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->rainTransperancy)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRainbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - 
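Each validator in these files repeats the same scalar-type assertion (the "(must be size)" text in the old messages is a stale copy-paste; the checks are really for VX_TYPE_FLOAT32 or VX_TYPE_UINT32). The repeated idiom can be factored as sketched below; the helper name is illustrative, not code from the patch, and only stock OpenVX calls are used.

#include <VX/vx.h>

// Sketch: verify that the scalar bound at 'index' carries the expected VX type.
static vx_status checkScalarType(const vx_reference parameters[], vx_uint32 index,
                                 vx_enum expected)
{
    vx_enum scalar_type = VX_TYPE_INVALID;
    vx_status status = vxQueryScalar((vx_scalar)parameters[index], VX_SCALAR_TYPE,
                                     &scalar_type, sizeof(scalar_type));
    if (status == VX_SUCCESS && scalar_type != expected)
        status = VX_ERROR_INVALID_TYPE; // caller decides which parameter to report
    return status;
}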
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RainbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRainbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RainbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRainbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rain_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - - rpp_status = rppi_rain_u8_pkd3_batchPS_gpu((void 
*)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRainbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rain_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_rain_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->rainValue,data->rainWidth,data->rainHeight,data->rainTransperancy,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRainbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RainbatchPSLocalData * data = new RainbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRainbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRainbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RainbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status RainbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RainbatchPS", - VX_KERNEL_RPP_RAINBATCHPS, - processRainbatchPS, - 10, - validateRainbatchPS, - initializeRainbatchPS, - uninitializeRainbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, 
VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBox.cpp b/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBox.cpp deleted file mode 100644 index fdd8f2bd9e..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBox.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct RandomCropLetterBoxLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - RppiSize dstDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u x1; - Rpp32u y1; - Rpp32u x2; - Rpp32u y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRandomCropLetterBox(vx_node node, const vx_reference *parameters, vx_uint32 num, RandomCropLetterBoxLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->dstDimensions.height, sizeof(data->dstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->dstDimensions.width, sizeof(data->dstDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->x1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->y1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->x2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->y2)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRandomCropLetterBox(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 
type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomCropLetterBox: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRandomCropLetterBox(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RandomCropLetterBoxLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRandomCropLetterBox(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_crop_letterbox_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->x1,data->y1,data->x2,data->y2,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_crop_letterbox_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->x1,data->y1,data->x2,data->y2,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRandomCropLetterBox(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_crop_letterbox_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->x1,data->y1,data->x2,data->y2,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_crop_letterbox_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->x1,data->y1,data->x2,data->y2,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRandomCropLetterBox(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomCropLetterBoxLocalData * data = new RandomCropLetterBoxLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRandomCropLetterBox(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRandomCropLetterBox(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomCropLetterBoxLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status RandomCropLetterBox_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomCropLetterBox", - VX_KERNEL_RPP_RANDOMCROPLETTERBOX, - processRandomCropLetterBox, - 7, - validateRandomCropLetterBox, - initializeRandomCropLetterBox, - uninitializeRandomCropLetterBox); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBoxbatchPD.cpp 
b/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBoxbatchPD.cpp index b77e94bfde..57d3f1cb27 100644 --- a/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBoxbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBoxbatchPD.cpp @@ -22,230 +22,298 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct RandomCropLetterBoxbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; +struct RandomCropLetterBoxbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppiSize *dstDimensions; + RppiSize maxDstDimensions; + Rpp32u *dstBatch_width; + Rpp32u *dstBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *x1; + vx_uint32 *y1; + vx_uint32 *x2; + vx_uint32 *y2; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshRandomCropLetterBoxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, RandomCropLetterBoxbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32),data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, sizeof(vx_uint32),data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[8], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, arr_size, sizeof(vx_uint32),data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[9], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, arr_size, sizeof(vx_uint32),data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, 
sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_uint32), data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_uint32), data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); + data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize;
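+ // The batch appears to be packed into a single vx_image with the nbatchSize + // frames stacked along the height, which is why the per-frame maximum height + // above is the total image height divided by nbatchSize. The actual per-frame + // sizes are then read from the width/height arrays (parameters #1/#2 for the + // source batch, #4/#5 for the destination batch) below. + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY,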
VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->dstDimensions[i].width = data->dstBatch_width[i]; + data->dstDimensions[i].height = data->dstBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateRandomCropLetterBoxbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomCropLetterBoxbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, 
VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #10 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #11 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomCropLetterBoxbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processRandomCropLetterBoxbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RandomCropLetterBoxbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processRandomCropLetterBoxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + RandomCropLetterBoxbatchPDLocalData *data = NULL;
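+ // Look up the per-node state created in initializeRandomCropLetterBoxbatchPD, + // re-read the per-frame crop and dimension data via the refresh callback, and + // dispatch to the RPP batchPD kernel that matches the backend (OpenCL or HIP + // buffers on GPU, host buffers on CPU) and the image format (PLN1 for U008, + // PKD3 for packed RGB). +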
STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->y1, data->x2, data->y2, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeRandomCropLetterBoxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeRandomCropLetterBoxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RandomCropLetterBoxbatchPDLocalData * data = new RandomCropLetterBoxbatchPDLocalData; - memset(data, 0, sizeof(*data)); + RandomCropLetterBoxbatchPDLocalData *data = new RandomCropLetterBoxbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); + data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshRandomCropLetterBoxbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == 
AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeRandomCropLetterBoxbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RandomCropLetterBoxbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + RandomCropLetterBoxbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->dstDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->dstBatch_width); + free(data->dstBatch_height); + free(data->x1); + free(data->x2); + free(data->y1); + free(data->y2); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for the OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status RandomCropLetterBoxbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomCropLetterBoxbatchPD", - VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD, - processRandomCropLetterBoxbatchPD, - 12, - validateRandomCropLetterBoxbatchPD, - initializeRandomCropLetterBoxbatchPD, - uninitializeRandomCropLetterBoxbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
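+ // Parameter layout (12 parameters): #0 input batch image, #1/#2 source + // width/height arrays, #3 output batch image, #4/#5 destination width/height + // arrays, #6-#9 per-frame x1/y1/x2/y2 crop arrays, #10 batch size scalar, + // #11 device affinity scalar. + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomCropLetterBoxbatchPD", + VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPD, + processRandomCropLetterBoxbatchPD, + 12, + validateRandomCropLetterBoxbatchPD,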
+ initializeRandomCropLetterBoxbatchPD, + uninitializeRandomCropLetterBoxbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, 
VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBoxbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBoxbatchPDROID.cpp deleted file mode 100644 index 151c72f3fd..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBoxbatchPDROID.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct RandomCropLetterBoxbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRandomCropLetterBoxbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, RandomCropLetterBoxbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32),data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, sizeof(vx_uint32),data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[8], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, arr_size, sizeof(vx_uint32),data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[9], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, arr_size, sizeof(vx_uint32),data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[15], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, 
VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[11], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[12], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[13], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[14], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRandomCropLetterBoxbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[15], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #15 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[16], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: 
Paramter: #16 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomCropLetterBoxbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRandomCropLetterBoxbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RandomCropLetterBoxbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRandomCropLetterBoxbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRandomCropLetterBoxbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRandomCropLetterBoxbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomCropLetterBoxbatchPDROIDLocalData * data = new RandomCropLetterBoxbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[16], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRandomCropLetterBoxbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRandomCropLetterBoxbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomCropLetterBoxbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status RandomCropLetterBoxbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomCropLetterBoxbatchPDROID", - VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPDROID, - processRandomCropLetterBoxbatchPDROID, - 17, - validateRandomCropLetterBoxbatchPDROID, - initializeRandomCropLetterBoxbatchPDROID, - uninitializeRandomCropLetterBoxbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if 
(kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 13, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 14, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 15, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 16, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBoxbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBoxbatchPS.cpp deleted file mode 100644 index 8d7da398dd..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/RandomCropLetterBoxbatchPS.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct RandomCropLetterBoxbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u x1; - Rpp32u y1; - Rpp32u x2; - Rpp32u y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRandomCropLetterBoxbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, RandomCropLetterBoxbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->dstDimensions.width)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->dstDimensions.height)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->x1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->y1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->x2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->y2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRandomCropLetterBoxbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 
num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomCropLetterBoxbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - 
vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRandomCropLetterBoxbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RandomCropLetterBoxbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRandomCropLetterBoxbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRandomCropLetterBoxbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_crop_letterbox_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_crop_letterbox_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRandomCropLetterBoxbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomCropLetterBoxbatchPSLocalData * data = new RandomCropLetterBoxbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRandomCropLetterBoxbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRandomCropLetterBoxbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomCropLetterBoxbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status RandomCropLetterBoxbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomCropLetterBoxbatchPS", - VX_KERNEL_RPP_RANDOMCROPLETTERBOXBATCHPS, - processRandomCropLetterBoxbatchPS, - 12, - validateRandomCropLetterBoxbatchPS, - initializeRandomCropLetterBoxbatchPS, - uninitializeRandomCropLetterBoxbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, 
VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/RandomShadow.cpp b/amd_openvx_extensions/amd_rpp/source/RandomShadow.cpp deleted file mode 100644 index 1442ba4d20..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/RandomShadow.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct RandomShadowLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u x1; - Rpp32u y1; - Rpp32u x2; - Rpp32u y2; - Rpp32u numberOfShadows; - Rpp32u maxSizeX; - Rpp32u maxSizeY; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRandomShadow(vx_node node, const vx_reference *parameters, vx_uint32 num, RandomShadowLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->x1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->y1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->x2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->y2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->numberOfShadows)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->maxSizeX)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->maxSizeY)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRandomShadow(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be 
size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomShadow: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRandomShadow(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RandomShadowLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRandomShadow(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_shadow_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_shadow_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRandomShadow(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_shadow_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_shadow_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRandomShadow(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomShadowLocalData * data = new RandomShadowLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[9], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRandomShadow(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRandomShadow(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomShadowLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status RandomShadow_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomShadow", - VX_KERNEL_RPP_RANDOMSHADOW, - processRandomShadow, - 10, - validateRandomShadow, - initializeRandomShadow, - uninitializeRandomShadow); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, 
VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/RandomShadowbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/RandomShadowbatchPD.cpp index 370baa2fa6..7645f68a5e 100644 --- a/amd_openvx_extensions/amd_rpp/source/RandomShadowbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/RandomShadowbatchPD.cpp @@ -22,231 +22,294 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct RandomShadowbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; - vx_uint32 *numberOfShadows; - vx_uint32 *maxSizeX; - vx_uint32 *maxSizeY; +struct RandomShadowbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *x1; + vx_uint32 *y1; + vx_uint32 *x2; + vx_uint32 *y2; + vx_uint32 *numberOfShadows; + vx_uint32 *maxSizeX; + vx_uint32 *maxSizeY; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshRandomShadowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, RandomShadowbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32),data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, 
sizeof(vx_uint32),data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[8], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->numberOfShadows = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, arr_size, sizeof(vx_uint32),data->numberOfShadows, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[9], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->maxSizeX = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, arr_size, sizeof(vx_uint32),data->maxSizeX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[10], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->maxSizeY = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, arr_size, sizeof(vx_uint32),data->maxSizeY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint32), data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_uint32), data->numberOfShadows, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_uint32), data->maxSizeX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(vx_uint32), data->maxSizeY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, 
sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateRandomShadowbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: 
RandomShadowbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #11 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #12 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomShadowbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processRandomShadowbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RandomShadowbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processRandomShadowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + RandomShadowbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRandomShadowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_shadow_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_shadow_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshRandomShadowbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_random_shadow_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_random_shadow_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshRandomShadowbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_random_shadow_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_random_shadow_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRandomShadowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_shadow_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_shadow_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshRandomShadowbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_random_shadow_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_random_shadow_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->x1, data->y1, data->x2, data->y2, data->numberOfShadows, data->maxSizeX, data->maxSizeY, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeRandomShadowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeRandomShadowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RandomShadowbatchPDLocalData * data = new RandomShadowbatchPDLocalData; - memset(data, 0, sizeof(*data)); + RandomShadowbatchPDLocalData *data = new RandomShadowbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRandomShadowbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); + data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->numberOfShadows = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->maxSizeX = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->maxSizeY = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + 
data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshRandomShadowbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeRandomShadowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RandomShadowbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + RandomShadowbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->x1); + free(data->x2); + free(data->y1); + free(data->y2); + free(data->numberOfShadows); + free(data->maxSizeX); + free(data->maxSizeY); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status RandomShadowbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomShadowbatchPD", - VX_KERNEL_RPP_RANDOMSHADOWBATCHPD, - processRandomShadowbatchPD, - 13, - validateRandomShadowbatchPD, - initializeRandomShadowbatchPD, - uninitializeRandomShadowbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomShadowbatchPD", + VX_KERNEL_RPP_RANDOMSHADOWBATCHPD, + processRandomShadowbatchPD, + 13, + validateRandomShadowbatchPD, + initializeRandomShadowbatchPD, + uninitializeRandomShadowbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/RandomShadowbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/RandomShadowbatchPDROID.cpp deleted file mode 100644 index 971dec4710..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/RandomShadowbatchPDROID.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct RandomShadowbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; - vx_uint32 *numberOfShadows; - vx_uint32 *maxSizeX; - vx_uint32 *maxSizeY; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRandomShadowbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, RandomShadowbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint32),data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32),data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, sizeof(vx_uint32),data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[8], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->numberOfShadows = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, arr_size, sizeof(vx_uint32),data->numberOfShadows, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[9], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->maxSizeX = 
(vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, arr_size, sizeof(vx_uint32),data->maxSizeX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[10], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->maxSizeY = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, arr_size, sizeof(vx_uint32),data->maxSizeY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[15], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[11], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[12], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[13], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[14], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } 
- return status; -} - -static vx_status VX_CALLBACK validateRandomShadowbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[15], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #15 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[16], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #16 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomShadowbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRandomShadowbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RandomShadowbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRandomShadowbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_random_shadow_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_random_shadow_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRandomShadowbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_random_shadow_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_random_shadow_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRandomShadowbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomShadowbatchPDROIDLocalData * data = new RandomShadowbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[16], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRandomShadowbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRandomShadowbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomShadowbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status RandomShadowbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomShadowbatchPDROID", - VX_KERNEL_RPP_RANDOMSHADOWBATCHPDROID, - processRandomShadowbatchPDROID, - 17, - validateRandomShadowbatchPDROID, - initializeRandomShadowbatchPDROID, - uninitializeRandomShadowbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool 
enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 13, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 14, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 15, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 16, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/RandomShadowbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/RandomShadowbatchPS.cpp deleted file mode 100644 index 05fd7e2a2d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/RandomShadowbatchPS.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
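Every refresh callback in the file deleted above follows the same vx_array-to-host idiom: query VX_ARRAY_ATTRIBUTE_NUMITEMS, malloc a matching buffer, then vxCopyArrayRange into it. A condensed sketch of that idiom follows; the helper is illustrative, not code from the repository, and note that the callbacks above re-allocate on every refresh without freeing the previous buffer.

// Illustrative: the array-refresh idiom used by refreshRandomShadowbatchPDROID()
// and its siblings -- size a host buffer from the array's item count, then copy.
static vx_status copyArrayToHost(vx_array arr, Rpp32u **out)
{
    size_t numItems = 0;
    vx_status status = vxQueryArray(arr, VX_ARRAY_ATTRIBUTE_NUMITEMS, &numItems, sizeof(numItems));
    if (status != VX_SUCCESS)
        return status;
    *out = (Rpp32u *)malloc(sizeof(Rpp32u) * numItems);
    return vxCopyArrayRange(arr, 0, numItems, sizeof(Rpp32u), *out, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
}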
-*/ - -#include "internal_publishKernels.h" - -struct RandomShadowbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u x1; - Rpp32u y1; - Rpp32u x2; - Rpp32u y2; - Rpp32u numberOfShadows; - Rpp32u maxSizeX; - Rpp32u maxSizeY; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRandomShadowbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, RandomShadowbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->x1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->y1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->x2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->y2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->numberOfShadows)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->maxSizeX)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->maxSizeY)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRandomShadowbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", 
scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RandomShadowbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return 
status; -} - -static vx_status VX_CALLBACK processRandomShadowbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RandomShadowbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRandomShadowbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_shadow_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_shadow_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRandomShadowbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_random_shadow_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_random_shadow_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->x1,data->y1,data->x2,data->y2,data->numberOfShadows,data->maxSizeX,data->maxSizeY,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRandomShadowbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomShadowbatchPSLocalData * data = new RandomShadowbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRandomShadowbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRandomShadowbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RandomShadowbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status RandomShadowbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RandomShadowbatchPS", - VX_KERNEL_RPP_RANDOMSHADOWBATCHPS, - processRandomShadowbatchPS, - 13, - validateRandomShadowbatchPS, - initializeRandomShadowbatchPS, - uninitializeRandomShadowbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Remap.cpp b/amd_openvx_extensions/amd_rpp/source/Remap.cpp index f9fdff64e0..d3e9635d87 100644 --- a/amd_openvx_extensions/amd_rpp/source/Remap.cpp +++ b/amd_openvx_extensions/amd_rpp/source/Remap.cpp @@ -22,187 +22,240 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct remapLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u *rowRemap; - Rpp32u *colRemap; +struct remapLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + RppiSize srcDimensions; + Rpp32u device_type; + RppPtr_t pSrc; + RppPtr_t pDst; + Rpp32u *rowRemap; + Rpp32u *colRemap; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshremap(vx_node node, const vx_reference *parameters, vx_uint32 num, remapLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->rowRemap = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp32u),data->rowRemap, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[3], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->colRemap = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, arr_size, sizeof(Rpp32u),data->colRemap, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); + size_t arr_size; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + data->rowRemap = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp32u), data->rowRemap, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[3], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + data->colRemap = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); + 
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, arr_size, sizeof(Rpp32u), data->colRemap, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateremap(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: remap: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: remap: image: #0 format=%4.4s
(must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processremap(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - remapLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshremap(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_remap_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rowRemap,data->colRemap,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_remap_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rowRemap,data->colRemap,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshremap(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_remap_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->rowRemap,data->colRemap,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_remap_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->rowRemap,data->colRemap,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; +static vx_status VX_CALLBACK processremap(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + remapLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + // #if ENABLE_OPENCL + // refreshremap(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_remap_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rowRemap,data->colRemap,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_remap_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->rowRemap,data->colRemap,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #elif ENABLE_HIP + // refreshremap(node, parameters, num, data); + // if (df_image == VX_DF_IMAGE_U8 ){ + // rpp_status = rppi_remap_u8_pln1_gpu((void *)data->hip_pSrc,data->srcDimensions,(void *)data->hip_pDst,data->rowRemap,data->colRemap,data->rppHandle); + // } + // else if(df_image == VX_DF_IMAGE_RGB) { + // rpp_status = rppi_remap_u8_pkd3_gpu((void *)data->hip_pSrc,data->srcDimensions,(void *)data->hip_pDst,data->rowRemap,data->colRemap,data->rppHandle); + // } + // return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + // #endif + return VX_ERROR_NOT_IMPLEMENTED; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshremap(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_remap_u8_pln1_host(data->pSrc, data->srcDimensions, data->pDst, data->rowRemap, data->colRemap, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_remap_u8_pkd3_host(data->pSrc, data->srcDimensions, data->pDst, data->rowRemap, data->colRemap, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeremap(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeremap(vx_node node, const vx_reference *parameters, vx_uint32 num) { - remapLocalData * data = new remapLocalData; - memset(data, 0, sizeof(*data)); + remapLocalData *data = new remapLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshremap(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + refreshremap(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.hipstream); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeremap(vx_node node, const vx_reference *parameters, vx_uint32 num) { - remapLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + remapLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status remap_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.remap", - VX_KERNEL_RPP_REMAP, - processremap, - 5, - validateremap, - initializeremap, - uninitializeremap); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.remap", + VX_KERNEL_RPP_REMAP, + processremap, + 5, + validateremap, + initializeremap, + uninitializeremap); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses device buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); -
PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/Resize.cpp b/amd_openvx_extensions/amd_rpp/source/Resize.cpp deleted file mode 100644 index b245d54ffe..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Resize.cpp +++ /dev/null @@ -1,199 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
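With the registration above, org.rpp.remap is an ordinary user kernel with five parameters: source image, destination image, row map, column map, and a device-type scalar. A hedged sketch of how a client graph would instantiate it, assuming the kernels are already published into the context; this is not code from the repository, and passing GPU affinity is only useful once processremap() stops returning VX_ERROR_NOT_IMPLEMENTED for the GPU path.

// Illustrative: instantiate org.rpp.remap against the signature registered above.
vx_node createRemapNode(vx_graph graph, vx_image src, vx_image dst,
                        vx_array rowRemap, vx_array colRemap, vx_uint32 deviceType)
{
    vx_context context = vxGetContext((vx_reference)graph);
    vx_scalar devType = vxCreateScalar(context, VX_TYPE_UINT32, &deviceType); // AGO_TARGET_AFFINITY_CPU for now
    vx_kernel kernel = vxGetKernelByName(context, "org.rpp.remap");
    vx_node node = vxCreateGenericNode(graph, kernel);
    vxSetParameterByIndex(node, 0, (vx_reference)src);
    vxSetParameterByIndex(node, 1, (vx_reference)dst);
    vxSetParameterByIndex(node, 2, (vx_reference)rowRemap);
    vxSetParameterByIndex(node, 3, (vx_reference)colRemap);
    vxSetParameterByIndex(node, 4, (vx_reference)devType);
    vxReleaseKernel(&kernel);
    return node;
}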
-*/ - -#include "internal_publishKernels.h" - -struct ResizeLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - RppiSize dstDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshResize(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->dstDimensions.height, sizeof(data->dstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->dstDimensions.width, sizeof(data->dstDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResize(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Resize: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - 
vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processResize(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizeLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshResize(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_resize_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_resize_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshResize(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_resize_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_resize_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResize(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeLocalData * data = new ResizeLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[2], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshResize(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResize(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Resize_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Resize", - VX_KERNEL_RPP_RESIZE, - processResize, - 3, - validateResize, - initializeResize, - uninitializeResize); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); 
-#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ResizeCrop.cpp b/amd_openvx_extensions/amd_rpp/source/ResizeCrop.cpp deleted file mode 100644 index f3cdcd8215..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ResizeCrop.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ResizeCropLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - RppiSize dstDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u x1; - Rpp32u y1; - Rpp32u x2; - Rpp32u y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshResizeCrop(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeCropLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->dstDimensions.height, sizeof(data->dstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->dstDimensions.width, sizeof(data->dstDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->x1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->y1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->x2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->y2)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResizeCrop(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); 
- // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizeCrop: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processResizeCrop(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizeCropLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshResizeCrop(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_resize_crop_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->x1,data->x2,data->y1,data->y2,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_resize_crop_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->x1,data->x2,data->y1,data->y2,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshResizeCrop(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_resize_crop_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->x1,data->x2,data->y1,data->y2,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_resize_crop_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->x1,data->x2,data->y1,data->y2,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResizeCrop(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeCropLocalData * data = new ResizeCropLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshResizeCrop(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResizeCrop(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeCropLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ResizeCrop_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCrop", - VX_KERNEL_RPP_RESIZECROP, - processResizeCrop, - 7, - validateResizeCrop, - initializeResizeCrop, - uninitializeResizeCrop); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ResizeCropMirrorPD.cpp b/amd_openvx_extensions/amd_rpp/source/ResizeCropMirrorPD.cpp index ddd79e47d6..005c066e58 100644 --- a/amd_openvx_extensions/amd_rpp/source/ResizeCropMirrorPD.cpp +++ 
b/amd_openvx_extensions/amd_rpp/source/ResizeCropMirrorPD.cpp @@ -24,271 +24,303 @@ THE SOFTWARE. struct ResizeCropMirrorPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; - vx_uint32 *mirrorFlag; + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppiSize *dstDimensions; + RppiSize maxDstDimensions; + Rpp32u *dstBatch_width; + Rpp32u *dstBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *x1; + vx_uint32 *y1; + vx_uint32 *x2; + vx_uint32 *y2; + vx_uint32 *mirrorFlag; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; + void *hip_pSrc; + void *hip_pDst; #endif }; static vx_status VX_CALLBACK refreshResizeCropMirrorPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeCropMirrorPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32), data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, sizeof(vx_uint32), data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[8], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, arr_size, sizeof(vx_uint32), data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[9], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, arr_size, sizeof(vx_uint32), data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[10], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->mirrorFlag = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, arr_size, sizeof(vx_uint32), data->mirrorFlag, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); 
- STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_uint32), data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_uint32), data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(vx_uint32), data->mirrorFlag, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); + data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + 
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->dstDimensions[i].width = data->dstBatch_width[i]; + data->dstDimensions[i].height = data->dstBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateResizeCropMirrorPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if 
(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizeCropMirrorPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #11 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #12 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizeCropMirrorPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + }

 - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } static vx_status VX_CALLBACK processResizeCropMirrorPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizeCropMirrorPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node,
VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - vx_int32 output_format_toggle = 0; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ResizeCropMirrorPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + vx_int32 output_format_toggle = 0; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshResizeCropMirrorPD(node, parameters, num, data); + refreshResizeCropMirrorPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_crop_mirror_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_mirror_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_resize_crop_mirror_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_resize_crop_mirror_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshResizeCropMirrorPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_crop_mirror_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_mirror_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshResizeCropMirrorPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_resize_crop_mirror_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_resize_crop_mirror_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshResizeCropMirrorPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshResizeCropMirrorPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { - rpp_status = rppi_resize_crop_mirror_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_mirror_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + rpp_status = rppi_resize_crop_mirror_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_resize_crop_mirror_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, data->mirrorFlag, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } static vx_status VX_CALLBACK initializeResizeCropMirrorPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ResizeCropMirrorPDLocalData *data = new ResizeCropMirrorPDLocalData; - memset(data, 0, sizeof(*data)); + ResizeCropMirrorPDLocalData *data = new ResizeCropMirrorPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshResizeCropMirrorPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); + data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->mirrorFlag = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshResizeCropMirrorPD(node, parameters, num, data); #if ENABLE_OPENCL - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeResizeCropMirrorPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ResizeCropMirrorPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, 
sizeof(data))); + ResizeCropMirrorPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->dstDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->dstBatch_width); + free(data->dstBatch_height); + free(data->x1); + free(data->x2); + free(data->y1); + free(data->y2); + free(data->mirrorFlag); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph. +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete (data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status ResizeCropMirrorPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCropMirrorPD", - VX_KERNEL_RPP_RESIZECROPMIRRORPD, - processResizeCropMirrorPD, - 13, - validateResizeCropMirrorPD, - initializeResizeCropMirrorPD, - uninitializeResizeCropMirrorPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCropMirrorPD", + VX_KERNEL_RPP_RESIZECROPMIRRORPD, + processResizeCropMirrorPD, + 13, + validateResizeCropMirrorPD, + initializeResizeCropMirrorPD, + uninitializeResizeCropMirrorPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool
enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ResizeCropbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ResizeCropbatchPD.cpp index fdab5e842b..ed3c0e2067 100644 --- a/amd_openvx_extensions/amd_rpp/source/ResizeCropbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ResizeCropbatchPD.cpp @@ -24,264 +24,296 @@ THE SOFTWARE. struct ResizeCropbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppiSize *dstDimensions; + RppiSize maxDstDimensions; + Rpp32u *dstBatch_width; + Rpp32u *dstBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *x1; + vx_uint32 *y1; + vx_uint32 *x2; + vx_uint32 *y2; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; + void *hip_pSrc; + void *hip_pDst; #endif }; static vx_status VX_CALLBACK refreshResizeCropbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeCropbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32), data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, sizeof(vx_uint32), data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[8], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, arr_size, sizeof(vx_uint32), data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[9], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, arr_size, sizeof(vx_uint32), data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_uint32), data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(vx_uint32), data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(vx_uint32), data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(vx_uint32), data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); + data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + 
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->dstDimensions[i].width = data->dstBatch_width[i]; + data->dstDimensions[i].height = data->dstBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateResizeCropbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if 
(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizeCropbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #10 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #11 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizeCropbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + }

 - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } static vx_status VX_CALLBACK processResizeCropbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizeCropbatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR,
&data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - vx_int32 output_format_toggle = 0; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ResizeCropbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + vx_int32 output_format_toggle = 0; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshResizeCropbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_crop_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshResizeCropbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_resize_crop_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_resize_crop_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshResizeCropbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_crop_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshResizeCropbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_resize_crop_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_resize_crop_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshResizeCropbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_crop_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_crop_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshResizeCropbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_resize_crop_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_resize_crop_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->x1, data->x2, data->y1, data->y2, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } static vx_status VX_CALLBACK initializeResizeCropbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ResizeCropbatchPDLocalData *data = new ResizeCropbatchPDLocalData; - memset(data, 0, sizeof(*data)); + ResizeCropbatchPDLocalData *data = new ResizeCropbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshResizeCropbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); + data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshResizeCropbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; }
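A substantive fix in this hunk, easy to miss in the noise: the per-batch buffers (x1/y1/x2/y2, the RppiSize arrays, and the batch width/height scratch arrays) are now allocated once here in the initialize callback, sized by nbatchSize read from parameter #10, and freed once in the uninitialize callback below — instead of being re-malloc'd on every refresh call. A minimal standalone sketch of that allocate-once / refresh-many / free-once lifecycle (names are illustrative, not the amd_rpp types):

#include <cstdlib>
// Per-node scratch that lives from initialize to uninitialize.
struct BatchScratch
{
    unsigned *x1, *y1, *x2, *y2; // per-image crop coordinates
    unsigned n;                  // batch size, fixed for the node's lifetime
};
static bool initScratch(BatchScratch *s, unsigned batchSize)
{
    s->n = batchSize; // allocate exactly once, up front
    s->x1 = (unsigned *)std::malloc(sizeof(unsigned) * batchSize);
    s->y1 = (unsigned *)std::malloc(sizeof(unsigned) * batchSize);
    s->x2 = (unsigned *)std::malloc(sizeof(unsigned) * batchSize);
    s->y2 = (unsigned *)std::malloc(sizeof(unsigned) * batchSize);
    return s->x1 && s->y1 && s->x2 && s->y2;
}
static void refreshScratch(BatchScratch *s, const unsigned *x1PerFrame)
{
    for (unsigned i = 0; i < s->n; ++i) // per-frame path: overwrite, never reallocate
        s->x1[i] = x1PerFrame[i];
}
static void freeScratch(BatchScratch *s)
{
    std::free(s->x1);
    std::free(s->y1);
    std::free(s->x2);
    std::free(s->y2);
}

static vx_status VX_CALLBACK uninitializeResizeCropbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ResizeCropbatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL|| ENABLE_HIP - if (data->device_type ==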
AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + ResizeCropbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->dstDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->dstBatch_width); + free(data->dstBatch_height); + free(data->x1); + free(data->x2); + free(data->y1); + free(data->y2); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node sets the same affinity as the context. This needs to change when we have hybrid modes in the same graph. +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete (data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status ResizeCropbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCropbatchPD", - VX_KERNEL_RPP_RESIZECROPBATCHPD, - processResizeCropbatchPD, - 12, - validateResizeCropbatchPD, - initializeResizeCropbatchPD, - uninitializeResizeCropbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL|| ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCropbatchPD", + VX_KERNEL_RPP_RESIZECROPBATCHPD, + processResizeCropbatchPD, + 12, + validateResizeCropbatchPD, + initializeResizeCropbatchPD, + uninitializeResizeCropbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if
(affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; }
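For orientation, here is how an application might instantiate the kernel registered above. This sketch is hypothetical — context, graph, and the data-object names are placeholders — but the indices follow the vxAddParameterToKernel calls in this hunk (image in at #0, image out at #3, VX_TYPE_UINT32 scalars for batch size and device affinity at #10/#11):

// Hypothetical usage, not part of this patch:
vx_kernel kernel = vxGetKernelByName(context, "org.rpp.ResizeCropbatchPD");
vx_node node = vxCreateGenericNode(graph, kernel);
vxSetParameterByIndex(node, 0, (vx_reference)srcBatchImage);     // input batch image (U008 or RGB2)
vxSetParameterByIndex(node, 3, (vx_reference)dstBatchImage);     // output batch image
// indices 1-2 and 4-9: vx_array objects holding per-image dimensions and crop coordinates
vxSetParameterByIndex(node, 10, (vx_reference)batchSizeScalar);  // VX_TYPE_UINT32 batch size
vxSetParameterByIndex(node, 11, (vx_reference)deviceTypeScalar); // CPU/GPU affinity

diff --git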
a/amd_openvx_extensions/amd_rpp/source/ResizeCropbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/ResizeCropbatchPDROID.cpp deleted file mode 100644 index 76893986ab..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ResizeCropbatchPDROID.cpp +++ /dev/null @@ -1,279 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ResizeCropbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *x1; - vx_uint32 *y1; - vx_uint32 *x2; - vx_uint32 *y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshResizeCropbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeCropbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_uint32),data->x1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[7], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y1 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, arr_size, sizeof(vx_uint32),data->y1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[8], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->x2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, arr_size, sizeof(vx_uint32),data->x2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[9], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->y2 = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, arr_size, sizeof(vx_uint32),data->y2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - 
STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[15], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[11], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[12], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[13], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[14], 0, data->nbatchSize, 
sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResizeCropbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[15], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #15 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[16], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #16 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizeCropbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK 
processResizeCropbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizeCropbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshResizeCropbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_resize_crop_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_resize_crop_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshResizeCropbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_resize_crop_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_resize_crop_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->roiPoints,data->nbatchSize,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResizeCropbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeCropbatchPDROIDLocalData * data = new ResizeCropbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[16], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshResizeCropbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResizeCropbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeCropbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ResizeCropbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCropbatchPDROID", - VX_KERNEL_RPP_RESIZECROPBATCHPDROID, - processResizeCropbatchPDROID, - 17, - validateResizeCropbatchPDROID, - initializeResizeCropbatchPDROID, - uninitializeResizeCropbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 13, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 14, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 15, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 16, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ResizeCropbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ResizeCropbatchPS.cpp deleted file mode 100644 index 5d403c64f5..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ResizeCropbatchPS.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ResizeCropbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u x1; - Rpp32u y1; - Rpp32u x2; - Rpp32u y2; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshResizeCropbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizeCropbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->dstDimensions.width)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->dstDimensions.height)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->x1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->y1)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->x2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->y2)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResizeCropbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - 
vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizeCropbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - 
vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processResizeCropbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizeCropbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshResizeCropbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_resize_crop_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_resize_crop_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshResizeCropbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_resize_crop_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_resize_crop_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->x1,data->y1,data->x2,data->y2,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResizeCropbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeCropbatchPSLocalData * data = new ResizeCropbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshResizeCropbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResizeCropbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizeCropbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ResizeCropbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizeCropbatchPS", - VX_KERNEL_RPP_RESIZECROPBATCHPS, - processResizeCropbatchPS, - 12, - validateResizeCropbatchPS, - initializeResizeCropbatchPS, - uninitializeResizeCropbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, 
VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ResizebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ResizebatchPD.cpp index ad04479869..9f9be5ed52 100644 --- a/amd_openvx_extensions/amd_rpp/source/ResizebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ResizebatchPD.cpp @@ -24,278 +24,276 @@ THE SOFTWARE. struct ResizebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u *srcBatch_width; - Rpp32u *srcBatch_height; - Rpp32u *dstBatch_width; - Rpp32u *dstBatch_height; + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + RppiSize *dstDimensions; + RppiSize maxDstDimensions; + RppPtr_t pSrc; + RppPtr_t pDst; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + Rpp32u *dstBatch_width; + Rpp32u *dstBatch_height; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; + void *hip_pSrc; + void *hip_pDst; #endif }; static vx_status VX_CALLBACK refreshResizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - for (int i = 0; i < data->nbatchSize; i++) - { - data->srcDimensions[i].width = data->srcBatch_width[i]; - data->srcDimensions[i].height = data->srcBatch_height[i]; - data->dstDimensions[i].width = data->dstBatch_width[i]; - data->dstDimensions[i].height = data->dstBatch_height[i]; - }
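The per-image dimension arrays consumed by this refresh callback — parameters #1/#2 for source width/height and #4/#5 for destination width/height — are ordinary VX_TYPE_UINT32 vx_array objects that the application fills with one entry per image in the batch. A hypothetical producer-side sketch (placeholder names, not part of this patch):

// Hypothetical producer side:
vx_uint32 batchSize = 4;
vx_uint32 srcWidths[4] = {640, 1280, 800, 1024}; // one width per batch image
vx_array srcWidthArray = vxCreateArray(context, VX_TYPE_UINT32, batchSize);
vxAddArrayItems(srcWidthArray, batchSize, srcWidths, sizeof(vx_uint32));
// repeat for source heights (#2) and destination widths/heights (#4/#5)

- if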
(data->device_type == AGO_TARGET_AFFINITY_GPU) - { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); + data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->dstDimensions[i].width = data->dstBatch_width[i]; + data->dstDimensions[i].height = data->dstBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], 
VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateResizebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node, 0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node, 3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; +
vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } static vx_status VX_CALLBACK processResizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizebatchPDLocalData *data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vx_int32 output_format_toggle = 0; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ResizebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vx_int32 output_format_toggle = 0; + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshResizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshResizebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_resize_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_resize_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshResizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshResizebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_resize_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_resize_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshResizebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_resize_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_resize_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshResizebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_resize_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_resize_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } static vx_status VX_CALLBACK initializeResizebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ResizebatchPDLocalData *data = new ResizebatchPDLocalData; - memset(data, 0, sizeof(*data)); + ResizebatchPDLocalData *data = new ResizebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - refreshResizebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshResizebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeResizebatchPD(vx_node node, const 
vx_reference *parameters, vx_uint32 num) { - ResizebatchPDLocalData *data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ResizebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - free(data->srcDimensions); - free(data->dstDimensions); - free(data->srcBatch_width); - free(data->srcBatch_height); - free(data->dstBatch_width); - free(data->dstBatch_height); - delete (data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->dstDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->dstBatch_width); + free(data->dstBatch_height); + delete (data); + return VX_SUCCESS; } //! \brief The kernel target support callback. // TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32& supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) - ) + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) { - vx_context context = vxGetContext((vx_reference)graph); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - supported_target_affinity = AGO_TARGET_AFFINITY_GPU; - else - supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; - // hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes + // hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -#endif - return VX_SUCCESS; +#endif + return VX_SUCCESS; } vx_status ResizebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizebatchPD", - VX_KERNEL_RPP_RESIZEBATCHPD, - processResizebatchPD, - 8, - validateResizebatchPD, - initializeResizebatchPD, - uninitializeResizebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + //
Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizebatchPD", + VX_KERNEL_RPP_RESIZEBATCHPD, + processResizebatchPD, + 8, + validateResizebatchPD, + initializeResizebatchPD, + uninitializeResizebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - amd_kernel_query_target_support_f query_target_support_f = query_target_support; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: - vxRemoveKernel(kernel); - return VX_FAILURE; - } - return status; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, 
VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ResizebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/ResizebatchPDROID.cpp deleted file mode 100644 index 1ed3a7df64..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ResizebatchPDROID.cpp +++ /dev/null @@ -1,251 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ResizebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshResizebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u 
*dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResizebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: 
Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processResizebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshResizebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_resize_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_resize_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshResizebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_resize_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_resize_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResizebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizebatchPDROIDLocalData * data = new ResizebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshResizebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResizebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ResizebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizebatchPDROID", - VX_KERNEL_RPP_RESIZEBATCHPDROID, - processResizebatchPDROID, - 12, - validateResizebatchPDROID, - initializeResizebatchPDROID, - uninitializeResizebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, 
VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ResizebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ResizebatchPS.cpp deleted file mode 100644 index 2b7f401b7b..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ResizebatchPS.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ResizebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshResizebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizebatchPSLocalData *data) -{ - // std::cerr<<"\n refresh is called"; - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->dstDimensions.width)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->dstDimensions.height)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateResizebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - 
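/* For contrast with the batchPD variant kept above: the batchPS kernel being
   deleted here resized every image in the batch to one destination size, read
   from scalar parameters 4 and 5, whereas batchPD reads per-image width and
   height arrays. A sketch of the two call shapes; resizePS and resizePD are
   hypothetical helper names used only to show the difference: */
#include <vector>
struct BatchSize { unsigned int w, h; };
// batchPS style: a single destination size applies to the whole batch.
void resizePS(const std::vector<BatchSize> &srcSizes, BatchSize dstSize);
// batchPD style: each image i gets its own destination size dstSizes[i].
void resizePD(const std::vector<BatchSize> &srcSizes, const std::vector<BatchSize> &dstSizes);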
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ResizebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processResizebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ResizebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshResizebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_resize_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_resize_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshResizebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_resize_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_resize_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeResizebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizebatchPSLocalData * data = new ResizebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshResizebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeResizebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ResizebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ResizebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ResizebatchPS", - VX_KERNEL_RPP_RESIZEBATCHPS, - processResizebatchPS, - 8, - validateResizebatchPS, - initializeResizebatchPS, - uninitializeResizebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - 
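/* Registration detail worth keeping in mind while reading these register
   functions: the index passed to vxAddParameterToKernel must match the position
   the validate/process callbacks use when indexing parameters[] (image in at 0,
   image out at 3, batch-size scalar at 6 in the batchPD signature above). A
   minimal hardening sketch; the enum is illustrative, not an existing API: */
enum ResizebatchPDParamIndex
{
    PARAM_SRC = 0, PARAM_SRC_W, PARAM_SRC_H, PARAM_DST,
    PARAM_DST_W, PARAM_DST_H, PARAM_NBATCH, PARAM_DEVICE
};
// Using one enum at registration time and inside the callbacks avoids the
// silent off-by-one that raw literal indices can introduce, e.g.:
//   vxAddParameterToKernel(kernel, PARAM_DST, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED);
//   vxQueryImage((vx_image)parameters[PARAM_DST], ...);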
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Rotate.cpp b/amd_openvx_extensions/amd_rpp/source/Rotate.cpp deleted file mode 100644 index 2cefea8956..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Rotate.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct RotateLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - RppiSize dstDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f angle; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRotate(vx_node node, const vx_reference *parameters, vx_uint32 num, RotateLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->dstDimensions.height, sizeof(data->dstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->dstDimensions.width, sizeof(data->dstDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->angle)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRotate(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Rotate: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - 
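/* The validator idiom visible here: query the output image's width and height,
   reuse the input-derived format, then mirror all three onto metas[index] with
   vxSetMetaFormatAttribute so graph verification can type and allocate any
   virtual image attached to that output. A condensed sketch of the same idiom
   (index 1 matches Rotate's output parameter; error handling elided):
     vx_uint32 w, h; vx_df_image fmt;
     vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &w, sizeof(w));
     vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &h, sizeof(h));
     vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &w, sizeof(w));
     vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &h, sizeof(h));
     vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &fmt, sizeof(fmt)); */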
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRotate(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RotateLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRotate(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rotate_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->angle,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_rotate_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->angle,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRotate(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rotate_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->angle,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_rotate_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->angle,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRotate(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RotateLocalData * data = new RotateLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRotate(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRotate(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RotateLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Rotate_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Rotate", - VX_KERNEL_RPP_ROTATE, - processRotate, - 4, - validateRotate, - initializeRotate, - uninitializeRotate); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/RotatebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/RotatebatchPD.cpp index 46b2292fd1..e3b5235748 100644 --- a/amd_openvx_extensions/amd_rpp/source/RotatebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/RotatebatchPD.cpp @@ -22,242 +22,283 @@ THE SOFTWARE. 
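/* The core of the RotatebatchPD changes that follow: the old refresh callback
   malloc'd srcDimensions/dstDimensions and four temporary batch arrays on every
   frame and never freed them, while the reworked code makes those buffers
   members of the local data struct, fills them in refresh, and, following the
   ResizebatchPD version above, allocates them once in initialize and frees them
   in uninitialize. A minimal sketch of that lifecycle (names shortened, plain
   C++ for illustration; not the literal code below): */
struct BatchBuffers
{
    unsigned int *srcW = nullptr, *srcH = nullptr;
    void init(unsigned int n) { srcW = new unsigned int[n]; srcH = new unsigned int[n]; } // once, at node init
    void refresh() { /* fill srcW/srcH from the vx_array parameters each frame, no allocation */ }
    ~BatchBuffers() { delete[] srcW; delete[] srcH; } // mirrors uninitialize
};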
#include "internal_publishKernels.h" -struct RotatebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *angle; +struct RotatebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppiSize *dstDimensions; + RppiSize maxDstDimensions; + Rpp32u *dstBatch_width; + Rpp32u *dstBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *angle; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshRotatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, RotatebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->angle = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_float32),data->angle, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - 
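/* Another change in this hunk: the old refresh assigned the vx_status returned
   by vxCopyArrayRange to copy_status and never examined it, while the new code
   wraps each copy in STATUS_ERROR_CHECK so a failed copy aborts the callback
   instead of feeding stale dimensions to RPP. Sketch of the guard pattern,
   assuming the project's macro returns the status on failure; its exact
   definition lives in the internal headers, so this is an approximation:
     #define STATUS_ERROR_CHECK(call)        \
     {                                       \
         vx_status s_ = (call);              \
         if (s_ != VX_SUCCESS) return s_;    \
     } */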
data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_float32), data->angle, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); + data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->dstDimensions[i].width = data->dstBatch_width[i]; + data->dstDimensions[i].height = data->dstBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, 
sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateRotatebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RotatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RotatebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT,
&df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processRotatebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RotatebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - vx_int32 output_format_toggle = 0; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { +static vx_status VX_CALLBACK processRotatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + RotatebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + vx_int32 output_format_toggle = 0; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRotatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_rotate_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_rotate_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshRotatebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_rotate_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_rotate_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshRotatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_rotate_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_rotate_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshRotatebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_rotate_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_rotate_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - refreshRotatebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8) - { - rpp_status = rppi_rotate_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); - } - else if (df_image == VX_DF_IMAGE_RGB) - { - rpp_status = rppi_rotate_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshRotatebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_rotate_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_rotate_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->angle, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeRotatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeRotatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RotatebatchPDLocalData * data = new RotatebatchPDLocalData; - memset(data, 0, sizeof(*data)); + RotatebatchPDLocalData *data = new RotatebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRotatebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); + data->angle = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshRotatebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == 
AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeRotatebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RotatebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + RotatebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->dstDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->dstBatch_width); + free(data->dstBatch_height); + free(data->angle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; }
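The query_target_support callback above only mirrors whatever affinity the application configured on the context, so the CPU/GPU decision for these nodes is made once, context-wide, before graph verification. A sketch of the application side of that handshake; the set call below follows the usual way AMD's OpenVX affinity attribute is driven and is an assumption for illustration, not code from this patch:

// Application-side sketch (assumed usage, not part of this patch): pin the
// whole context to the GPU before vxVerifyGraph, so query_target_support
// reports AGO_TARGET_AFFINITY_GPU for every amd_rpp node in the graph.
vx_context context = vxCreateContext();
AgoTargetAffinityInfo affinity = {};
affinity.device_type = AGO_TARGET_AFFINITY_GPU; // or AGO_TARGET_AFFINITY_CPU
vxSetContextAttribute(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));

vx_status RotatebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RotatebatchPD", - VX_KERNEL_RPP_ROTATEBATCHPD, - processRotatebatchPD, - 9, - validateRotatebatchPD, - initializeRotatebatchPD, - uninitializeRotatebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RotatebatchPD", + VX_KERNEL_RPP_ROTATEBATCHPD, + processRotatebatchPD, + 9, + validateRotatebatchPD, + initializeRotatebatchPD, + uninitializeRotatebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host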
accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/RotatebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/RotatebatchPDROID.cpp deleted file mode 100644 index 050d31a751..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/RotatebatchPDROID.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct RotatebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *angle; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRotatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, RotatebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->angle = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_float32),data->angle, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = 
vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRotatebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RotatebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRotatebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RotatebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRotatebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rotate_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->angle,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_rotate_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->angle,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRotatebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rotate_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->angle,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_rotate_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->angle,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRotatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RotatebatchPDROIDLocalData * data = new RotatebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRotatebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRotatebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RotatebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status RotatebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RotatebatchPDROID", - VX_KERNEL_RPP_ROTATEBATCHPDROID, - processRotatebatchPDROID, - 13, - validateRotatebatchPDROID, - initializeRotatebatchPDROID, - uninitializeRotatebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, 
VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/RotatebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/RotatebatchPS.cpp deleted file mode 100644 index 89d560b1e9..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/RotatebatchPS.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct RotatebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f angle; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshRotatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, RotatebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->dstDimensions.width)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->dstDimensions.height)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->angle)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateRotatebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be 
size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: RotatebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processRotatebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - RotatebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshRotatebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rotate_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->angle,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_rotate_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->angle,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshRotatebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_rotate_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->angle,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_rotate_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->angle,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeRotatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RotatebatchPSLocalData * data = new RotatebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshRotatebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeRotatebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - RotatebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status RotatebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.RotatebatchPS", - VX_KERNEL_RPP_ROTATEBATCHPS, - processRotatebatchPS, - 9, - validateRotatebatchPS, - initializeRotatebatchPS, - uninitializeRotatebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Saturation.cpp b/amd_openvx_extensions/amd_rpp/source/Saturation.cpp deleted file mode 100644 index 26d44294be..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Saturation.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct SaturationLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f saturationFactor; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSaturation(vx_node node, const vx_reference *parameters, vx_uint32 num, SaturationLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->saturationFactor)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSaturation(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Saturation: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - 
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSaturation(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SaturationLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSaturation(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_saturationRGB_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->saturationFactor,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_saturationRGB_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->saturationFactor,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSaturation(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_saturationRGB_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->saturationFactor,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_saturationRGB_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->saturationFactor,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSaturation(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SaturationLocalData * data = new SaturationLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSaturation(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSaturation(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SaturationLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Saturation_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Saturation", - VX_KERNEL_RPP_SATURATION, - processSaturation, - 4, - validateSaturation, - initializeSaturation, - uninitializeSaturation); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/SaturationbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/SaturationbatchPD.cpp index eca6005dae..039599bfe2 100644 --- a/amd_openvx_extensions/amd_rpp/source/SaturationbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/SaturationbatchPD.cpp @@ -22,220 +22,263 @@ THE SOFTWARE. 
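
The deleted single-image Saturation kernel above never did real work (its rppi_saturationRGB_* calls were commented out), so only the batchPD variant kept below is wired to RPP. For orientation, that kernel takes seven parameters in the order registered at the bottom of this file: 0 = src image, 1 = width array, 2 = height array, 3 = dst image, 4 = saturationFactor array, 5 = nbatchSize scalar, 6 = device_type scalar. A minimal sketch of driving it from client code follows; the 224x224 sizes, batch of two, variable names, and CPU affinity are illustrative assumptions, and error checking is elided:

// Sketch: wiring org.rpp.SaturationbatchPD into a graph (illustrative values).
vx_uint32 batchSize = 2, maxW = 224, maxH = 224;
// the batch is packed as one tall image, images stacked vertically
vx_image src = vxCreateImage(context, maxW, maxH * batchSize, VX_DF_IMAGE_RGB);
vx_image dst = vxCreateImage(context, maxW, maxH * batchSize, VX_DF_IMAGE_RGB);
vx_uint32 widths[2]  = { 224, 200 };   // per-image valid widths
vx_uint32 heights[2] = { 224, 180 };   // per-image valid heights
vx_float32 sat[2]    = { 0.5f, 1.5f }; // per-image saturation factors
vx_array wArr = vxCreateArray(context, VX_TYPE_UINT32, batchSize);
vx_array hArr = vxCreateArray(context, VX_TYPE_UINT32, batchSize);
vx_array sArr = vxCreateArray(context, VX_TYPE_FLOAT32, batchSize);
vxAddArrayItems(wArr, batchSize, widths, sizeof(vx_uint32));
vxAddArrayItems(hArr, batchSize, heights, sizeof(vx_uint32));
vxAddArrayItems(sArr, batchSize, sat, sizeof(vx_float32));
vx_uint32 deviceType = AGO_TARGET_AFFINITY_CPU;
vx_scalar nScalar = vxCreateScalar(context, VX_TYPE_UINT32, &batchSize);
vx_scalar dScalar = vxCreateScalar(context, VX_TYPE_UINT32, &deviceType);
vx_node node = vxCreateGenericNode(graph, vxGetKernelByName(context, "org.rpp.SaturationbatchPD"));
vxSetParameterByIndex(node, 0, (vx_reference)src);
vxSetParameterByIndex(node, 1, (vx_reference)wArr);
vxSetParameterByIndex(node, 2, (vx_reference)hArr);
vxSetParameterByIndex(node, 3, (vx_reference)dst);
vxSetParameterByIndex(node, 4, (vx_reference)sArr);
vxSetParameterByIndex(node, 5, (vx_reference)nScalar);
vxSetParameterByIndex(node, 6, (vx_reference)dScalar);

The vertical packing is why the refresh callback below divides the queried image height by nbatchSize to recover maxSrcDimensions.
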
#include "internal_publishKernels.h" -struct SaturationbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *saturationFactor; +struct SaturationbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *saturationFactor; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshSaturationbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, SaturationbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->saturationFactor = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->saturationFactor, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->saturationFactor, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + 
data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateSaturationbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SaturationbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SaturationbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processSaturationbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SaturationbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processSaturationbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + SaturationbatchPDLocalData *data = NULL; +
STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSaturationbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_saturationRGB_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->saturationFactor,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->saturationFactor,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshSaturationbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_saturationRGB_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->saturationFactor, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->saturationFactor, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshSaturationbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_saturationRGB_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->saturationFactor,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->saturationFactor,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshSaturationbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_saturationRGB_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->saturationFactor, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->saturationFactor, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSaturationbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_saturationRGB_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->saturationFactor,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->saturationFactor,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshSaturationbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_saturationRGB_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->saturationFactor, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->saturationFactor, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeSaturationbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeSaturationbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - SaturationbatchPDLocalData * data = new SaturationbatchPDLocalData; - memset(data, 0, sizeof(*data)); + SaturationbatchPDLocalData *data = new SaturationbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSaturationbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->saturationFactor = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + refreshSaturationbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, 
sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeSaturationbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - SaturationbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + SaturationbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->saturationFactor); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as the context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status SaturationbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SaturationbatchPD", - VX_KERNEL_RPP_SATURATIONBATCHPD, - processSaturationbatchPD, - 7, - validateSaturationbatchPD, - initializeSaturationbatchPD, - uninitializeSaturationbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SaturationbatchPD", + VX_KERNEL_RPP_SATURATIONBATCHPD, + processSaturationbatchPD, + 7, + validateSaturationbatchPD, + initializeSaturationbatchPD, + uninitializeSaturationbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access
since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/SaturationbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/SaturationbatchPDROID.cpp deleted file mode 100644 index 249419bf6d..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/SaturationbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct SaturationbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *saturationFactor; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSaturationbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, SaturationbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->saturationFactor = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->saturationFactor, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, 
VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSaturationbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SaturationbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSaturationbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SaturationbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = 
VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSaturationbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_saturationRGB_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->saturationFactor,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->saturationFactor,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSaturationbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_saturationRGB_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->saturationFactor,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_saturationRGB_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->saturationFactor,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSaturationbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SaturationbatchPDROIDLocalData * data = new SaturationbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSaturationbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSaturationbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SaturationbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status SaturationbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SaturationbatchPDROID", - VX_KERNEL_RPP_SATURATIONBATCHPDROID, - processSaturationbatchPDROID, - 11, - validateSaturationbatchPDROID, - initializeSaturationbatchPDROID, - uninitializeSaturationbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - 
AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/SaturationbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/SaturationbatchPS.cpp deleted file mode 100644 index 694a00f738..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/SaturationbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct SaturationbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f saturationFactor; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSaturationbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, SaturationbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->saturationFactor)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSaturationbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - 
vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SaturationbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSaturationbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SaturationbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSaturationbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_saturationRGB_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->saturationFactor,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_saturationRGB_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->saturationFactor,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSaturationbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_saturationRGB_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->saturationFactor,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_saturationRGB_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->saturationFactor,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSaturationbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SaturationbatchPSLocalData * data = new SaturationbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSaturationbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSaturationbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SaturationbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status SaturationbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SaturationbatchPS", - VX_KERNEL_RPP_SATURATIONBATCHPS, - processSaturationbatchPS, - 7, - validateSaturationbatchPS, - initializeSaturationbatchPS, - uninitializeSaturationbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Scale.cpp b/amd_openvx_extensions/amd_rpp/source/Scale.cpp deleted file 
mode 100644 index b20b57dd95..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Scale.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ScaleLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - RppiSize dstDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f angle; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshScale(vx_node node, const vx_reference *parameters, vx_uint32 num, ScaleLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->dstDimensions.height, sizeof(data->dstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->dstDimensions.width, sizeof(data->dstDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->angle)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateScale(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - 
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Scale: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processScale(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ScaleLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshScale(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_scale_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->angle,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_scale_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->angle,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshScale(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_scale_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->angle,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_scale_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->angle,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeScale(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ScaleLocalData * data = new ScaleLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshScale(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeScale(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ScaleLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Scale_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Scale", - VX_KERNEL_RPP_SCALE, - processScale, - 4, - validateScale, - initializeScale, - uninitializeScale); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ScalebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ScalebatchPD.cpp index fd8c5a2318..c2edeb2d45 100644 --- a/amd_openvx_extensions/amd_rpp/source/ScalebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ScalebatchPD.cpp @@ -22,215 +22,283 @@ THE SOFTWARE. 
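The ScalebatchPD hunk below is the core of this change: the kernel gains a HIP code path beside the existing OpenCL one, and the per-batch allocations move out of the refresh callback into initialize. A minimal sketch of the backend-selection pattern the new code follows, assuming ENABLE_OPENCL and ENABLE_HIP are mutually exclusive build flags (as the #elif chains in the hunk imply); img and dev_pSrc are illustrative names, not part of the patch:

#if ENABLE_OPENCL
    // OpenCL backend: device buffers are cl_mem handles, paired with the node's command queue
    cl_mem dev_pSrc;
    STATUS_ERROR_CHECK(vxQueryImage(img, VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &dev_pSrc, sizeof(dev_pSrc)));
#elif ENABLE_HIP
    // HIP backend: device buffers are raw device pointers, paired with the node's HIP stream
    void *dev_pSrc;
    STATUS_ERROR_CHECK(vxQueryImage(img, VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &dev_pSrc, sizeof(dev_pSrc)));
#endif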
#include "internal_publishKernels.h" -struct ScalebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *percentage; +struct ScalebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppiSize *dstDimensions; + RppiSize maxDstDimensions; + Rpp32u *dstBatch_width; + Rpp32u *dstBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *percentage; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshScalebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ScalebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->percentage = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_float32),data->percentage, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width 
= dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(vx_float32), data->percentage, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); + data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->dstDimensions[i].width = data->dstBatch_width[i]; + data->dstDimensions[i].height = data->dstBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], 
VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateScalebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ScalebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be VX_TYPE_UINT32)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ScalebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32
height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processScalebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ScalebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processScalebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ScalebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshScalebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_scale_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_scale_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshScalebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_scale_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_scale_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshScalebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_scale_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_scale_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshScalebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_scale_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_scale_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshScalebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_scale_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_scale_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->percentage, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeScalebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeScalebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ScalebatchPDLocalData * data = new ScalebatchPDLocalData; - memset(data, 0, sizeof(*data)); + ScalebatchPDLocalData *data = new ScalebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshScalebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); + data->percentage = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshScalebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeScalebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ScalebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ScalebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->dstDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->dstBatch_width); + 
free(data->dstBatch_height); + free(data->percentage); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status ScalebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ScalebatchPD", - VX_KERNEL_RPP_SCALEBATCHPD, - processScalebatchPD, - 9, - validateScalebatchPD, - initializeScalebatchPD, - uninitializeScalebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ScalebatchPD", + VX_KERNEL_RPP_SCALEBATCHPD, + processScalebatchPD, + 9, + validateScalebatchPD, + initializeScalebatchPD, + uninitializeScalebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY,
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ScalebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/ScalebatchPDROID.cpp deleted file mode 100644 index c685af8c34..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ScalebatchPDROID.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ScalebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *percentage; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshScalebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, ScalebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->percentage = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_float32),data->percentage, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u 
*)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateScalebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #13 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ScalebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - 
STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processScalebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ScalebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshScalebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_scale_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_scale_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshScalebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_scale_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_scale_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeScalebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ScalebatchPDROIDLocalData * data = new ScalebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshScalebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeScalebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ScalebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ScalebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ScalebatchPDROID", - VX_KERNEL_RPP_SCALEBATCHPDROID, - processScalebatchPDROID, - 13, - validateScalebatchPDROID, - initializeScalebatchPDROID, - uninitializeScalebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, 
VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ScalebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ScalebatchPS.cpp deleted file mode 100644 index e2ae982ab0..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ScalebatchPS.cpp +++ /dev/null @@ -1,235 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ScalebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f percentage; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshScalebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ScalebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->dstDimensions.width)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->dstDimensions.height)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->percentage)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateScalebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must 
be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ScalebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processScalebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ScalebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshScalebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_scale_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = 
rppi_scale_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshScalebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_scale_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_scale_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->percentage,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeScalebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ScalebatchPSLocalData * data = new ScalebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshScalebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeScalebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ScalebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ScalebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ScalebatchPS", - VX_KERNEL_RPP_SCALEBATCHPS, - processScalebatchPS, - 9, - validateScalebatchPS, - initializeScalebatchPS, - uninitializeScalebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, 
VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Snow.cpp b/amd_openvx_extensions/amd_rpp/source/Snow.cpp deleted file mode 100644 index b508ac908c..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Snow.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct SnowLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f snowValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSnow(vx_node node, const vx_reference *parameters, vx_uint32 num, SnowLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->snowValue)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSnow(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Snow: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, 
sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSnow(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SnowLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSnow(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_snow_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->snowValue,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_snow_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->snowValue,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSnow(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_snow_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->snowValue,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_snow_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->snowValue,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSnow(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SnowLocalData * data = new SnowLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSnow(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSnow(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SnowLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Snow_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Snow", - VX_KERNEL_RPP_SNOW, - processSnow, - 4, - validateSnow, - initializeSnow, - uninitializeSnow); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL 
buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/SnowbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/SnowbatchPD.cpp index b7285edb6f..bc17941a8a 100644 --- a/amd_openvx_extensions/amd_rpp/source/SnowbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/SnowbatchPD.cpp @@ -22,220 +22,263 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct SnowbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *snowValue; +struct SnowbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *snowValue; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshSnowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, SnowbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->snowValue = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->snowValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = 
vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->snowValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateSnowbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], 
VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SnowbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SnowbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height,
sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processSnowbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SnowbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processSnowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + SnowbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSnowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_snow_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->snowValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_snow_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->snowValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshSnowbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_snow_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->snowValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_snow_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->snowValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshSnowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_snow_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->snowValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_snow_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->snowValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshSnowbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_snow_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->snowValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_snow_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->snowValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSnowbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_snow_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->snowValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_snow_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->snowValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshSnowbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_snow_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->snowValue, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_snow_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->snowValue, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeSnowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeSnowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - SnowbatchPDLocalData * data = new SnowbatchPDLocalData; - memset(data, 0, sizeof(*data)); + SnowbatchPDLocalData *data = new SnowbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSnowbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->snowValue = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + refreshSnowbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeSnowbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - SnowbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + SnowbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if 
(data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->snowValue); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status SnowbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SnowbatchPD", - VX_KERNEL_RPP_SNOWBATCHPD, - processSnowbatchPD, - 7, - validateSnowbatchPD, - initializeSnowbatchPD, - uninitializeSnowbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SnowbatchPD", + VX_KERNEL_RPP_SNOWBATCHPD, + processSnowbatchPD, + 7, + validateSnowbatchPD, + initializeSnowbatchPD, + uninitializeSnowbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel,
3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/SnowbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/SnowbatchPDROID.cpp deleted file mode 100644 index 2c96467984..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/SnowbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct SnowbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *snowValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSnowbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, SnowbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->snowValue = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->snowValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSnowbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SnowbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSnowbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SnowbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSnowbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_snow_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->snowValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_snow_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->snowValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSnowbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - // rpp_status = rppi_snow_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->snowValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - // rpp_status = rppi_snow_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->snowValue,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSnowbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SnowbatchPDROIDLocalData * data = new SnowbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSnowbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSnowbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SnowbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status SnowbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SnowbatchPDROID", - VX_KERNEL_RPP_SNOWBATCHPDROID, - processSnowbatchPDROID, - 11, - validateSnowbatchPDROID, - initializeSnowbatchPDROID, - uninitializeSnowbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/SnowbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/SnowbatchPS.cpp deleted file mode 100644 index 430d0afe0e..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/SnowbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct SnowbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f snowValue; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSnowbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, SnowbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->snowValue)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSnowbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image 
df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SnowbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSnowbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SnowbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSnowbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_snow_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->snowValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_snow_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->snowValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSnowbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_snow_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->snowValue,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_snow_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->snowValue,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSnowbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SnowbatchPSLocalData * data = new SnowbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSnowbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSnowbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SnowbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status SnowbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SnowbatchPS", - VX_KERNEL_RPP_SNOWBATCHPS, - processSnowbatchPS, - 7, - validateSnowbatchPS, - initializeSnowbatchPS, - uninitializeSnowbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Sobel.cpp b/amd_openvx_extensions/amd_rpp/source/Sobel.cpp deleted file mode 100644 index 3d7ef29f4a..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/Sobel.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct SobelLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u sobelType; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSobel(vx_node node, const vx_reference *parameters, vx_uint32 num, SobelLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->sobelType)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSobel(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Sobel: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSobel(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SobelLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSobel(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_sobel_filter_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->sobelType,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_sobel_filter_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->sobelType,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSobel(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_sobel_filter_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->sobelType,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_sobel_filter_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->sobelType,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSobel(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SobelLocalData * data = new SobelLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSobel(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSobel(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SobelLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Sobel_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Sobel", - VX_KERNEL_RPP_SOBEL, - processSobel, - 4, - validateSobel, - initializeSobel, - uninitializeSobel); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/SobelbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/SobelbatchPD.cpp index 0180764980..6b650e079c 100644 --- a/amd_openvx_extensions/amd_rpp/source/SobelbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/SobelbatchPD.cpp @@ -22,201 +22,243 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct SobelbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *sobelType; +struct SobelbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint32 *sobelType; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#endif }; static vx_status VX_CALLBACK refreshSobelbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, SobelbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->sobelType = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->sobelType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint32), data->sobelType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + 
data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateSobelbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SobelbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - 
vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SobelbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processSobelbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SobelbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processSobelbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + SobelbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSobelbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_sobel_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->sobelType,data->nbatchSize,data->rppHandle); - } - else 
if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_sobel_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->sobelType,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshSobelbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_sobel_filter_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->sobelType, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_sobel_filter_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->sobelType, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSobelbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_sobel_filter_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->sobelType,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_sobel_filter_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->sobelType,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshSobelbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_sobel_filter_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->sobelType, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_sobel_filter_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->sobelType, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeSobelbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeSobelbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - SobelbatchPDLocalData * data = new SobelbatchPDLocalData; - memset(data, 0, sizeof(*data)); + SobelbatchPDLocalData *data = new SobelbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSobelbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->sobelType = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshSobelbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeSobelbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - SobelbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + SobelbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->sobelType); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status SobelbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SobelbatchPD", - VX_KERNEL_RPP_SOBELBATCHPD, - processSobelbatchPD, - 7, - validateSobelbatchPD, - initializeSobelbatchPD, - uninitializeSobelbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SobelbatchPD", + VX_KERNEL_RPP_SOBELBATCHPD, + processSobelbatchPD, + 7, + validateSobelbatchPD, + initializeSobelbatchPD, + uninitializeSobelbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/SobelbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/SobelbatchPDROID.cpp deleted file mode 100644 index b15c0987cd..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/SobelbatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct SobelbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint32 *sobelType; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSobelbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, SobelbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->sobelType = (vx_uint32 *)malloc(sizeof(vx_uint32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint32),data->sobelType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSobelbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SobelbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSobelbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SobelbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSobelbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_sobel_filter_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->sobelType,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_sobel_filter_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->sobelType,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSobelbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_sobel_filter_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->sobelType,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_sobel_filter_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->sobelType,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSobelbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SobelbatchPDROIDLocalData * data = new SobelbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSobelbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSobelbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SobelbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status SobelbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SobelbatchPDROID", - VX_KERNEL_RPP_SOBELBATCHPDROID, - processSobelbatchPDROID, - 11, - validateSobelbatchPDROID, - initializeSobelbatchPDROID, - uninitializeSobelbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/SobelbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/SobelbatchPS.cpp deleted file mode 100644 index 9e421229d3..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/SobelbatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct SobelbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32u sobelType; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSobelbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, SobelbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->sobelType)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSobelbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image 
df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SobelbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSobelbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SobelbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSobelbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_sobel_filter_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->sobelType,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_sobel_filter_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->sobelType,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSobelbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_sobel_filter_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->sobelType,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_sobel_filter_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->sobelType,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSobelbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SobelbatchPSLocalData * data = new SobelbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSobelbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSobelbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SobelbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status SobelbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SobelbatchPS", - VX_KERNEL_RPP_SOBELBATCHPS, - processSobelbatchPS, - 7, - validateSobelbatchPS, - initializeSobelbatchPS, - uninitializeSobelbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Subtract.cpp b/amd_openvx_extensions/amd_rpp/source/Subtract.cpp deleted file mode 100644 index e09b753db3..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/Subtract.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct SubtractLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num, SubtractLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[2], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSubtract(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - 
STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Subtract: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Subtract: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,2); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSubtract(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SubtractLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSubtract(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_subtract_u8_pln1_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_subtract_u8_pkd3_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,(void *)data->cl_pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSubtract(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_subtract_u8_pln1_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_subtract_u8_pkd3_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->pDst,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SubtractLocalData * data = new SubtractLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSubtract(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SubtractLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Subtract_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Subtract", - VX_KERNEL_RPP_SUBTRACT, - processSubtract, - 4, - validateSubtract, - initializeSubtract, - uninitializeSubtract); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/SubtractbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/SubtractbatchPD.cpp index c7ad09e53a..b8b93ffd88 100644 --- a/amd_openvx_extensions/amd_rpp/source/SubtractbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/SubtractbatchPD.cpp @@ -22,209 +22,274 @@ THE SOFTWARE. 
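(Aside, not part of the patch: the diff above deletes source/Subtract.cpp, i.e. the single-image org.rpp.Subtract kernel, and the batchPS/batchPDROID variants are deleted further down, so batchPD becomes the only surviving Subtract entry point. A graph that used the old node can most likely migrate by driving org.rpp.SubtractbatchPD with a batch size of 1. The sketch below is illustrative only: it uses the generic OpenVX node API rather than any amd_rpp helper, src1/src2/dst stand for images the graph already owns, and the 640x480 size is an arbitrary example; the parameter order follows SubtractbatchPD_Register later in this hunk.)

    // Hypothetical migration sketch (not part of the patch): single-image
    // subtract through the retained batchPD kernel with nbatchSize = 1.
    vx_kernel kernel = vxGetKernelByName(context, "org.rpp.SubtractbatchPD");
    vx_node node = vxCreateGenericNode(graph, kernel);
    vx_uint32 width = 640, height = 480;                    // assumed image size
    vx_uint32 batchSize = 1, device = AGO_TARGET_AFFINITY_CPU;
    vx_array widths = vxCreateArray(context, VX_TYPE_UINT32, 1);
    vx_array heights = vxCreateArray(context, VX_TYPE_UINT32, 1);
    vxAddArrayItems(widths, 1, &width, sizeof(width));
    vxAddArrayItems(heights, 1, &height, sizeof(height));
    vx_scalar nbatch = vxCreateScalar(context, VX_TYPE_UINT32, &batchSize);
    vx_scalar dev = vxCreateScalar(context, VX_TYPE_UINT32, &device);
    vxSetParameterByIndex(node, 0, (vx_reference)src1);     // input image #0
    vxSetParameterByIndex(node, 1, (vx_reference)src2);     // input image #1
    vxSetParameterByIndex(node, 2, (vx_reference)widths);   // per-image widths
    vxSetParameterByIndex(node, 3, (vx_reference)heights);  // per-image heights
    vxSetParameterByIndex(node, 4, (vx_reference)dst);      // output image
    vxSetParameterByIndex(node, 5, (vx_reference)nbatch);   // nbatchSize scalar
    vxSetParameterByIndex(node, 6, (vx_reference)dev);      // device_type scalar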
#include "internal_publishKernels.h" -struct SubtractbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; +struct SubtractbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc1; + RppPtr_t pSrc2; + RppPtr_t pDst; #if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc1; + cl_mem cl_pSrc2; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshSubtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, SubtractbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc1, sizeof(data->hip_pSrc1))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc2, sizeof(data->hip_pSrc2))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateSubtractbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SubtractbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return 
ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SubtractbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SubtractbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + input_param = vxGetParameterByIndex(node, 1); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SubtractbatchPD: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 4); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); +
vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processSubtractbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SubtractbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processSubtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + SubtractbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSubtractbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_subtract_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_subtract_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshSubtractbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_subtract_u8_pln1_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_subtract_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshSubtractbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_subtract_u8_pln1_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_subtract_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSubtractbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_subtract_u8_pln1_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_subtract_u8_pkd3_batchPD_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshSubtractbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_subtract_u8_pln1_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_subtract_u8_pkd3_batchPD_host(data->pSrc1, data->pSrc2, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeSubtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeSubtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - SubtractbatchPDLocalData * data = new SubtractbatchPDLocalData; - memset(data, 0, sizeof(*data)); + SubtractbatchPDLocalData *data = new SubtractbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSubtractbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshSubtractbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - 
STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeSubtractbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - SubtractbatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + SubtractbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcBatch_height); + free(data->srcBatch_width); + free(data->srcDimensions); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status SubtractbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SubtractbatchPD", - VX_KERNEL_RPP_SUBTRACTBATCHPD, - processSubtractbatchPD, - 7, - validateSubtractbatchPD, - initializeSubtractbatchPD, - uninitializeSubtractbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SubtractbatchPD", + VX_KERNEL_RPP_SUBTRACTBATCHPD, + processSubtractbatchPD, + 7, + validateSubtractbatchPD, + initializeSubtractbatchPD, + uninitializeSubtractbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel,
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/SubtractbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/SubtractbatchPDROID.cpp deleted file mode 100644 index fa773f46af..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/SubtractbatchPDROID.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct SubtractbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSubtractbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, SubtractbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = 
vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSubtractbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SubtractbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SubtractbatchPDROID: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - 
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSubtractbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SubtractbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSubtractbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_subtract_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_subtract_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSubtractbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_subtract_u8_pln1_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_subtract_u8_pkd3_batchPD_ROID_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSubtractbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SubtractbatchPDROIDLocalData * data = new SubtractbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSubtractbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSubtractbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SubtractbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status SubtractbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SubtractbatchPDROID", - VX_KERNEL_RPP_SUBTRACTBATCHPDROID, - processSubtractbatchPDROID, - 11, - validateSubtractbatchPDROID, - initializeSubtractbatchPDROID, - uninitializeSubtractbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/SubtractbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/SubtractbatchPS.cpp deleted file mode 100644 index f84a249cb3..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/SubtractbatchPS.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct SubtractbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc1; - RppPtr_t pSrc2; - RppPtr_t pDst; -#if ENABLE_OPENCL - cl_mem cl_pSrc1; - cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshSubtractbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, SubtractbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc1, sizeof(data->cl_pSrc1))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc2, sizeof(data->cl_pSrc2))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc1, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc2, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[4], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateSubtractbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = 
vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SubtractbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - input_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: SubtractbatchPS: image: #1 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,4); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[4], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processSubtractbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - SubtractbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshSubtractbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_subtract_u8_pln1_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_subtract_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc1,(void *)data->cl_pSrc2,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshSubtractbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_subtract_u8_pln1_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_subtract_u8_pkd3_batchPS_host(data->pSrc1,data->pSrc2,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeSubtractbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SubtractbatchPSLocalData * data = new SubtractbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshSubtractbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeSubtractbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - SubtractbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status SubtractbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SubtractbatchPS", - VX_KERNEL_RPP_SUBTRACTBATCHPS, - processSubtractbatchPS, - 7, - validateSubtractbatchPS, - initializeSubtractbatchPS, - uninitializeSubtractbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/TensorAdd.cpp b/amd_openvx_extensions/amd_rpp/source/TensorAdd.cpp index bcb30ef88b..e6843475dd 100644 --- a/amd_openvx_extensions/amd_rpp/source/TensorAdd.cpp +++ b/amd_openvx_extensions/amd_rpp/source/TensorAdd.cpp @@ -22,184 +22,260 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct TensorAddLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp8u *pSrc1; +struct TensorAddLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp8u *pSrc1; Rpp8u *pSrc2; - Rpp8u *pDst; - Rpp32u tensorDimensions; - Rpp32u *tensorDimensionsValue; + Rpp8u *pDst; + Rpp32u tensorDimensions; + Rpp32u *tensorDimensionsValue; #if ENABLE_OPENCL - cl_mem cl_pSrc1; + cl_mem cl_pSrc1; cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshTensorAdd(vx_node node, const vx_reference *parameters, vx_uint32 num, TensorAddLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; + vx_status status = VX_SUCCESS; + size_t arr_size; + vx_status copy_status; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u),data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); + data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u), data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); + data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); data->pDst = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u),data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u), data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); data->tensorDimensionsValue = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u),data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + if 
(data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_context theContext; - cl_command_queue theQueue; - theQueue = data->handle.cmdq; - clGetCommandQueueInfo( theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, + cl_context theContext; + cl_command_queue theQueue; + theQueue = data->handle.cmdq; + clGetCommandQueueInfo(theQueue, + CL_QUEUE_CONTEXT, + sizeof(cl_context), &theContext, NULL); + cl_int err; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, bytes, data->pSrc1, 0, NULL, NULL); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, + if (err) + return VX_FAILURE; + err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, bytes, data->pSrc2, 0, NULL, NULL); + if (err) + return VX_FAILURE; +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + hipError_t err; + err = hipMemcpy(data->hip_pSrc1, data->pSrc1, bytes, hipMemcpyHostToDevice); + if (err != hipSuccess) + return VX_FAILURE; + err = hipMemcpy(data->hip_pSrc2, data->pSrc2, bytes, hipMemcpyHostToDevice); + if (err != hipSuccess) + return VX_FAILURE; #endif - } - - return status; + } + + return status; } static vx_status VX_CALLBACK validateTensorAdd(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; size_t arr_size; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be size)\n", scalar_type); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - return status; + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); + return status; } -static vx_status VX_CALLBACK processTensorAdd(vx_node node, const vx_reference * parameters, vx_uint32 num) +static vx_status VX_CALLBACK processTensorAdd(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - TensorAddLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + TensorAddLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); size_t arr_size; - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + if (data->device_type ==
AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshTensorAdd(node, parameters, num, data); - rpp_status = rppi_tensor_add_u8_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, (void *)data->cl_pDst, data->tensorDimensions, data->tensorDimensionsValue,data->rppHandle); + refreshTensorAdd(node, parameters, num, data); + rpp_status = rppi_tensor_add_u8_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, (void *)data->cl_pDst, data->tensorDimensions, data->tensorDimensionsValue, data->rppHandle); cl_command_queue theQueue; theQueue = data->handle.cmdq; cl_int err; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); size_t bytes = arr_size * sizeof(Rpp8u); - clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL ); + clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshTensorAdd(node, parameters, num, data); + rpp_status = rppi_tensor_add_u8_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, (void *)data->hip_pDst, data->tensorDimensions, data->tensorDimensionsValue, data->rppHandle); + hipError_t err; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + err = hipMemcpy(data->pDst, data->hip_pDst, bytes, hipMemcpyDeviceToHost); + if (err != hipSuccess) + return VX_FAILURE; return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshTensorAdd(node, parameters, num, data); - rpp_status = rppi_tensor_add_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensions, data->tensorDimensionsValue, data->rppHandle); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshTensorAdd(node, parameters, num, data); + rpp_status = rppi_tensor_add_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensions, data->tensorDimensionsValue, data->rppHandle); + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - vx_status copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u),data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST); - return return_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u), data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); + return return_status; } -static vx_status VX_CALLBACK initializeTensorAdd(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeTensorAdd(vx_node node, const vx_reference *parameters, vx_uint32 num) { - TensorAddLocalData * data = new TensorAddLocalData; - memset(data, 0, sizeof(*data)); + TensorAddLocalData *data = new TensorAddLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); - cl_context theContext; // theContext - cl_command_queue theQueue; // command theQueue + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + cl_context theContext; // theContext + cl_command_queue theQueue; // command theQueue theQueue = data->handle.cmdq; - clGetCommandQueueInfo( theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - size_t arr_size; + clGetCommandQueueInfo(theQueue, + CL_QUEUE_CONTEXT, + sizeof(cl_context), &theContext, NULL); + size_t arr_size; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); size_t bytes = arr_size * sizeof(Rpp8u); data->cl_pSrc1 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); data->cl_pSrc2 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); data->cl_pDst = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + size_t arr_size; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + hipError_t status; + status = hipMalloc(&data->hip_pSrc1, bytes); + if (status != hipSuccess) + return VX_FAILURE; + status = hipMalloc(&data->hip_pSrc2, bytes); + if (status != hipSuccess) + return VX_FAILURE; + status = hipMalloc(&data->hip_pDst, bytes); + if (status != hipSuccess) + return VX_FAILURE; #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); refreshTensorAdd(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.hipstream); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); - - 
STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeTensorAdd(vx_node node, const vx_reference *parameters, vx_uint32 num) { - TensorAddLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + TensorAddLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status TensorAdd_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorAdd", - VX_KERNEL_RPP_TENSORADD, - processTensorAdd, - 6, - validateTensorAdd, - initializeTensorAdd, - uninitializeTensorAdd); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorAdd", + VX_KERNEL_RPP_TENSORADD, + processTensorAdd, + 6, + validateTensorAdd, + initializeTensorAdd, + uninitializeTensorAdd); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses OpenCL/HIP buffers instead of host accessible buffers
+ vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT , VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/TensorLookup.cpp b/amd_openvx_extensions/amd_rpp/source/TensorLookup.cpp index d652594c2c..d5dec551ec 100644 --- a/amd_openvx_extensions/amd_rpp/source/TensorLookup.cpp +++ b/amd_openvx_extensions/amd_rpp/source/TensorLookup.cpp @@ -22,176 +22,208 @@ THE SOFTWARE.
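The TensorAdd changes above establish the HIP staging lifecycle that every tensor kernel in this patch reuses: device buffers are allocated once in the initialize callback (hipMalloc), host arrays are staged to the device on each refresh (hipMemcpy host-to-device), the rppi_*_gpu primitive runs on the rppHandle's stream, and the result is copied back in the process callback (hipMemcpy device-to-host). The standalone sketch below walks the same lifecycle; it is illustrative only — the kernel name add_u8 and all variable names are invented here, and it performs a plain wrap-around byte add rather than calling RPP:

#include <hip/hip_runtime.h>
#include <cstdio>
#include <vector>

__global__ void add_u8(const unsigned char *a, const unsigned char *b, unsigned char *c, size_t n)
{
    size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        c[i] = a[i] + b[i]; // wrap-around add; whether RPP saturates is not visible in this diff
}

int main()
{
    const size_t n = 1024, bytes = n * sizeof(unsigned char);
    std::vector<unsigned char> src1(n, 3), src2(n, 4), dst(n, 0);
    unsigned char *dSrc1 = nullptr, *dSrc2 = nullptr, *dDst = nullptr;
    // initialize-time step: allocate the device buffers once
    if (hipMalloc(&dSrc1, bytes) != hipSuccess ||
        hipMalloc(&dSrc2, bytes) != hipSuccess ||
        hipMalloc(&dDst, bytes) != hipSuccess)
        return 1;
    // refresh-time step: stage the host arrays to the device
    if (hipMemcpy(dSrc1, src1.data(), bytes, hipMemcpyHostToDevice) != hipSuccess ||
        hipMemcpy(dSrc2, src2.data(), bytes, hipMemcpyHostToDevice) != hipSuccess)
        return 1;
    // process-time step: run the device work, then copy the result back
    hipLaunchKernelGGL(add_u8, dim3((n + 255) / 256), dim3(256), 0, 0, dSrc1, dSrc2, dDst, n);
    if (hipMemcpy(dst.data(), dDst, bytes, hipMemcpyDeviceToHost) != hipSuccess)
        return 1;
    printf("dst[0] = %d\n", (int)dst[0]); // prints 7
    // uninitialize-time step: every hipMalloc needs a matching hipFree
    hipFree(dSrc1);
    hipFree(dSrc2);
    hipFree(dDst);
    return 0;
}

One thing the sketch makes explicit: the final hipFree step has no counterpart in the patch, whose uninitialize callbacks destroy the rppHandle but do not free the hip_p* buffers.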
#include "internal_publishKernels.h" -struct TensorLookupLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp8u *pSrc; +struct TensorLookupLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp8u *pSrc; Rpp8u *luPtr; - Rpp8u *pDst; - Rpp32u tensorDimensions; - Rpp32u *tensorDimensionsValue; + Rpp8u *pDst; + Rpp32u tensorDimensions; + Rpp32u *tensorDimensionsValue; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#endif }; static vx_status VX_CALLBACK refreshTensorLookup(vx_node node, const vx_reference *parameters, vx_uint32 num, TensorLookupLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; + vx_status status = VX_SUCCESS; + size_t arr_size; + vx_status copy_status; // Input STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u),data->pSrc, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); + data->pSrc = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u), data->pSrc, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); //Output STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); data->pDst = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - // lutptr STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); data->luPtr = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u),data->luPtr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u), data->luPtr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); // tensor dim values STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); data->tensorDimensionsValue = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u),data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_context theContext; - cl_command_queue theQueue; - theQueue = data->handle.cmdq; - clGetCommandQueueInfo( theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc, CL_TRUE, 0, + cl_context theContext; + cl_command_queue theQueue; + theQueue = data->handle.cmdq; + clGetCommandQueueInfo(theQueue, + CL_QUEUE_CONTEXT, + sizeof(cl_context), &theContext, NULL); + cl_int err; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + err = 
clEnqueueWriteBuffer(theQueue, data->cl_pSrc, CL_TRUE, 0, bytes, data->pSrc, 0, NULL, NULL); #endif - } - - return status; + } + + return status; } static vx_status VX_CALLBACK validateTensorLookup(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; size_t arr_size; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be size)\n", scalar_type); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - return status; + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); + return status; } -static vx_status VX_CALLBACK processTensorLookup(vx_node node, const vx_reference * parameters, vx_uint32 num) +static vx_status VX_CALLBACK processTensorLookup(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - TensorLookupLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + TensorLookupLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); size_t arr_size; - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshTensorLookup(node, parameters, num, data); - // rpp_status = rppi_tensor_look_up_table_u8_gpu((void *)data->cl_pSrc,(void *)data->cl_pDst, data->tensorDimensions, data->tensorDimensionsValue,data->luPtr,data->rppHandle); - cl_command_queue theQueue; - theQueue = data->handle.cmdq; - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL ); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshTensorLookup(node, parameters, num, data); - // rpp_status = rppi_tensor_look_up_table_u8_host(data->pSrc, data->pDst, data->tensorDimensions, data->tensorDimensionsValue,data->luPtr,data->rppHandle); - } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { + // #if ENABLE_OPENCL + // cl_command_queue handle = data->handle.cmdq; + // refreshTensorLookup(node, parameters, num, data); + // rpp_status = rppi_tensor_look_up_table_u8_gpu((void *)data->cl_pSrc,(void *)data->cl_pDst, data->tensorDimensions, data->tensorDimensionsValue,data->luPtr,data->rppHandle); + // cl_command_queue theQueue; + // theQueue = data->handle.cmdq; + // cl_int err; + // STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + // size_t bytes = arr_size * sizeof(Rpp8u); + // clEnqueueReadBuffer(theQueue,
data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL ); + // #endif + return VX_ERROR_NOT_IMPLEMENTED; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshTensorLookup(node, parameters, num, data); + rpp_status = rppi_tensor_look_up_table_u8_host(data->pSrc, data->pDst, data->luPtr, data->tensorDimensions, data->tensorDimensionsValue); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - vx_status copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u),data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u), data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); return return_status; } -static vx_status VX_CALLBACK initializeTensorLookup(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeTensorLookup(vx_node node, const vx_reference *parameters, vx_uint32 num) { - TensorLookupLocalData * data = new TensorLookupLocalData; - memset(data, 0, sizeof(*data)); + TensorLookupLocalData *data = new TensorLookupLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); - cl_context theContext; // theContext - cl_command_queue theQueue; // command theQueue + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + cl_context theContext; // theContext + cl_command_queue theQueue; // command theQueue theQueue = data->handle.cmdq; - clGetCommandQueueInfo( theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - size_t arr_size; + clGetCommandQueueInfo(theQueue, + CL_QUEUE_CONTEXT, + sizeof(cl_context), &theContext, NULL); + size_t arr_size; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); size_t bytes = arr_size * sizeof(Rpp8u); data->cl_pSrc = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); data->cl_pDst = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); refreshTensorLookup(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + refreshTensorLookup(node, parameters, num, data); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeTensorLookup(vx_node node, const vx_reference *parameters, vx_uint32 num) { - TensorLookupLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + TensorLookupLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context.
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status TensorLookup_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorLookup", - VX_KERNEL_RPP_TENSORLOOKUP, - processTensorLookup, - 6, - validateTensorLookup, - initializeTensorLookup, - uninitializeTensorLookup); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorLookup", + VX_KERNEL_RPP_TENSORLOOKUP, + processTensorLookup, + 6, + validateTensorLookup, + initializeTensorLookup, + uninitializeTensorLookup); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY,
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/TensorMatrixMultiply.cpp b/amd_openvx_extensions/amd_rpp/source/TensorMatrixMultiply.cpp index 8410313922..e370da0e19 100644 --- a/amd_openvx_extensions/amd_rpp/source/TensorMatrixMultiply.cpp +++ b/amd_openvx_extensions/amd_rpp/source/TensorMatrixMultiply.cpp @@ -22,119 +22,146 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct TensorMatrixMultiplyLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp8u *pSrc1; +struct TensorMatrixMultiplyLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp8u *pSrc1; Rpp8u *pSrc2; - Rpp8u *pDst; - Rpp32u *tensorDimensionsValue1; - Rpp32u *tensorDimensionsValue2; + Rpp8u *pDst; + Rpp32u *tensorDimensionsValue1; + Rpp32u *tensorDimensionsValue2; #if ENABLE_OPENCL - cl_mem cl_pSrc1; + cl_mem cl_pSrc1; cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshTensorMatrixMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num, TensorMatrixMultiplyLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; + vx_status status = VX_SUCCESS; + size_t arr_size; + vx_status copy_status; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u),data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); + data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u), data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u),data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); + data->pSrc2 
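For reference, the TensorLookup host path above reduces to mapping every element of the flattened tensor through a 256-entry table (the GPU path is disabled and returns VX_ERROR_NOT_IMPLEMENTED). A minimal sketch of those semantics, inferred from the arguments passed to rppi_tensor_look_up_table_u8_host rather than taken from RPP itself; the function and variable names are illustrative:

#include <cstdint>
#include <cstdio>

// Reference semantics of an 8-bit tensor lookup: dst[i] = lut[src[i]] over the
// flattened tensor, whose length is the product of its dimensions.
static void tensor_lut_u8(const uint8_t *src, uint8_t *dst, const uint8_t lut[256],
                          uint32_t numDims, const uint32_t *dimValues)
{
    size_t n = 1;
    for (uint32_t d = 0; d < numDims; d++)
        n *= dimValues[d]; // total element count
    for (size_t i = 0; i < n; i++)
        dst[i] = lut[src[i]];
}

int main()
{
    uint8_t lut[256];
    for (int v = 0; v < 256; v++)
        lut[v] = (uint8_t)(255 - v); // example table: invert
    const uint32_t dims[2] = {2, 3};
    uint8_t src[6] = {0, 1, 2, 3, 4, 5}, dst[6];
    tensor_lut_u8(src, dst, lut, 2, dims);
    printf("%d %d\n", (int)dst[0], (int)dst[5]); // prints 255 250
    return 0;
}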
= (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u), data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); data->pDst = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[3], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); data->tensorDimensionsValue1 = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[3], 0, arr_size, sizeof(Rpp32u),data->tensorDimensionsValue1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); data->tensorDimensionsValue2 = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u),data->tensorDimensionsValue2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_context theContext; - cl_command_queue theQueue; - theQueue = data->handle.cmdq; - clGetCommandQueueInfo( theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - cl_int err; - size_t bytes; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, + cl_context theContext; + cl_command_queue theQueue; + theQueue = data->handle.cmdq; + clGetCommandQueueInfo(theQueue, + CL_QUEUE_CONTEXT, + sizeof(cl_context), &theContext, NULL); + cl_int err; + size_t bytes; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + bytes = arr_size * sizeof(Rpp8u); + err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, bytes, data->pSrc1, 0, NULL, NULL); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + bytes = arr_size * sizeof(Rpp8u); + err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, bytes, data->pSrc2, 0, NULL, NULL); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + hipError_t err; + err = hipMemcpy(data->hip_pSrc1, data->pSrc1, bytes, hipMemcpyHostToDevice); + if (err != hipSuccess) + return VX_FAILURE; + err = hipMemcpy(data->hip_pSrc2, data->pSrc2, bytes, hipMemcpyHostToDevice); + if (err != hipSuccess) + return VX_FAILURE; #endif - } - - return status; + } + + return status; } static vx_status VX_CALLBACK validateTensorMatrixMultiply(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = 
VX_SUCCESS; - vx_enum scalar_type; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; size_t arr_size; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - return status; + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); + return status; } -static vx_status VX_CALLBACK processTensorMatrixMultiply(vx_node node, const vx_reference * parameters, vx_uint32 num) +static vx_status VX_CALLBACK processTensorMatrixMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - TensorMatrixMultiplyLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + TensorMatrixMultiplyLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); size_t arr_size; - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshTensorMatrixMultiply(node, parameters, num, data); - rpp_status = rppi_tensor_matrix_multiply_u8_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, (void *)data->cl_pDst, data->tensorDimensionsValue1, data->tensorDimensionsValue2,data->rppHandle); + refreshTensorMatrixMultiply(node, parameters, num, data); + rpp_status = rppi_tensor_matrix_multiply_u8_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, (void *)data->cl_pDst, data->tensorDimensionsValue1, data->tensorDimensionsValue2, data->rppHandle); cl_command_queue theQueue; theQueue = data->handle.cmdq; cl_int err; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); size_t bytes = arr_size * sizeof(Rpp8u); - clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL ); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshTensorMatrixMultiply(node, parameters, num, data); + rpp_status = rppi_tensor_matrix_multiply_u8_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, (void *)data->hip_pDst, data->tensorDimensionsValue1, data->tensorDimensionsValue2, data->rppHandle); + hipError_t err; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + err = hipMemcpy(data->pDst, data->hip_pDst, bytes, hipMemcpyDeviceToHost); + if (err != hipSuccess) + return VX_FAILURE; + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshTensorMatrixMultiply(node, parameters, num, data); - rpp_status = rppi_tensor_matrix_multiply_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensionsValue1, data->tensorDimensionsValue2,data->rppHandle); - return_status = (rpp_status == RPP_SUCCESS) ?
VX_SUCCESS : VX_FAILURE; - } + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshTensorMatrixMultiply(node, parameters, num, data); + rpp_status = rppi_tensor_matrix_multiply_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensionsValue1, data->tensorDimensionsValue2, data->rppHandle); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - vx_status copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u),data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u), data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); return return_status; } -static vx_status VX_CALLBACK initializeTensorMatrixMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeTensorMatrixMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) { - TensorMatrixMultiplyLocalData * data = new TensorMatrixMultiplyLocalData; - memset(data, 0, sizeof(*data)); + TensorMatrixMultiplyLocalData *data = new TensorMatrixMultiplyLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); - cl_context theContext; // theContext - cl_command_queue theQueue; // command theQueue + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + cl_context theContext; // theContext + cl_command_queue theQueue; // command theQueue theQueue = data->handle.cmdq; - clGetCommandQueueInfo( theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - size_t arr_size; + clGetCommandQueueInfo(theQueue, + CL_QUEUE_CONTEXT, + sizeof(cl_context), &theContext, NULL); + size_t arr_size; size_t bytes; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); bytes = arr_size * sizeof(Rpp8u); @@ -145,70 +172,114 @@ static vx_status VX_CALLBACK initializeTensorMatrixMultiply(vx_node node, const STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); bytes = arr_size * sizeof(Rpp8u); data->cl_pDst = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + size_t arr_size; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + hipError_t status; + status = hipMalloc(&data->hip_pSrc1, bytes); + if (status != hipSuccess) + return VX_FAILURE; + status = hipMalloc(&data->hip_pSrc2, bytes); + if (status != hipSuccess) + return VX_FAILURE; + status = hipMalloc(&data->hip_pDst, bytes); + if (status != hipSuccess) + return VX_FAILURE; #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); refreshTensorMatrixMultiply(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, 
data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.hipstream); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeTensorMatrixMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) { - TensorMatrixMultiplyLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + TensorMatrixMultiplyLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status TensorMatrixMultiply_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorMatrixMultiply", - VX_KERNEL_RPP_TENSORMATRIXMULTIPLY, - processTensorMatrixMultiply, - 6, - validateTensorMatrixMultiply, - initializeTensorMatrixMultiply, - uninitializeTensorMatrixMultiply); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel,
VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorMatrixMultiply", + VX_KERNEL_RPP_TENSORMATRIXMULTIPLY, + processTensorMatrixMultiply, + 6, + validateTensorMatrixMultiply, + initializeTensorMatrixMultiply, + uninitializeTensorMatrixMultiply); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses OpenCL/HIP buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT , VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/TensorMultiply.cpp b/amd_openvx_extensions/amd_rpp/source/TensorMultiply.cpp index 6fc565da14..bf999f1e0c 100644 --- a/amd_openvx_extensions/amd_rpp/source/TensorMultiply.cpp +++ b/amd_openvx_extensions/amd_rpp/source/TensorMultiply.cpp @@ -22,183 +22,256 @@ THE SOFTWARE.
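Unlike the element-wise tensor kernels, TensorMatrixMultiply's output size matches neither input: an (m x k) by (k x n) multiply produces m x n elements, which is why the read-back above is sized from the output array (parameters[2]). A naive reference for the operation follows, under two stated assumptions: that tensorDimensionsValue1/2 carry {rows, cols} for each operand, and that the u8 result saturates (RPP's actual overflow behavior is not visible in this diff). The names are illustrative, not RPP code:

#include <cstdint>
#include <cstdio>

static void matmul_u8(const uint8_t *a, const uint8_t *b, uint8_t *c,
                      const uint32_t dimsA[2], const uint32_t dimsB[2])
{
    const uint32_t m = dimsA[0], k = dimsA[1], n = dimsB[1]; // dimsB[0] must equal k
    for (uint32_t i = 0; i < m; i++)
        for (uint32_t j = 0; j < n; j++) {
            uint32_t acc = 0; // widen to avoid overflow during accumulation
            for (uint32_t p = 0; p < k; p++)
                acc += (uint32_t)a[i * k + p] * b[p * n + j];
            c[i * n + j] = acc > 255 ? 255 : (uint8_t)acc; // saturation is an assumption
        }
}

int main()
{
    const uint32_t dimsA[2] = {2, 3}, dimsB[2] = {3, 2};
    uint8_t a[6] = {1, 2, 3, 4, 5, 6}; // 2x3
    uint8_t b[6] = {1, 0, 0, 1, 1, 1}; // 3x2
    uint8_t c[4];                      // 2x2 result
    matmul_u8(a, b, c, dimsA, dimsB);
    printf("%d %d %d %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3]); // prints 4 5 10 11
    return 0;
}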
#include "internal_publishKernels.h" -struct TensorMultiplyLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp8u *pSrc1; +struct TensorMultiplyLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp8u *pSrc1; Rpp8u *pSrc2; - Rpp8u *pDst; - Rpp32u tensorDimensions; - Rpp32u *tensorDimensionsValue; + Rpp8u *pDst; + Rpp32u tensorDimensions; + Rpp32u *tensorDimensionsValue; #if ENABLE_OPENCL - cl_mem cl_pSrc1; + cl_mem cl_pSrc1; cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshTensorMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num, TensorMultiplyLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; + vx_status status = VX_SUCCESS; + size_t arr_size; + vx_status copy_status; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u),data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); + data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u), data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); + data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); data->pDst = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u),data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u), data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); data->tensorDimensionsValue = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u),data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_context theContext; - cl_command_queue theQueue; - theQueue = data->handle.cmdq; - clGetCommandQueueInfo( theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, + cl_context theContext; + cl_command_queue theQueue; + theQueue = data->handle.cmdq; + clGetCommandQueueInfo(theQueue, + CL_QUEUE_CONTEXT, + sizeof(cl_context), &theContext, NULL); + cl_int err; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, 
&arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, bytes, data->pSrc1, 0, NULL, NULL); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, + err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, bytes, data->pSrc2, 0, NULL, NULL); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + hipError_t err; + err = hipMemcpy(data->hip_pSrc1, data->pSrc1, bytes, hipMemcpyHostToDevice); + if (err != hipSuccess) + return VX_FAILURE; + err = hipMemcpy(data->hip_pSrc2, data->pSrc2, bytes, hipMemcpyHostToDevice); + if (err != hipSuccess) + return VX_FAILURE; #endif - } - - return status; + } + + return status; } static vx_status VX_CALLBACK validateTensorMultiply(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; size_t arr_size; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be size)\n", scalar_type); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - return status; + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); + return status; } -static vx_status VX_CALLBACK processTensorMultiply(vx_node node, const vx_reference * parameters, vx_uint32 num) +static vx_status VX_CALLBACK processTensorMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - TensorMultiplyLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + TensorMultiplyLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); size_t arr_size; - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshTensorMultiply(node, parameters, num, data); - rpp_status = rppi_tensor_multiply_u8_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, (void *)data->cl_pDst, data->tensorDimensions, data->tensorDimensionsValue,data->rppHandle); + refreshTensorMultiply(node, parameters, num, data); + rpp_status = rppi_tensor_multiply_u8_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, (void *)data->cl_pDst, data->tensorDimensions, data->tensorDimensionsValue, data->rppHandle); cl_command_queue theQueue; theQueue = data->handle.cmdq; cl_int err; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); size_t bytes = arr_size * sizeof(Rpp8u); -
clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL ); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshTensorMultiply(node, parameters, num, data); + rpp_status = rppi_tensor_multiply_u8_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, (void *)data->hip_pDst, data->tensorDimensions, data->tensorDimensionsValue, data->rppHandle); + hipError_t err; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + err = hipMemcpy(data->pDst, data->hip_pDst, bytes, hipMemcpyDeviceToHost); + if (err != hipSuccess) + return VX_FAILURE; + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshTensorMultiply(node, parameters, num, data); - rpp_status = rppi_tensor_multiply_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensions, data->tensorDimensionsValue,data->rppHandle); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshTensorMultiply(node, parameters, num, data); + rpp_status = rppi_tensor_multiply_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensions, data->tensorDimensionsValue, data->rppHandle); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - vx_status copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u),data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u), data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); return return_status; } -static vx_status VX_CALLBACK initializeTensorMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeTensorMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) { - TensorMultiplyLocalData * data = new TensorMultiplyLocalData; - memset(data, 0, sizeof(*data)); + TensorMultiplyLocalData *data = new TensorMultiplyLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); - cl_context theContext; // theContext - cl_command_queue theQueue; // command theQueue + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + cl_context theContext; // theContext + cl_command_queue theQueue; // command theQueue theQueue = data->handle.cmdq; - clGetCommandQueueInfo( theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - size_t arr_size; + clGetCommandQueueInfo(theQueue, + CL_QUEUE_CONTEXT, + sizeof(cl_context), &theContext, NULL); + size_t arr_size; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); size_t bytes = arr_size * sizeof(Rpp8u); data->cl_pSrc1 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); data->cl_pSrc2 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, 
NULL); data->cl_pDst = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + size_t arr_size; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + hipError_t status; + status = hipMalloc(&data->hip_pSrc1, bytes); + if (status != hipSuccess) + return VX_FAILURE; + status = hipMalloc(&data->hip_pSrc2, bytes); + if (status != hipSuccess) + return VX_FAILURE; + status = hipMalloc(&data->hip_pDst, bytes); + if (status != hipSuccess) + return VX_FAILURE; #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); refreshTensorMultiply(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.hipstream); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); + + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeTensorMultiply(vx_node node, const vx_reference *parameters, vx_uint32 num) { - TensorMultiplyLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + TensorMultiplyLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status TensorMultiply_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorMultiply", - VX_KERNEL_RPP_TENSORMULTIPLY, - processTensorMultiply, - 6, - validateTensorMultiply, - initializeTensorMultiply, - uninitializeTensorMultiply); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorMultiply", + VX_KERNEL_RPP_TENSORMULTIPLY, + processTensorMultiply, + 6, + validateTensorMultiply, + initializeTensorMultiply, + uninitializeTensorMultiply); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses GPU buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT , VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); -
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/TensorSubtract.cpp b/amd_openvx_extensions/amd_rpp/source/TensorSubtract.cpp index a12adcfeb2..c4946d5188 100644 --- a/amd_openvx_extensions/amd_rpp/source/TensorSubtract.cpp +++ b/amd_openvx_extensions/amd_rpp/source/TensorSubtract.cpp @@ -22,183 +22,256 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct TensorSubtractLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp8u *pSrc1; +struct TensorSubtractLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp8u *pSrc1; Rpp8u *pSrc2; - Rpp8u *pDst; - Rpp32u tensorDimensions; - Rpp32u *tensorDimensionsValue; + Rpp8u *pDst; + Rpp32u tensorDimensions; + Rpp32u *tensorDimensionsValue; #if ENABLE_OPENCL - cl_mem cl_pSrc1; + cl_mem cl_pSrc1; cl_mem cl_pSrc2; - cl_mem cl_pDst; -#endif + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc1; + void *hip_pSrc2; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshTensorSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num, TensorSubtractLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; + vx_status status = VX_SUCCESS; + size_t arr_size; + vx_status copy_status; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u),data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); + data->pSrc1 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[0], 0, arr_size, sizeof(Rpp8u), data->pSrc1, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); + data->pSrc2 = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); data->pDst = (Rpp8u *)malloc(sizeof(Rpp8u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u),data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, arr_size, sizeof(Rpp8u), data->pSrc2, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->tensorDimensions)); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); data->tensorDimensionsValue = (Rpp32u *)malloc(sizeof(Rpp32u) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u),data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(Rpp32u), data->tensorDimensionsValue, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_context theContext; - cl_command_queue theQueue; - theQueue = data->handle.cmdq; - clGetCommandQueueInfo( theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - cl_int err; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - size_t bytes = arr_size * sizeof(Rpp8u); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, + cl_context theContext; + cl_command_queue theQueue; + theQueue = data->handle.cmdq; + clGetCommandQueueInfo(theQueue, + CL_QUEUE_CONTEXT, + sizeof(cl_context), &theContext, NULL); + cl_int err; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, 
&arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc1, CL_TRUE, 0, bytes, data->pSrc1, 0, NULL, NULL); - err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, + err = clEnqueueWriteBuffer(theQueue, data->cl_pSrc2, CL_TRUE, 0, bytes, data->pSrc2, 0, NULL, NULL); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + hipError_t err; + err = hipMemcpy(data->hip_pSrc1, data->pSrc1, bytes, hipMemcpyHostToDevice); + if (err != hipSuccess) + return VX_FAILURE; + err = hipMemcpy(data->hip_pSrc2, data->pSrc2, bytes, hipMemcpyHostToDevice); + if (err != hipSuccess) + return VX_FAILURE; #endif - } - - return status; + } + + return status; } static vx_status VX_CALLBACK validateTensorSubtract(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; size_t arr_size; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be size)\n", scalar_type); STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); - return status; + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_ARRAY_ITEMTYPE, &scalar_type, sizeof(scalar_type))); + return status; } -static vx_status VX_CALLBACK processTensorSubtract(vx_node node, const vx_reference * parameters, vx_uint32 num) +static vx_status VX_CALLBACK processTensorSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - TensorSubtractLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + TensorSubtractLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); size_t arr_size; - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshTensorSubtract(node, parameters, num, data); - rpp_status = rppi_tensor_subtract_u8_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, (void *)data->cl_pDst, data->tensorDimensions, data->tensorDimensionsValue,data->rppHandle); + refreshTensorSubtract(node, parameters, num, data); + rpp_status = rppi_tensor_subtract_u8_gpu((void *)data->cl_pSrc1, (void *)data->cl_pSrc2, (void *)data->cl_pDst, data->tensorDimensions, data->tensorDimensionsValue, data->rppHandle); cl_command_queue theQueue; theQueue = data->handle.cmdq; cl_int err; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); size_t bytes = arr_size * sizeof(Rpp8u); -
clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL ); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + clEnqueueReadBuffer(theQueue, data->cl_pDst, CL_TRUE, 0, bytes, data->pDst, 0, NULL, NULL); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshTensorSubtract(node, parameters, num, data); + rpp_status = rppi_tensor_subtract_u8_gpu((void *)data->hip_pSrc1, (void *)data->hip_pSrc2, (void *)data->hip_pDst, data->tensorDimensions, data->tensorDimensionsValue, data->rppHandle); + hipError_t err; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[1], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + err = hipMemcpy(data->pDst, data->hip_pDst, bytes, hipMemcpyDeviceToHost); + if (err != hipSuccess) + return VX_FAILURE; + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshTensorSubtract(node, parameters, num, data); - rpp_status = rppi_tensor_subtract_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensions, data->tensorDimensionsValue,data->rppHandle); - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - } + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshTensorSubtract(node, parameters, num, data); + rpp_status = rppi_tensor_subtract_u8_host(data->pSrc1, data->pSrc2, data->pDst, data->tensorDimensions, data->tensorDimensionsValue, data->rppHandle); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - vx_status copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u),data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST); - return return_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp8u), data->pDst, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST)); + return return_status; } -static vx_status VX_CALLBACK initializeTensorSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeTensorSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num) { - TensorSubtractLocalData * data = new TensorSubtractLocalData; - memset(data, 0, sizeof(*data)); + TensorSubtractLocalData *data = new TensorSubtractLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); - cl_context theContext; // theContext - cl_command_queue theQueue; // command theQueue + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + cl_context theContext; // theContext + cl_command_queue theQueue; // command theQueue theQueue = data->handle.cmdq; - clGetCommandQueueInfo( theQueue, - CL_QUEUE_CONTEXT, - sizeof(cl_context), &theContext, NULL); - size_t arr_size; + clGetCommandQueueInfo(theQueue, + CL_QUEUE_CONTEXT, + sizeof(cl_context), &theContext, NULL); + size_t arr_size; STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); size_t bytes = arr_size * sizeof(Rpp8u); data->cl_pSrc1 = clCreateBuffer(theContext, CL_MEM_READ_ONLY, bytes, NULL, NULL); data->cl_pSrc2 = clCreateBuffer(theContext, 
CL_MEM_READ_ONLY, bytes, NULL, NULL); data->cl_pDst = clCreateBuffer(theContext, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + size_t arr_size; + STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[0], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); + size_t bytes = arr_size * sizeof(Rpp8u); + hipError_t status; + status = hipMalloc(&data->hip_pSrc1, bytes); + if (status != hipSuccess) + return VX_FAILURE; + status = hipMalloc(&data->hip_pSrc2, bytes); + if (status != hipSuccess) + return VX_FAILURE; + status = hipMalloc(&data->hip_pDst, bytes); + if (status != hipSuccess) + return VX_FAILURE; #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); refreshTensorSubtract(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.cmdq); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStream(&data->rppHandle, data->handle.hipstream); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, 1); + + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeTensorSubtract(vx_node node, const vx_reference *parameters, vx_uint32 num) { - TensorSubtractLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + TensorSubtractLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status TensorSubtract_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorSubtract", - VX_KERNEL_RPP_TENSORSUBTRACT, - processTensorSubtract, - 6, - validateTensorSubtract, - initializeTensorSubtract, - uninitializeTensorSubtract); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.TensorSubtract", + VX_KERNEL_RPP_TENSORSUBTRACT, + processTensorSubtract, + 6, + validateTensorSubtract, + initializeTensorSubtract, + uninitializeTensorSubtract); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP + // enable OpenCL/HIP buffer access since the kernel_f callback uses GPU buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT , VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); -
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_BIDIRECTIONAL, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/Thresholding.cpp b/amd_openvx_extensions/amd_rpp/source/Thresholding.cpp deleted file mode 100644 index 5551803ef3..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Thresholding.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct ThresholdingLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp8u min; - Rpp8u max; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshThresholding(vx_node node, const vx_reference *parameters, vx_uint32 num, ThresholdingLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->min)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->max)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateThresholding(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT8) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT8) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Thresholding: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, 
VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processThresholding(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ThresholdingLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshThresholding(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_thresholding_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->min,data->max,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_thresholding_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->min,data->max,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshThresholding(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_thresholding_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->min,data->max,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_thresholding_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->min,data->max,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeThresholding(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ThresholdingLocalData * data = new ThresholdingLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshThresholding(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeThresholding(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ThresholdingLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Thresholding_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Thresholding", - VX_KERNEL_RPP_THRESHOLDING, - processThresholding, - 5, - validateThresholding, - initializeThresholding, - uninitializeThresholding); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ThresholdingbatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/ThresholdingbatchPD.cpp index dbf8a98261..f27bb1eb46 100644 --- a/amd_openvx_extensions/amd_rpp/source/ThresholdingbatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/ThresholdingbatchPD.cpp @@ -22,206 +22,269 @@ THE SOFTWARE. 
#include "internal_publishKernels.h" -struct ThresholdingbatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint8 *min; - vx_uint8 *max; +struct ThresholdingbatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_uint8 *min; + vx_uint8 *max; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshThresholdingbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, ThresholdingbatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->min = (vx_uint8 *)malloc(sizeof(vx_uint8) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint8),data->min, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->max = (vx_uint8 *)malloc(sizeof(vx_uint8) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint8),data->max, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_uint8), data->min, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(vx_uint8), data->max, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = 
data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateThresholdingbatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ThresholdingbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - 
output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ThresholdingbatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processThresholdingbatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ThresholdingbatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processThresholdingbatchPD(vx_node node, const vx_reference 
*parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ThresholdingbatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshThresholdingbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_thresholding_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_thresholding_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshThresholdingbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_thresholding_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_thresholding_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshThresholdingbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_thresholding_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_thresholding_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshThresholdingbatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_thresholding_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_thresholding_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshThresholdingbatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_thresholding_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_thresholding_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->min, data->max, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeThresholdingbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeThresholdingbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ThresholdingbatchPDLocalData * data = new ThresholdingbatchPDLocalData; - memset(data, 0, sizeof(*data)); + ThresholdingbatchPDLocalData *data = new ThresholdingbatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshThresholdingbatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); + data->min = (vx_uint8 *)malloc(sizeof(vx_uint8) * data->nbatchSize); + data->max = (vx_uint8 *)malloc(sizeof(vx_uint8) * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshThresholdingbatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeThresholdingbatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - ThresholdingbatchPDLocalData * data; - 
STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + ThresholdingbatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->min); + free(data->max); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status ThresholdingbatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ThresholdingbatchPD", - VX_KERNEL_RPP_THRESHOLDINGBATCHPD, - processThresholdingbatchPD, - 8, - validateThresholdingbatchPD, - initializeThresholdingbatchPD, - uninitializeThresholdingbatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ThresholdingbatchPD", + VX_KERNEL_RPP_THRESHOLDINGBATCHPD, + processThresholdingbatchPD, + 8, + validateThresholdingbatchPD, + initializeThresholdingbatchPD, + uninitializeThresholdingbatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + 
STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/ThresholdingbatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/ThresholdingbatchPDROID.cpp deleted file mode 100644 index 275fe10916..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ThresholdingbatchPDROID.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct ThresholdingbatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_uint8 *min; - vx_uint8 *max; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshThresholdingbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, ThresholdingbatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->min = (vx_uint8 *)malloc(sizeof(vx_uint8) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_uint8),data->min, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[5], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->max = (vx_uint8 *)malloc(sizeof(vx_uint8) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, arr_size, sizeof(vx_uint8),data->max, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - 
Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateThresholdingbatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ThresholdingbatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, 
VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processThresholdingbatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ThresholdingbatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshThresholdingbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_thresholding_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_thresholding_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshThresholdingbatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_thresholding_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_thresholding_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeThresholdingbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ThresholdingbatchPDROIDLocalData * data = new ThresholdingbatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[11], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshThresholdingbatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeThresholdingbatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ThresholdingbatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ThresholdingbatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ThresholdingbatchPDROID", - VX_KERNEL_RPP_THRESHOLDINGBATCHPDROID, - processThresholdingbatchPDROID, - 12, - validateThresholdingbatchPDROID, - initializeThresholdingbatchPDROID, - uninitializeThresholdingbatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/ThresholdingbatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/ThresholdingbatchPS.cpp deleted file mode 100644 index 2aaaba912b..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/ThresholdingbatchPS.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
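For orientation: the query_target_support callback introduced in the hunks above simply mirrors the context-level affinity, so whichever target the application sets on the vx_context is what every amd_rpp node reports back to the graph verifier. A minimal application-side sketch, assuming the AMD OpenVX extension header vx_ext_amd.h and names as they appear in the registration code above:

// Sketch: steer the amd_rpp kernels onto the GPU by setting the context
// affinity before vxVerifyGraph. Nodes registered with
// VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT echo this choice.
#include <VX/vx.h>
#include <vx_ext_amd.h>

vx_status set_gpu_affinity(vx_context context)
{
    AgoTargetAffinityInfo affinity = {0};
    affinity.device_type = AGO_TARGET_AFFINITY_GPU;   // or AGO_TARGET_AFFINITY_CPU
    return vxSetContextAttribute(context,
                                 VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,
                                 &affinity, sizeof(affinity));
}

Note that on the OpenCL backend the callback still hardcodes CPU support to keep vxVerifyGraph from failing, so this setting only takes full effect on the HIP path.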
-*/ - -#include "internal_publishKernels.h" - -struct ThresholdingbatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp8u min; - Rpp8u max; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshThresholdingbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, ThresholdingbatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->min)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->max)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateThresholdingbatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT8) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT8) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", 
scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: ThresholdingbatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processThresholdingbatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - ThresholdingbatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshThresholdingbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_thresholding_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_thresholding_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshThresholdingbatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_thresholding_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_thresholding_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->min,data->max,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeThresholdingbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ThresholdingbatchPSLocalData * data = new ThresholdingbatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshThresholdingbatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeThresholdingbatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - ThresholdingbatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status ThresholdingbatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ThresholdingbatchPS", - VX_KERNEL_RPP_THRESHOLDINGBATCHPS, - processThresholdingbatchPS, - 8, - validateThresholdingbatchPS, - initializeThresholdingbatchPS, - uninitializeThresholdingbatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, 
VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/Vignette.cpp b/amd_openvx_extensions/amd_rpp/source/Vignette.cpp deleted file mode 100644 index bc3790e4cf..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/Vignette.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
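Throughout these kernels the batch is carried as a single vx_image holding nbatchSize maximum-sized frames stacked vertically; the refresh*() callbacks therefore divide the queried image height by nbatchSize and rebuild the per-image RppiSize table from the width/height arrays. A standalone sketch of that arithmetic, with hypothetical numbers and a stand-in type (same logic as the refresh callbacks):

// Sketch: recover per-image dimensions from the stacked batch image.
// A 224x896 batch image with nbatchSize = 4 yields 224x224 max slots.
#include <vector>
struct Size2D { unsigned width, height; };  // stand-in for RppiSize

std::vector<Size2D> unpackBatchDims(unsigned batchW, unsigned batchH,
                                    unsigned nbatchSize,
                                    const unsigned *widths,
                                    const unsigned *heights)
{
    unsigned maxW = batchW;               // e.g. 224
    unsigned maxH = batchH / nbatchSize;  // e.g. 896 / 4 = 224
    (void)maxW; (void)maxH;               // the RPP calls take these as maxSrcDimensions
    std::vector<Size2D> dims(nbatchSize);
    for (unsigned i = 0; i < nbatchSize; i++)
        dims[i] = { widths[i], heights[i] };  // true size of frame i, <= max slot
    return dims;
}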
-*/ - -#include "internal_publishKernels.h" - -struct VignetteLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f stdDev; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshVignette(vx_node node, const vx_reference *parameters, vx_uint32 num, VignetteLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[2], &data->stdDev)); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateVignette(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[2], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #2 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Vignette: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, 
&df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processVignette(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - VignetteLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshVignette(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_vignette_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->stdDev,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_vignette_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->stdDev,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshVignette(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_vignette_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->stdDev,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_vignette_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->stdDev,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeVignette(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - VignetteLocalData * data = new VignetteLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshVignette(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeVignette(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - VignetteLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status Vignette_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Vignette", - VX_KERNEL_RPP_VIGNETTE, - processVignette, - 4, - validateVignette, - initializeVignette, - uninitializeVignette); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, 
VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/VignettebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/VignettebatchPD.cpp index 4939a8644f..a824ad9664 100644 --- a/amd_openvx_extensions/amd_rpp/source/VignettebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/VignettebatchPD.cpp @@ -22,220 +22,263 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct VignettebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *stdDev; +struct VignettebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_float32 *stdDev; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; -#endif + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshVignettebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, VignettebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, 
data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(vx_float32), data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateVignettebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = 
VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: VignettebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: VignettebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, 
&width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processVignettebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - VignettebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processVignettebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + VignettebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshVignettebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_vignette_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_vignette_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshVignettebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_vignette_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->stdDev, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_vignette_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->stdDev, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshVignettebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_vignette_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->stdDev,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_vignette_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->stdDev,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshVignettebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_vignette_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->stdDev, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_vignette_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->stdDev, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshVignettebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_vignette_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_vignette_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshVignettebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_vignette_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_vignette_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->stdDev, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeVignettebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeVignettebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - VignettebatchPDLocalData * data = new VignettebatchPDLocalData; - memset(data, 0, sizeof(*data)); + VignettebatchPDLocalData *data = new VignettebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshVignettebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * data->nbatchSize); + refreshVignettebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeVignettebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - VignettebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + VignettebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); 
- return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->stdDev); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif + + return VX_SUCCESS; } vx_status VignettebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.VignettebatchPD", - VX_KERNEL_RPP_VIGNETTEBATCHPD, - processVignettebatchPD, - 7, - validateVignettebatchPD, - initializeVignettebatchPD, - uninitializeVignettebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.VignettebatchPD", + VX_KERNEL_RPP_VIGNETTEBATCHPD, + processVignettebatchPD, + 7, + validateVignettebatchPD, + initializeVignettebatchPD, + uninitializeVignettebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, 
VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/VignettebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/VignettebatchPDROID.cpp deleted file mode 100644 index 9c53d4e5b2..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/VignettebatchPDROID.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct VignettebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - vx_float32 *stdDev; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshVignettebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, VignettebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[4], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->stdDev = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, arr_size, sizeof(vx_float32),data->stdDev, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateVignettebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #9 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #10 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: VignettebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processVignettebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - VignettebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshVignettebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_vignette_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void 
*)data->cl_pDst,data->stdDev,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_vignette_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshVignettebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_vignette_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_vignette_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeVignettebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - VignettebatchPDROIDLocalData * data = new VignettebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshVignettebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeVignettebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - VignettebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status VignettebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.VignettebatchPDROID", - VX_KERNEL_RPP_VIGNETTEBATCHPDROID, - processVignettebatchPDROID, - 11, - validateVignettebatchPDROID, - initializeVignettebatchPDROID, - uninitializeVignettebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) 
- { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/VignettebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/VignettebatchPS.cpp deleted file mode 100644 index b170ffa1e4..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/VignettebatchPS.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct VignettebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f stdDev; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshVignettebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, VignettebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->stdDev)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateVignettebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - 
vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: VignettebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processVignettebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - VignettebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshVignettebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_vignette_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_vignette_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->stdDev,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshVignettebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_vignette_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_vignette_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->stdDev,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeVignettebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - VignettebatchPSLocalData * data = new VignettebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshVignettebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeVignettebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - VignettebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status VignettebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.VignettebatchPS", - VX_KERNEL_RPP_VIGNETTEBATCHPS, - processVignettebatchPS, - 7, - validateVignettebatchPS, - initializeVignettebatchPS, - uninitializeVignettebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/WarpAffine.cpp b/amd_openvx_extensions/amd_rpp/source/WarpAffine.cpp deleted file mode 100644 index 
bf1baa7fe7..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/WarpAffine.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct WarpAffineLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - RppiSize dstDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *affine; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshWarpAffine(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpAffineLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->dstDimensions.height, sizeof(data->dstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->dstDimensions.width, sizeof(data->dstDimensions.width))); - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->affine = (Rpp32f *)malloc(sizeof(Rpp32f) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp32f),data->affine, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateWarpAffine(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - 
STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpAffine: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processWarpAffine(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - WarpAffineLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshWarpAffine(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_affine_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->affine,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_affine_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->affine,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshWarpAffine(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_affine_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->affine,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_affine_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->affine,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeWarpAffine(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpAffineLocalData * data = new WarpAffineLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshWarpAffine(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeWarpAffine(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpAffineLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status WarpAffine_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpAffine", - VX_KERNEL_RPP_WARPAFFINE, - processWarpAffine, - 4, - validateWarpAffine, - initializeWarpAffine, - uninitializeWarpAffine); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/WarpAffinebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/WarpAffinebatchPD.cpp index b2581ac91b..bd219598c6 100644 --- a/amd_openvx_extensions/amd_rpp/source/WarpAffinebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/WarpAffinebatchPD.cpp @@ -22,235 +22,283 @@ THE SOFTWARE. 
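// [Reviewer note] Context for the WarpAffinebatchPD hunk that follows: batchPD
// kernels pack the whole batch into one VX image with the N images stacked along
// the height, so refresh*() recovers the per-image maximum extent by dividing the
// image height by nbatchSize, and reads each slot's true size from the width[]
// and height[] arrays (parameters 1/2 for source, 4/5 for destination).
// parameters[6] carries six affine coefficients per image, which is why the new
// initialize code allocates 6 * nbatchSize floats. A small compilable sketch of
// the unpacking arithmetic, with hypothetical names (Extent, slotMaxExtent) and
// the assumption that the packed height is an exact multiple of the batch size:
#include <cassert>
#include <cstdio>

struct Extent { unsigned width, height; };   // stands in for RppiSize

// Recover the per-slot maximum extent from the packed image, mirroring the
// maxSrcDimensions.height / nbatchSize computation in refreshWarpAffinebatchPD.
static Extent slotMaxExtent(Extent packed, unsigned nbatch) {
    assert(nbatch > 0 && packed.height % nbatch == 0);
    return Extent{packed.width, packed.height / nbatch};
}

int main() {
    const unsigned nbatch = 4;
    Extent packed{1920, nbatch * 1080};      // hypothetical batch of four HD frames
    Extent slot = slotMaxExtent(packed, nbatch);
    float affine[6 * nbatch] = {};           // one set of 6 coefficients per slot
    affine[6 * 2 + 0] = 1.0f;                // slot 2's matrix starts at index 6 * 2
    affine[6 * 2 + 4] = 1.0f;                // identity [1 0 0; 0 1 0] for slot 2
    std::printf("per-slot max extent: %ux%u\n", slot.width, slot.height);
    return 0;
}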
#include "internal_publishKernels.h" -struct WarpAffinebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *affine; +struct WarpAffinebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppiSize *dstDimensions; + RppiSize maxDstDimensions; + Rpp32u *dstBatch_width; + Rpp32u *dstBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + Rpp32f *affine; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; + cl_mem cl_pSrc; + cl_mem cl_pDst; #elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; + void *hip_pSrc; + void *hip_pDst; #endif }; static vx_status VX_CALLBACK refreshWarpAffinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpAffinebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->affine = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_float32),data->affine, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - 
data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, 6 * data->nbatchSize, sizeof(vx_float32), data->affine, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); + data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->dstDimensions[i].width = data->dstBatch_width[i]; + data->dstDimensions[i].height = data->dstBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, 
sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateWarpAffinebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpAffinebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpAffinebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3],
VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + // Check for output parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processWarpAffinebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) +static vx_status VX_CALLBACK processWarpAffinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - WarpAffinebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - vx_int32 output_format_toggle = 0; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + WarpAffinebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + vx_int32 output_format_toggle = 0; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshWarpAffinebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_affine_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->affine,output_format_toggle,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_affine_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->affine,output_format_toggle,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + refreshWarpAffinebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_warp_affine_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_warp_affine_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #elif ENABLE_HIP - refreshWarpAffinebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_affine_u8_pln1_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->dstDimensions,data->maxDstDimensions,data->affine,output_format_toggle,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_affine_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->hip_pDst,data->dstDimensions,data->maxDstDimensions,data->affine,output_format_toggle,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshWarpAffinebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_warp_affine_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_warp_affine_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshWarpAffinebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_affine_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->affine,output_format_toggle,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_affine_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->affine,output_format_toggle,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshWarpAffinebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_warp_affine_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_warp_affine_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->affine, output_format_toggle, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; } static vx_status VX_CALLBACK initializeWarpAffinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - WarpAffinebatchPDLocalData * data = new WarpAffinebatchPDLocalData; - memset(data, 0, sizeof(*data)); + WarpAffinebatchPDLocalData *data = new WarpAffinebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshWarpAffinebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); + data->affine = (vx_float32 *)malloc(sizeof(vx_float32) * 6 * data->nbatchSize); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + refreshWarpAffinebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); #elif ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + 
rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeWarpAffinebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - WarpAffinebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + WarpAffinebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); #if ENABLE_OPENCL || ENABLE_HIP - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + free(data->dstDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->dstBatch_width); + free(data->dstBatch_height); + free(data->affine); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO: currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; } vx_status WarpAffinebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpAffinebatchPD", - VX_KERNEL_RPP_WARPAFFINEBATCHPD, - processWarpAffinebatchPD, - 9, - validateWarpAffinebatchPD, - initializeWarpAffinebatchPD, - uninitializeWarpAffinebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpAffinebatchPD", + VX_KERNEL_RPP_WARPAFFINEBATCHPD, + processWarpAffinebatchPD, + 9, + validateWarpAffinebatchPD, + initializeWarpAffinebatchPD, + uninitializeWarpAffinebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL || ENABLE_HIP - // enable OpenCL buffer access since the kernel_f callback uses
OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/WarpAffinebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/WarpAffinebatchPDROID.cpp deleted file mode 100644 index 3faca04e51..0000000000 --- 
a/amd_openvx_extensions/amd_rpp/source/WarpAffinebatchPDROID.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct WarpAffinebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *affine; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshWarpAffinebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpAffinebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->affine = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_float32),data->affine, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - 
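A note on the memory layout these batchPD/batchPDROID kernels assume: the whole batch is packed into one tall VX image, which is why refresh divides the stacked image height by nbatchSize to recover the per-image slot height, while the per-image widths and heights arrive through the vx_array parameters. A minimal sketch of that addressing convention, with a hypothetical helper and parameter names (the fixed-slot layout is inferred from the height division above, not from RPP documentation):

#include <cstddef>
#include <cstdint>

// Hypothetical helper: byte offset of image i inside the vertically stacked batch.
// Assumes every image occupies a fixed slot of maxHeight = stackedHeight / nbatchSize rows.
static size_t imageOffset(uint32_t i, uint32_t stackedHeight, uint32_t nbatchSize, size_t rowPitchBytes)
{
    uint32_t maxHeight = stackedHeight / nbatchSize; // per-image slot height
    return static_cast<size_t>(i) * maxHeight * rowPitchBytes;
}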
copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateWarpAffinebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; 
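Worth calling out while reading this deleted ROID refresh: it mallocs data->affine, the dimension arrays, the roiPoints array, and several temporaries on every call, and refresh runs on every process invocation with nothing ever freed. The retained WarpAffinebatchPD above fixes exactly this by allocating once in initialize and releasing in uninitialize; a minimal sketch of that lifetime split, using simplified stand-in types rather than the extension's real structs:

#include <cstdlib>

struct LocalData { unsigned n; unsigned *widths; };   // stand-in for the kernel's local data

static void initOnce(LocalData *d, unsigned n)        // initialize callback: allocate once
{
    d->n = n;
    d->widths = (unsigned *)malloc(sizeof(unsigned) * n);
}
static void refreshEachFrame(LocalData *d, const unsigned *src) // refresh: copy only, no malloc
{
    for (unsigned i = 0; i < d->n; i++)
        d->widths[i] = src[i];
}
static void deinit(LocalData *d) { free(d->widths); } // uninitialize: release once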
- input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpAffinebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processWarpAffinebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - WarpAffinebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshWarpAffinebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_affine_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->affine,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_affine_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->affine,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshWarpAffinebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_affine_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->affine,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_affine_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->affine,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeWarpAffinebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpAffinebatchPDROIDLocalData * data = new WarpAffinebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshWarpAffinebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeWarpAffinebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpAffinebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status WarpAffinebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpAffinebatchPDROID", - VX_KERNEL_RPP_WARPAFFINEBATCHPDROID, - processWarpAffinebatchPDROID, - 13, - validateWarpAffinebatchPDROID, - initializeWarpAffinebatchPDROID, - uninitializeWarpAffinebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT,VX_TYPE_ARRAY , VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/WarpAffinebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/WarpAffinebatchPS.cpp deleted file mode 100644 index ea55e9482f..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/WarpAffinebatchPS.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct WarpAffinebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *affine; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshWarpAffinebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpAffinebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->dstDimensions.width)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->dstDimensions.height)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->affine = (Rpp32f *)malloc(sizeof(Rpp32f) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(Rpp32f),data->affine, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateWarpAffinebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; 
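As this refresh shows, the batchPS variant reads one destination width and height from the scalar parameters [4] and [5] and one affine array for the whole batch, where batchPD carries per-image destination arrays; judging from these signatures, PS denotes a single shared parameter set versus batchPD's per-image parameters. For illustration, one shared 2x3 affine is six floats, matching the 6 * nbatchSize per-image allocation in the reworked batchPD initializer above; the row-major {a, b, tx, c, d, ty} ordering used here is an assumption:

#include <cmath>
#include <vector>

// Hypothetical builder: one 2x3 affine shared by every image in a PS batch
// (pure rotation about the origin; translation left at zero).
static std::vector<float> makeSharedAffine(float angleDegrees)
{
    float t = angleDegrees * 3.14159265f / 180.0f;
    return { std::cos(t), -std::sin(t), 0.0f,
             std::sin(t),  std::cos(t), 0.0f };
}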
- vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpAffinebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processWarpAffinebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - WarpAffinebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshWarpAffinebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_affine_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->affine,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_affine_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->affine,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshWarpAffinebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_affine_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->affine,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_affine_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->affine,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeWarpAffinebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpAffinebatchPSLocalData * data = new WarpAffinebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshWarpAffinebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeWarpAffinebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpAffinebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status WarpAffinebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpAffinebatchPS", - VX_KERNEL_RPP_WARPAFFINEBATCHPS, - processWarpAffinebatchPS, - 9, - validateWarpAffinebatchPS, - initializeWarpAffinebatchPS, - uninitializeWarpAffinebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/WarpPerspective.cpp b/amd_openvx_extensions/amd_rpp/source/WarpPerspective.cpp deleted file mode 100644 index fd7edf5c76..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/WarpPerspective.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct WarpPerspectiveLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - RppiSize srcDimensions; - RppiSize dstDimensions; - Rpp32u device_type; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *perspective; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshWarpPerspective(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpPerspectiveLocalData *data) -{ - vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->srcDimensions.height, sizeof(data->srcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->srcDimensions.width, sizeof(data->srcDimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &data->dstDimensions.height, sizeof(data->dstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &data->dstDimensions.width, sizeof(data->dstDimensions.width))); - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->perspective = (Rpp32f *)malloc(sizeof(Rpp32f) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, arr_size, sizeof(Rpp32f),data->perspective, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateWarpPerspective(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpPerspective: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,1); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - 
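One orientation point for this deleted single-image kernel: a perspective warp carries a full 3x3 matrix, nine floats per image (which matches the 9 * nbatchSize allocation added to WarpPerspectivebatchPD further below), against the six floats of an affine, and the mapping divides through by the third row. Roughly, with row-major ordering assumed for illustration:

#include <array>

// Illustrative 3x3 perspective mapping; h is assumed row-major {h0..h8}.
static void applyPerspective(const std::array<float, 9> &h, float x, float y, float &xOut, float &yOut)
{
    float w = h[6] * x + h[7] * y + h[8]; // projective divide term
    xOut = (h[0] * x + h[1] * y + h[2]) / w;
    yOut = (h[3] * x + h[4] * y + h[5]) / w;
}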
STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processWarpPerspective(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - WarpPerspectiveLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshWarpPerspective(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_perspective_u8_pln1_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->perspective,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_perspective_u8_pkd3_gpu((void *)data->cl_pSrc,data->srcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->perspective,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshWarpPerspective(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_perspective_u8_pln1_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->perspective,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_perspective_u8_pkd3_host(data->pSrc,data->srcDimensions,data->pDst,data->dstDimensions,data->perspective,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeWarpPerspective(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpPerspectiveLocalData * data = new WarpPerspectiveLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshWarpPerspective(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStream(&data->rppHandle, data->handle.cmdq); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, 1); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeWarpPerspective(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpPerspectiveLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status WarpPerspective_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpPerspective", - VX_KERNEL_RPP_WARPPERSPECTIVE, - processWarpPerspective, - 4, - validateWarpPerspective, - initializeWarpPerspective, - uninitializeWarpPerspective); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/WarpPerspectivebatchPD.cpp b/amd_openvx_extensions/amd_rpp/source/WarpPerspectivebatchPD.cpp index ba29cbc34d..24f24979b3 100644 --- a/amd_openvx_extensions/amd_rpp/source/WarpPerspectivebatchPD.cpp +++ b/amd_openvx_extensions/amd_rpp/source/WarpPerspectivebatchPD.cpp @@ -22,215 +22,283 @@ THE SOFTWARE. 
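The hunk below mirrors the WarpAffinebatchPD rework earlier in this patch: device pointers and the stream come in through #elif ENABLE_HIP branches, the per-batch buffers move out of refresh into initialize with matching frees in uninitialize, and the vxCopyArrayRange calls get wrapped in STATUS_ERROR_CHECK, which incidentally leaves the retained copy_status local unused. In minimal form, the backend switch works like this (macro names are from this patch; the typedef is purely illustrative):

// Illustrative only: the mutually exclusive build macros select the buffer type.
#if ENABLE_OPENCL
#include <CL/cl.h>
typedef cl_mem DeviceBuffer; // opaque OpenCL handle (cl_pSrc / cl_pDst)
#elif ENABLE_HIP
typedef void *DeviceBuffer;  // raw HIP device pointer (hip_pSrc / hip_pDst)
#else
typedef void *DeviceBuffer;  // CPU-only build; the GPU branches compile out
#endif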
#include "internal_publishKernels.h" -struct WarpPerspectivebatchPDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *perspective; +struct WarpPerspectivebatchPDLocalData +{ + RPPCommonHandle handle; + rppHandle_t rppHandle; + Rpp32u device_type; + Rpp32u nbatchSize; + RppiSize *srcDimensions; + RppiSize maxSrcDimensions; + Rpp32u *srcBatch_width; + Rpp32u *srcBatch_height; + RppiSize *dstDimensions; + RppiSize maxDstDimensions; + Rpp32u *dstBatch_width; + Rpp32u *dstBatch_height; + RppPtr_t pSrc; + RppPtr_t pDst; + Rpp32f *perspective; #if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif + cl_mem cl_pSrc; + cl_mem cl_pDst; +#elif ENABLE_HIP + void *hip_pSrc; + void *hip_pDst; +#endif }; static vx_status VX_CALLBACK refreshWarpPerspectivebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpPerspectivebatchPDLocalData *data) { - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->perspective = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_float32),data->perspective, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = 
srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + vx_status status = VX_SUCCESS; + vx_status copy_status; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[6], 0, 9 * data->nbatchSize, sizeof(vx_float32), data->perspective, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); + data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); + data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + for (int i = 0; i < data->nbatchSize; i++) + { + data->srcDimensions[i].width = data->srcBatch_width[i]; + data->srcDimensions[i].height = data->srcBatch_height[i]; + data->dstDimensions[i].width = data->dstBatch_width[i]; + data->dstDimensions[i].height = data->dstBatch_height[i]; + } + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + 
STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateWarpPerspectivebatchPD(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #7 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #8 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpPerspectivebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type); + // Check for input parameters + vx_parameter input_param; + vx_image input; + vx_df_image df_image; + input_param = vxGetParameterByIndex(node, 0); + STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) + { + return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpPerspectivebatchPD: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); + } + + // Check for output
parameters + vx_image output; + vx_df_image format; + vx_parameter output_param; + vx_uint32 height, width; + output_param = vxGetParameterByIndex(node, 3); + STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + vxReleaseImage(&input); + vxReleaseImage(&output); + vxReleaseParameter(&output_param); + vxReleaseParameter(&input_param); + return status; } -static vx_status VX_CALLBACK processWarpPerspectivebatchPD(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - WarpPerspectivebatchPDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { +static vx_status VX_CALLBACK processWarpPerspectivebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +{ + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + WarpPerspectivebatchPDLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + vx_df_image df_image = VX_DF_IMAGE_VIRT; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshWarpPerspectivebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_perspective_u8_pln1_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_perspective_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + refreshWarpPerspectivebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_warp_perspective_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_warp_perspective_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; +#elif ENABLE_HIP + refreshWarpPerspectivebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_warp_perspective_u8_pln1_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_warp_perspective_u8_pkd3_batchPD_gpu((void *)data->hip_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->hip_pDst, data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; #endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshWarpPerspectivebatchPD(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_perspective_u8_pln1_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_perspective_u8_pkd3_batchPD_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + refreshWarpPerspectivebatchPD(node, parameters, num, data); + if (df_image == VX_DF_IMAGE_U8) + { + rpp_status = rppi_warp_perspective_u8_pln1_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->rppHandle); + } + else if (df_image == VX_DF_IMAGE_RGB) + { + rpp_status = rppi_warp_perspective_u8_pkd3_batchPD_host(data->pSrc, data->srcDimensions, data->maxSrcDimensions, data->pDst, data->dstDimensions, data->maxDstDimensions, data->perspective, data->nbatchSize, data->rppHandle); + } + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; + } + return return_status; } -static vx_status VX_CALLBACK initializeWarpPerspectivebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) +static vx_status VX_CALLBACK initializeWarpPerspectivebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - WarpPerspectivebatchPDLocalData * data = new WarpPerspectivebatchPDLocalData; - memset(data, 0, sizeof(*data)); + WarpPerspectivebatchPDLocalData *data = new WarpPerspectivebatchPDLocalData; + memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); #endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshWarpPerspectivebatchPD(node, parameters, num, data); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); + data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); + data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); + data->perspective = (vx_float32 *)malloc(sizeof(vx_float32) * 9 * data->nbatchSize); + refreshWarpPerspectivebatchPD(node, parameters, num, data); #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); +#elif ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize); #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } static vx_status VX_CALLBACK uninitializeWarpPerspectivebatchPD(vx_node node, const vx_reference *parameters, vx_uint32 num) { - WarpPerspectivebatchPDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + WarpPerspectivebatchPDLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); +#if ENABLE_OPENCL || ENABLE_HIP + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + rppDestroyGPU(data->rppHandle); +#endif + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + rppDestroyHost(data->rppHandle); + free(data->srcDimensions); + 
free(data->dstDimensions); + free(data->srcBatch_width); + free(data->srcBatch_height); + free(data->dstBatch_width); + free(data->dstBatch_height); + free(data->perspective); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes #if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; #endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; + + return VX_SUCCESS; } vx_status WarpPerspectivebatchPD_Register(vx_context context) { - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpPerspectivebatchPD", - VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD, - processWarpPerspectivebatchPD, - 9, - validateWarpPerspectivebatchPD, - initializeWarpPerspectivebatchPD, - uninitializeWarpPerspectivebatchPD); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpPerspectivebatchPD", + VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD, + processWarpPerspectivebatchPD, + 9, + validateWarpPerspectivebatchPD, + initializeWarpPerspectivebatchPD, + uninitializeWarpPerspectivebatchPD); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); #if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, 
VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + if (kernel) + { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) + { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + return status; }
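Note: every kernel ported in this patch follows the dual-backend shape that the WarpPerspectivebatchPD hunks above establish: initialize() picks up the node's OpenCL command queue or HIP stream, process() issues the same rppi_*_gpu call against cl_* or hip_* device buffers, and uninitialize() destroys the RPP handle and frees the per-batch allocations that initialize() now owns. Below is a condensed sketch of that pattern, built only from calls that appear in the hunks above; MyKernelLocalData and the parameter indices [7]/[8] are illustrative stand-ins for the per-kernel local-data struct and its batch-size/device-type scalars:

    static vx_status VX_CALLBACK initializeMyKernel(vx_node node, const vx_reference *parameters, vx_uint32 num)
    {
        MyKernelLocalData *data = new MyKernelLocalData;
        memset(data, 0, sizeof(*data));
    #if ENABLE_OPENCL
        // OpenCL build: RPP is driven from the node's OpenCL command queue
        STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
    #elif ENABLE_HIP
        // HIP build: the equivalent handle is the node's HIP stream
        STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream)));
    #endif
        // device type and batch size come from the trailing scalar parameters
        STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
        STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize));
    #if ENABLE_OPENCL
        if (data->device_type == AGO_TARGET_AFFINITY_GPU)
            rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
    #elif ENABLE_HIP
        if (data->device_type == AGO_TARGET_AFFINITY_GPU)
            rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize);
    #endif
        if (data->device_type == AGO_TARGET_AFFINITY_CPU)
            rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize);
        STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
        return VX_SUCCESS;
    }

The two file deletions that follow drop the batchPDROID and batchPS variants of WarpPerspective; only the batchPD form is carried forward to the new backend.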
diff --git a/amd_openvx_extensions/amd_rpp/source/WarpPerspectivebatchPDROID.cpp b/amd_openvx_extensions/amd_rpp/source/WarpPerspectivebatchPDROID.cpp deleted file mode 100644 index 8c08ca213c..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/WarpPerspectivebatchPDROID.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "internal_publishKernels.h" - -struct WarpPerspectivebatchPDROIDLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiROI *roiPoints; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize *dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *perspective; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshWarpPerspectivebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpPerspectivebatchPDROIDLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->perspective = (vx_float32 *)malloc(sizeof(vx_float32) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(vx_float32),data->perspective, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u),dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = 
srcBatch_height[i]; - data->dstDimensions[i].width = dstBatch_width[i]; - data->dstDimensions[i].height = dstBatch_height[i]; - } - data->roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * data->nbatchSize); - Rpp32u *batch_roiX = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiY = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiWidth = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *batch_roiHeight = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[7], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiX, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[8], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiY, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[9], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[10], 0, data->nbatchSize, sizeof(Rpp32u),batch_roiHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->roiPoints[i].x = batch_roiX[i]; - data->roiPoints[i].y = batch_roiY[i]; - data->roiPoints[i].roiWidth = batch_roiWidth[i]; - data->roiPoints[i].roiHeight = batch_roiHeight[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateWarpPerspectivebatchPDROID(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #11 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #12 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpPerspectivebatchPDROID: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, 
VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processWarpPerspectivebatchPDROID(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - WarpPerspectivebatchPDROIDLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshWarpPerspectivebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_perspective_u8_pln1_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_perspective_u8_pkd3_batchPD_ROID_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshWarpPerspectivebatchPDROID(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_perspective_u8_pln1_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->roiPoints,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_perspective_u8_pkd3_batchPD_ROID_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->roiPoints,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeWarpPerspectivebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpPerspectivebatchPDROIDLocalData * data = new WarpPerspectivebatchPDROIDLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshWarpPerspectivebatchPDROID(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeWarpPerspectivebatchPDROID(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpPerspectivebatchPDROIDLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status WarpPerspectivebatchPDROID_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpPerspectivebatchPDROID", - VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPDROID, - processWarpPerspectivebatchPDROID, - 13, - validateWarpPerspectivebatchPDROID, - initializeWarpPerspectivebatchPDROID, - uninitializeWarpPerspectivebatchPDROID); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT,VX_TYPE_ARRAY , VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, 
VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/WarpPerspectivebatchPS.cpp b/amd_openvx_extensions/amd_rpp/source/WarpPerspectivebatchPS.cpp deleted file mode 100644 index 468e01573f..0000000000 --- a/amd_openvx_extensions/amd_rpp/source/WarpPerspectivebatchPS.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* -Copyright (c) 2019 - 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "internal_publishKernels.h" - -struct WarpPerspectivebatchPSLocalData { - RPPCommonHandle handle; - rppHandle_t rppHandle; - Rpp32u device_type; - Rpp32u nbatchSize; - RppiSize *srcDimensions; - RppiSize maxSrcDimensions; - RppiSize dstDimensions; - RppiSize maxDstDimensions; - RppPtr_t pSrc; - RppPtr_t pDst; - Rpp32f *perspective; -#if ENABLE_OPENCL - cl_mem cl_pSrc; - cl_mem cl_pDst; -#endif -}; - -static vx_status VX_CALLBACK refreshWarpPerspectivebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num, WarpPerspectivebatchPSLocalData *data) -{ - vx_status status = VX_SUCCESS; - size_t arr_size; - vx_status copy_status; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->dstDimensions.width)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->dstDimensions.height)); - STATUS_ERROR_CHECK(vxQueryArray((vx_array)parameters[6], VX_ARRAY_ATTRIBUTE_NUMITEMS, &arr_size, sizeof(arr_size))); - data->perspective = (Rpp32f *)malloc(sizeof(Rpp32f) * arr_size); - copy_status = vxCopyArrayRange((vx_array)parameters[6], 0, arr_size, sizeof(Rpp32f),data->perspective, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize)); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width))); - data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width))); - data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize; - data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize); - Rpp32u *srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - Rpp32u *srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize); - copy_status = vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - copy_status = vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u),srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST); - for(int i = 0; i < data->nbatchSize; i++){ - data->srcDimensions[i].width = srcBatch_width[i]; - data->srcDimensions[i].height = srcBatch_height[i]; - } - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } - return status; -} - -static vx_status VX_CALLBACK validateWarpPerspectivebatchPS(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) 
-{ - vx_status status = VX_SUCCESS; - vx_enum scalar_type; - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if(scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - // Check for input parameters - vx_parameter input_param; - vx_image input; - vx_df_image df_image; - input_param = vxGetParameterByIndex(node,0); - STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - { - return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: WarpPerspectivebatchPS: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image); - } - - // Check for output parameters - vx_image output; - vx_df_image format; - vx_parameter output_param; - vx_uint32 height, width; - output_param = vxGetParameterByIndex(node,3); - STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - vxReleaseImage(&input); - vxReleaseImage(&output); - vxReleaseParameter(&output_param); - vxReleaseParameter(&input_param); - return status; -} - -static vx_status VX_CALLBACK processWarpPerspectivebatchPS(vx_node node, const vx_reference * parameters, vx_uint32 num) -{ - RppStatus rpp_status = RPP_SUCCESS; - vx_status return_status = VX_SUCCESS; - WarpPerspectivebatchPSLocalData * data = NULL; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; - refreshWarpPerspectivebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_perspective_u8_pln1_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_perspective_u8_pkd3_batchPS_gpu((void *)data->cl_pSrc,data->srcDimensions,data->maxSrcDimensions,(void *)data->cl_pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; - -#endif - } - if(data->device_type == AGO_TARGET_AFFINITY_CPU) { - refreshWarpPerspectivebatchPS(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - rpp_status = rppi_warp_perspective_u8_pln1_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->nbatchSize,data->rppHandle); - } - else if(df_image == VX_DF_IMAGE_RGB) { - rpp_status = rppi_warp_perspective_u8_pkd3_batchPS_host(data->pSrc,data->srcDimensions,data->maxSrcDimensions,data->pDst,data->dstDimensions,data->maxDstDimensions,data->perspective,data->nbatchSize,data->rppHandle); - } - return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; - - } - return return_status; -} - -static vx_status VX_CALLBACK initializeWarpPerspectivebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpPerspectivebatchPSLocalData * data = new WarpPerspectivebatchPSLocalData; - memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#endif - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - refreshWarpPerspectivebatchPS(node, parameters, num, data); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize); - - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; -} - -static vx_status VX_CALLBACK uninitializeWarpPerspectivebatchPS(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ - WarpPerspectivebatchPSLocalData * data; - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); -#if ENABLE_OPENCL - if(data->device_type == AGO_TARGET_AFFINITY_GPU) - rppDestroyGPU(data->rppHandle); -#endif - if(data->device_type == AGO_TARGET_AFFINITY_CPU) - rppDestroyHost(data->rppHandle); - delete(data); - return VX_SUCCESS; -} - -vx_status WarpPerspectivebatchPS_Register(vx_context context) -{ - vx_status status = VX_SUCCESS; - // Add kernel to the context with callbacks - vx_kernel kernel = vxAddUserKernel(context, "org.rpp.WarpPerspectivebatchPS", - VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPS, - processWarpPerspectivebatchPS, - 9, - validateWarpPerspectivebatchPS, - initializeWarpPerspectivebatchPS, - uninitializeWarpPerspectivebatchPS); - ERROR_CHECK_OBJECT(kernel); - AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL - // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers - vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); -#else - vx_bool enableBufferAccess = vx_false_e; -#endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); - } - if (status != VX_SUCCESS) - { - exit: vxRemoveKernel(kernel); return VX_FAILURE; - } - return status; -} diff --git a/amd_openvx_extensions/amd_rpp/source/copy.cpp b/amd_openvx_extensions/amd_rpp/source/copy.cpp index 4712f3d726..abb2a3d7d5 100644 --- a/amd_openvx_extensions/amd_rpp/source/copy.cpp +++ b/amd_openvx_extensions/amd_rpp/source/copy.cpp @@ -22,7 +22,8 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct CopyLocalData { +struct CopyLocalData +{ RPPCommonHandle handle; RppiSize dimensions; @@ -37,28 +38,29 @@ struct CopyLocalData { void *hip_pSrc; void *hip_pDst; #endif - }; static vx_status VX_CALLBACK refreshcopy(vx_node node, const vx_reference *parameters, vx_uint32 num, CopyLocalData *data) { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - #if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); - #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); - #endif - } - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - } + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { +#if ENABLE_OPENCL + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], 
VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); +#endif + } + if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); + STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); + } + return status; } static vx_status VX_CALLBACK validateCopy(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) @@ -76,7 +78,7 @@ static vx_status VX_CALLBACK validateCopy(vx_node node, const vx_reference param STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_FORMAT, &df_image, sizeof(df_image))); - vx_uint32 height, width; + vx_uint32 height, width; STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_HEIGHT, &height, sizeof(height))); @@ -87,41 +89,50 @@ static vx_status VX_CALLBACK validateCopy(vx_node node, const vx_reference param return status; } -static vx_status VX_CALLBACK processCopy(vx_node node, const vx_reference * parameters, vx_uint32 num) +static vx_status VX_CALLBACK processCopy(vx_node node, const vx_reference *parameters, vx_uint32 num) { - CopyLocalData * data = NULL; + CopyLocalData *data = NULL; vx_status return_status = VX_SUCCESS; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); vx_df_image df_image = VX_DF_IMAGE_VIRT; STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - unsigned size = data->dimensions.height* data->dimensions.width; - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + unsigned size = data->dimensions.height * data->dimensions.width; + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL refreshcopy(node, parameters, num, data); cl_command_queue handle = data->handle.cmdq; - if (df_image == VX_DF_IMAGE_U8 ){ - clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, 0, 0, size, 0 , NULL, NULL); + if (df_image == VX_DF_IMAGE_U8) + { + clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, 0, 0, size, 0, NULL, NULL); } - else if(df_image == VX_DF_IMAGE_RGB) { - clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, 0, 0, size*3, 0 , NULL, NULL); + else if (df_image == VX_DF_IMAGE_RGB) + { + clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, 0, 0, size * 3, 0, NULL, NULL); } return_status = VX_SUCCESS; #elif ENABLE_HIP refreshcopy(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ - hipMemcpy(data->hip_pDst,data->hip_pSrc,size, hipMemcpyDeviceToDevice); + if (df_image == VX_DF_IMAGE_U8) + { + hipMemcpy(data->hip_pDst, data->hip_pSrc, size, hipMemcpyDeviceToDevice); } - else if(df_image == VX_DF_IMAGE_RGB) { - hipMemcpy(data->hip_pDst,data->hip_pSrc,size * 3, hipMemcpyDeviceToDevice); + else if (df_image == VX_DF_IMAGE_RGB) + { + hipMemcpy(data->hip_pDst, data->hip_pSrc, size * 3, hipMemcpyDeviceToDevice); } #endif - } else if(data->device_type == AGO_TARGET_AFFINITY_CPU) { + } + else if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { refreshcopy(node, parameters, num, data); - if (df_image == VX_DF_IMAGE_U8 ){ + if (df_image == VX_DF_IMAGE_U8) + { memcpy(data->pDst, data->pSrc, size); } - else if(df_image == 
VX_DF_IMAGE_RGB) { - memcpy(data->pDst, data->pSrc, size*3); + else if (df_image == VX_DF_IMAGE_RGB) + { + memcpy(data->pDst, data->pSrc, size * 3); } return_status = VX_SUCCESS; } @@ -130,7 +141,7 @@ static vx_status VX_CALLBACK processCopy(vx_node node, const vx_reference * para static vx_status VX_CALLBACK initializeCopy(vx_node node, const vx_reference *parameters, vx_uint32 num) { - CopyLocalData * data = new CopyLocalData; + CopyLocalData *data = new CopyLocalData; memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL @@ -150,10 +161,33 @@ static vx_status VX_CALLBACK uninitializeCopy(vx_node node, const vx_reference * return VX_SUCCESS; } +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) +{ + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + + return VX_SUCCESS; +} +
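Note: query_target_support derives the node's affinity from the context-wide AMD affinity attribute, and the OpenCL build then pins it to CPU because these amd_rpp nodes have no OpenCL codegen callback. The affinity this callback reads is something the application sets once on the context; a minimal caller-side sketch (vxSetContextAttribute is standard OpenVX, and zero-initializing the remaining AgoTargetAffinityInfo fields is an assumption):

    vx_context context = vxCreateContext();
    AgoTargetAffinityInfo affinity;
    memset(&affinity, 0, sizeof(affinity));           // assumed-safe defaults for unused fields
    affinity.device_type = AGO_TARGET_AFFINITY_GPU;   // or AGO_TARGET_AFFINITY_CPU
    vxSetContextAttribute(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));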
vx_status Copy_Register(vx_context context) { vx_status status = VX_SUCCESS; -// add kernel to the context with callbacks + // add kernel to the context with callbacks vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Copy", VX_KERNEL_RPP_COPY, processCopy, @@ -164,27 +198,30 @@ vx_status Copy_Register(vx_context context) ERROR_CHECK_OBJECT(kernel); AgoTargetAffinityInfo affinity; - vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity)); -#if ENABLE_OPENCL|| ENABLE_HIP + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_OPENCL || ENABLE_HIP // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers vx_bool enableBufferAccess = vx_true_e; - if(affinity.device_type == AGO_TARGET_AFFINITY_GPU) - STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else vx_bool enableBufferAccess = vx_false_e; #endif + amd_kernel_query_target_support_f query_target_support_f = query_target_support; if (kernel) { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); } if (status != VX_SUCCESS) { - exit: vxRemoveKernel(kernel); return VX_FAILURE; + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; } return status; diff --git a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp index a9b1ed3eb7..c7f039f55e 100644 --- a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp @@ -26,14 +26,14 @@ THE SOFTWARE. /********************************************************************** PUBLIC FUNCTION for OpenVX user defined functions **********************************************************************/ -extern "C" SHARED_PUBLIC vx_status VX_API_CALL vxPublishKernels(vx_context context) +extern "C" SHARED_PUBLIC vx_status VX_API_CALL vxPublishKernels(vx_context context) { - vx_status status = VX_SUCCESS; + vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(get_kernels_to_publish()); - STATUS_ERROR_CHECK(Kernel_List->PUBLISH(context)); + STATUS_ERROR_CHECK(get_kernels_to_publish()); + STATUS_ERROR_CHECK(Kernel_List->PUBLISH(context)); - return status; + return status; } /************************************************************************************************************ @@ -41,277 +41,88 @@ Add All Kernels to the Kernel List *************************************************************************************************************/ vx_status get_kernels_to_publish() { - vx_status status = VX_SUCCESS; + vx_status status = VX_SUCCESS; - Kernel_List = new Kernellist(MAX_KERNELS); - STATUS_ERROR_CHECK(ADD_KERENEL(Brightness_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BrightnessbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BrightnessbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BrightnessbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(GammaCorrection_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(GammaCorrectionbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(GammaCorrectionbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(GammaCorrectionbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Blend_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BlendbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BlendbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BlendbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Blur_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BlurbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BlurbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BlurbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Contrast_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ContrastbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ContrastbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ContrastbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Pixelate_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(PixelatebatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(PixelatebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(PixelatebatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Jitter_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(JitterbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(JitterbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(JitterbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Occlusion_Register)); - 
STATUS_ERROR_CHECK(ADD_KERENEL(OcclusionbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(OcclusionbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(OcclusionbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Snow_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(SnowbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(SnowbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(SnowbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Noise_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(NoisebatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(NoisebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(NoisebatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RandomShadow_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RandomShadowbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RandomShadowbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RandomShadowbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Fog_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(FogbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(FogbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(FogbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Rain_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RainbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RainbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RainbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RandomCropLetterBox_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RandomCropLetterBoxbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RandomCropLetterBoxbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(RandomCropLetterBoxbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Exposure_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ExposurebatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ExposurebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ExposurebatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(HistogramBalance_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(HistogramBalancebatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(HistogramBalancebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(HistogramBalancebatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AbsoluteDifference_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AbsoluteDifferencebatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AbsoluteDifferencebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AbsoluteDifferencebatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulateWeighted_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulateWeightedbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulateWeightedbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulateWeightedbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Accumulate_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulatebatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulatebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulatebatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Add_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AddbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AddbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AddbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Subtract_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(SubtractbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(SubtractbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(SubtractbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Magnitude_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MagnitudebatchPS_Register)); - 
STATUS_ERROR_CHECK(ADD_KERENEL(MagnitudebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MagnitudebatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Multiply_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MultiplybatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MultiplybatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MultiplybatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Phase_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(PhasebatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(PhasebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(PhasebatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulateSquared_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulateSquaredbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulateSquaredbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(AccumulateSquaredbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BitwiseAND_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BitwiseANDbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BitwiseANDbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BitwiseANDbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BitwiseNOT_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BitwiseNOTbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BitwiseNOTbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(BitwiseNOTbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ExclusiveOR_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ExclusiveORbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ExclusiveORbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ExclusiveORbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(InclusiveOR_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(InclusiveORbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(InclusiveORbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(InclusiveORbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Histogram_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Thresholding_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ThresholdingbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ThresholdingbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ThresholdingbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Max_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MaxbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MaxbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MaxbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Min_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MinbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MinbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MinbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MinMaxLoc_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(HistogramEqualize_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(HistogramEqualizebatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(HistogramEqualizebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(HistogramEqualizebatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(MeanStddev_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Flip_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(FlipbatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(FlipbatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(FlipbatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(Resize_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ResizebatchPS_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ResizebatchPD_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ResizebatchPDROID_Register)); - STATUS_ERROR_CHECK(ADD_KERENEL(ResizeCrop_Register)); - 
-    STATUS_ERROR_CHECK(ADD_KERENEL(ResizeCropbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ResizeCropbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ResizeCropbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(Rotate_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(RotatebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(RotatebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(RotatebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(WarpAffine_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(WarpAffinebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(WarpAffinebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(WarpAffinebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(Fisheye_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(FisheyebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(FisheyebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(FisheyebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LensCorrection_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LensCorrectionbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LensCorrectionbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LensCorrectionbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(Scale_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ScalebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ScalebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ScalebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(WarpPerspective_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(WarpPerspectivebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(WarpPerspectivebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(WarpPerspectivebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(Dilate_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(DilatebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(DilatebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(DilatebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(Erode_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ErodebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ErodebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ErodebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(Hue_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(HuebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(HuebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(HuebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(Saturation_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(SaturationbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(SaturationbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(SaturationbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ColorTemperature_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ColorTemperaturebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ColorTemperaturebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ColorTemperaturebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(Vignette_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(VignettebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(VignettebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(VignettebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ChannelExtract_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ChannelExtractbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ChannelExtractbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ChannelCombine_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ChannelCombinebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ChannelCombinebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LookUpTable_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LookUpTablebatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LookUpTablebatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LookUpTablebatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(BilateralFilter_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(BilateralFilterbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(BilateralFilterbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(BilateralFilterbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(BoxFilter_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(BoxFilterbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(BoxFilterbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(BoxFilterbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(Sobel_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(SobelbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(SobelbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(SobelbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(MedianFilter_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(MedianFilterbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(MedianFilterbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(MedianFilterbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(CustomConvolution_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(CustomConvolutionbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(CustomConvolutionbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(CustomConvolutionbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(NonMaxSupression_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(NonMaxSupressionbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(NonMaxSupressionbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(NonMaxSupressionbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(GaussianFilter_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(GaussianFilterbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(GaussianFilterbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(GaussianFilterbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(NonLinearFilter_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(NonLinearFilterbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(NonLinearFilterbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(NonLinearFilterbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LocalBinaryPattern_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LocalBinaryPatternbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LocalBinaryPatternbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LocalBinaryPatternbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(DataObjectCopy_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(DataObjectCopybatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(DataObjectCopybatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(DataObjectCopybatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(GaussianImagePyramid_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(GaussianImagePyramidbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(GaussianImagePyramidbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(LaplacianImagePyramid_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(CannyEdgeDetector_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(HarrisCornerDetector_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(FastCornerDetector_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ControlFlow_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ControlFlowbatchPS_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ControlFlowbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ControlFlowbatchPDROID_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(remap_Register));
+    Kernel_List = new Kernellist(MAX_KERNELS);
+    STATUS_ERROR_CHECK(ADD_KERENEL(BrightnessbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(GammaCorrectionbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(BlendbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(BlurbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ContrastbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(PixelatebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(JitterbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(SnowbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(NoisebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(RandomShadowbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(FogbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(RainbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(RandomCropLetterBoxbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ExposurebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(HistogramBalancebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(AbsoluteDifferencebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(AccumulateWeightedbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(AccumulatebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(AddbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(SubtractbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(MagnitudebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(MultiplybatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(PhasebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(AccumulateSquaredbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(BitwiseANDbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(BitwiseNOTbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ExclusiveORbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(InclusiveORbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(Histogram_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ThresholdingbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(MaxbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(MinbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(MinMaxLoc_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(HistogramEqualizebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(MeanStddev_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(FlipbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ResizebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ResizeCropbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(RotatebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(WarpAffinebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(FisheyebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(LensCorrectionbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ScalebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(WarpPerspectivebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(DilatebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ErodebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(HuebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(SaturationbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ColorTemperaturebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(VignettebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ChannelExtractbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ChannelCombinebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(LookUpTablebatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(BilateralFilterbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(BoxFilterbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(SobelbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(MedianFilterbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(CustomConvolutionbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(NonMaxSupressionbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(GaussianFilterbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(NonLinearFilterbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(LocalBinaryPatternbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(DataObjectCopybatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(GaussianImagePyramidbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(LaplacianImagePyramid_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(CannyEdgeDetector_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(HarrisCornerDetector_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(FastCornerDetector_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(remap_Register));
     STATUS_ERROR_CHECK(ADD_KERENEL(TensorAdd_Register));
     STATUS_ERROR_CHECK(ADD_KERENEL(TensorSubtract_Register));
     STATUS_ERROR_CHECK(ADD_KERENEL(TensorMultiply_Register));
     STATUS_ERROR_CHECK(ADD_KERENEL(TensorMatrixMultiply_Register));
     STATUS_ERROR_CHECK(ADD_KERENEL(TensorLookup_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ColorTwist_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ColorTwistbatchPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(CropMirrorNormalizePD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(CropPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(ResizeCropMirrorPD_Register));
-    STATUS_ERROR_CHECK(ADD_KERENEL(Copy_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ColorTwistbatchPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(CropMirrorNormalizePD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(CropPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(ResizeCropMirrorPD_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(Copy_Register));
     STATUS_ERROR_CHECK(ADD_KERENEL(Nop_Register));

     return status;
 }
@@ -321,7 +132,7 @@ Add Kernels to the Kernel List
 *************************************************************************************************************/
 vx_status ADD_KERENEL(std::function<vx_status(vx_context)> func)
 {
-    vx_status status = VX_SUCCESS;
-    STATUS_ERROR_CHECK(Kernel_List->ADD(func));
-    return status;
+    vx_status status = VX_SUCCESS;
+    STATUS_ERROR_CHECK(Kernel_List->ADD(func));
+    return status;
 }
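The hunk above trims the published kernel list down to the batchPD variants: each X_Register callback is appended to Kernel_List through ADD_KERENEL and only invoked later, when the list is replayed against a vx_context. Below is a minimal sketch of that deferred-registration pattern; this Kernellist is a simplified stand-in written for illustration, not the class from this patch.

    // Simplified stand-in for the Kernellist consumed by ADD_KERENEL above:
    // registration callbacks are collected first and replayed against a
    // vx_context when the extension is published.
    #include <functional>
    #include <vector>
    #include <VX/vx.h>

    struct Kernellist
    {
        std::vector<std::function<vx_status(vx_context)>> list;
        explicit Kernellist(int) {}                // capacity hint, unused in this sketch
        vx_status ADD(std::function<vx_status(vx_context)> func)
        {
            list.push_back(func);                  // deferred: no context is touched yet
            return VX_SUCCESS;
        }
        vx_status PUBLISH(vx_context context)
        {
            for (auto &reg : list)                 // each entry is an X_Register callback
            {
                vx_status status = reg(context);
                if (status != VX_SUCCESS)
                    return status;                 // stop at the first failed registration
            }
            return VX_SUCCESS;
        }
    };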
diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
index 7499b28ff0..0172b78839 100644
--- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
@@ -26,5761 +26,1761 @@ THE SOFTWARE.
 vx_uint32 getGraphAffinity(vx_graph graph)
 {
     AgoTargetAffinityInfo affinity;
-    vxQueryGraph(graph, VX_GRAPH_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));;
-    if(affinity.device_type != AGO_TARGET_AFFINITY_GPU && affinity.device_type != AGO_TARGET_AFFINITY_CPU)
+    vxQueryGraph(graph, VX_GRAPH_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+    ;
+    if (affinity.device_type != AGO_TARGET_AFFINITY_GPU && affinity.device_type != AGO_TARGET_AFFINITY_CPU)
         affinity.device_type = AGO_TARGET_AFFINITY_CPU;
-    // std::cerr<<"\n affinity "< %d\n", kernelName, p, params[p], status);
@@ -5832,13 +1838,15 @@ vx_node createNode(vx_graph graph, vx_enum kernelEnum, vx_reference params[], vx
             }
         }
     }
-    else {
+    else
+    {
         vxAddLogEntry((vx_reference)graph, VX_ERROR_INVALID_PARAMETERS, "createNode: failed to create node with kernel enum %d\n", kernelEnum);
         status = VX_ERROR_NO_MEMORY;
     }
     vxReleaseKernel(&kernel);
 }
-    else {
+    else
+    {
         vxAddLogEntry((vx_reference)graph, VX_ERROR_INVALID_PARAMETERS, "createNode: failed to retrieve kernel enum %d\n", kernelEnum);
         status = VX_ERROR_NOT_SUPPORTED;
     }
@@ -5846,42 +1854,45 @@ vx_node createNode(vx_graph graph, vx_enum kernelEnum, vx_reference params[], vx
 }

 #if ENABLE_OPENCL
-int getEnvironmentVariable(const char * name)
+int getEnvironmentVariable(const char *name)
 {
-    const char * text = getenv(name);
-    if (text) {
+    const char *text = getenv(name);
+    if (text)
+    {
         return atoi(text);
     }
     return -1;
 }

-vx_status createGraphHandle(vx_node node, RPPCommonHandle ** pHandle)
+vx_status createGraphHandle(vx_node node, RPPCommonHandle **pHandle)
 {
-    RPPCommonHandle * handle = NULL;
+    RPPCommonHandle *handle = NULL;
     STATUS_ERROR_CHECK(vxGetModuleHandle(node, OPENVX_KHR_RPP, (void **)&handle));
-    if(handle) {
+    if (handle)
+    {
         handle->count++;
     }
-    else {
+    else
+    {
         handle = new RPPCommonHandle;
         memset(handle, 0, sizeof(*handle));
-        const char * searchEnvName = "NN_MIOPEN_SEARCH";
+        const char *searchEnvName = "NN_MIOPEN_SEARCH";
         int isEnvSet = getEnvironmentVariable(searchEnvName);
         if (isEnvSet > 0)
             handle->exhaustiveSearch = true;

         handle->count = 1;
         STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &handle->cmdq, sizeof(handle->cmdq)));
-
     }
     *pHandle = handle;
     return VX_SUCCESS;
 }

-vx_status releaseGraphHandle(vx_node node, RPPCommonHandle * handle)
+vx_status releaseGraphHandle(vx_node node, RPPCommonHandle *handle)
 {
     handle->count--;
-    if(handle->count == 0) {
+    if (handle->count == 0)
+    {
         //TBD: release miopen_handle
         delete handle;
         STATUS_ERROR_CHECK(vxSetModuleHandle(node, OPENVX_KHR_RPP, NULL));
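The reworked createNode above is the single entry point the node-creation wrappers in kernel_rpp.cpp go through: it fetches the kernel by enum, creates a generic node, and binds every non-NULL reference with vxSetParameterByIndex, logging failures via vxAddLogEntry. A hedged usage sketch follows; the helper name makeNopNode is hypothetical, while createNode, getGraphAffinity, and VX_KERNEL_RPP_NOP are taken from this patch.

    // Hypothetical caller of the createNode helper; mirrors the wrapper
    // pattern in kernel_rpp.cpp without copying any single wrapper verbatim.
    vx_node makeNopNode(vx_graph graph, vx_image input, vx_image output)
    {
        vx_uint32 dev = getGraphAffinity(graph); // CPU unless the graph asks for GPU
        vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph),
                                              VX_TYPE_UINT32, &dev);
        vx_reference params[] = {
            (vx_reference)input,
            (vx_reference)output,
            (vx_reference)deviceType};
        // createNode binds each non-NULL entry with vxSetParameterByIndex
        return createNode(graph, VX_KERNEL_RPP_NOP, params, sizeof(params) / sizeof(params[0]));
    }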
#include "internal_publishKernels.h" -struct NopLocalData { +struct NopLocalData +{ #if ENABLE_OPENCL RPPCommonHandle handle; @@ -41,7 +42,6 @@ struct NopLocalData { void *hip_pSrc; void *hip_pDst; #endif - }; static vx_status VX_CALLBACK validateNop(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) @@ -59,7 +59,7 @@ static vx_status VX_CALLBACK validateNop(vx_node node, const vx_reference parame STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_FORMAT, &df_image, sizeof(df_image))); - vx_uint32 height, width; + vx_uint32 height, width; STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_HEIGHT, &height, sizeof(height))); @@ -70,35 +70,37 @@ static vx_status VX_CALLBACK validateNop(vx_node node, const vx_reference parame return status; } -static vx_status VX_CALLBACK processNop(vx_node node, const vx_reference * parameters, vx_uint32 num) +static vx_status VX_CALLBACK processNop(vx_node node, const vx_reference *parameters, vx_uint32 num) { - NopLocalData * data = NULL; + NopLocalData *data = NULL; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); vx_df_image df_image = VX_DF_IMAGE_VIRT; STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if(data->device_type == AGO_TARGET_AFFINITY_GPU) { + if (data->device_type == AGO_TARGET_AFFINITY_GPU) + { #if ENABLE_OPENCL - cl_command_queue handle = data->handle.cmdq; STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); - unsigned size = data->dimensions.height* data->dimensions.width; + unsigned size = data->dimensions.height * data->dimensions.width; #endif - } else if(data->device_type == AGO_TARGET_AFFINITY_CPU) { + } + else if (data->device_type == AGO_TARGET_AFFINITY_CPU) + { STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - unsigned size = data->dimensions.height* data->dimensions.width; + unsigned size = data->dimensions.height * data->dimensions.width; } return VX_SUCCESS; } static vx_status VX_CALLBACK initializeNop(vx_node node, const vx_reference *parameters, vx_uint32 num) { - NopLocalData * data = new NopLocalData; + NopLocalData *data = new NopLocalData; memset(data, 0, sizeof(*data)); #if ENABLE_OPENCL @@ -112,7 +114,7 @@ static vx_status VX_CALLBACK initializeNop(vx_node node, const vx_reference *par #if ENABLE_OPENCL STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, 
 #else
-    //STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, data->pSrc, sizeof(data->pSrc)));
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, data->pSrc, sizeof(data->pSrc)));
 #endif

     STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
@@ -125,10 +127,33 @@ static vx_status VX_CALLBACK uninitializeNop(vx_node node, const vx_reference *p
     return VX_SUCCESS;
 }

+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+                                                  vx_bool use_opencl_1_2,              // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+                                                  vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+)
+{
+    vx_context context = vxGetContext((vx_reference)graph);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+    else
+        supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes
+#if ENABLE_OPENCL
+    supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+#endif
+
+    return VX_SUCCESS;
+}
+
 vx_status Nop_Register(vx_context context)
 {
     vx_status status = VX_SUCCESS;
-// add kernel to the context with callbacks
+    // add kernel to the context with callbacks
     vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Nop", VX_KERNEL_RPP_NOP,
                                        processNop,
@@ -139,27 +164,30 @@ vx_status Nop_Register(vx_context context)
     ERROR_CHECK_OBJECT(kernel);

     AgoTargetAffinityInfo affinity;
-    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,&affinity, sizeof(affinity));
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
 #if ENABLE_OPENCL
     // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
     vx_bool enableBufferAccess = vx_true_e;
-    if(affinity.device_type == AGO_TARGET_AFFINITY_GPU)
-        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
 #else
     vx_bool enableBufferAccess = vx_false_e;
 #endif
+    amd_kernel_query_target_support_f query_target_support_f = query_target_support;

     if (kernel) {
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
         PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
         PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
         PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
-
         PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
     }
     if (status != VX_SUCCESS) {
-    exit:    vxRemoveKernel(kernel); return VX_FAILURE;
+    exit:
+        vxRemoveKernel(kernel);
+        return VX_FAILURE;
     }

     return status;
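The new query_target_support callback above simply mirrors the context affinity onto the node (and pins the OpenCL backend to CPU, since amd_rpp nodes have no codegen callback). The sketch below shows the context-side half of that contract, i.e. how an application might set the affinity the callback later reads back; selectAffinity is a hypothetical helper, while the attribute and struct come from the AMD OpenVX extensions used throughout this patch.

    #include <string.h>
    #include <VX/vx.h>
    #include <vx_ext_amd.h>

    // Hypothetical helper: pick the context affinity that query_target_support
    // later mirrors into supported_target_affinity during vxVerifyGraph.
    vx_status selectAffinity(vx_context context, vx_bool useGpu)
    {
        AgoTargetAffinityInfo affinity;
        memset(&affinity, 0, sizeof(affinity));
        affinity.device_type = useGpu ? AGO_TARGET_AFFINITY_GPU
                                      : AGO_TARGET_AFFINITY_CPU;
        return vxSetContextAttribute(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY,
                                     &affinity, sizeof(affinity));
    }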
diff --git a/apps/image_augmentation/CMakeLists.txt b/apps/image_augmentation/CMakeLists.txt
index 2ae787d4fa..6a2c03a615 100644
--- a/apps/image_augmentation/CMakeLists.txt
+++ b/apps/image_augmentation/CMakeLists.txt
@@ -34,6 +34,13 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../amd_openvx/cmake)
 find_package(OpenCV QUIET)
 find_package(AMDRPP QUIET)

+#if("${BACKEND}" STREQUAL "HIP")
+#    set(ROCM_PATH /opt/rocm CACHE PATH "mivisionx default ROCm installation path")
+#    set(HIP_PATH "${ROCM_PATH}/hip" CACHE PATH "Path to which HIP has been installed")
+#    list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake)
+#    find_package(HIP QUIET REQUIRED)
+#endif()
+
 include_directories (${OpenCV_INCLUDE_DIRS} /opt/rocm/mivisionx/include/)
 link_directories    (/opt/rocm/mivisionx/lib/)
diff --git a/rocAL/rocAL/CMakeLists.txt b/rocAL/rocAL/CMakeLists.txt
index edf02eba1f..ee88491f78 100644
--- a/rocAL/rocAL/CMakeLists.txt
+++ b/rocAL/rocAL/CMakeLists.txt
@@ -141,16 +141,16 @@ if(${BUILD_RALI})
     protobuf_generate_cpp(CAFFE2_PROTO_SRCS CAFFE2_PROTO_HEADERS proto/caffe2_protos.proto)
     protobuf_generate_cpp(CAFFE_PROTO_SRCS CAFFE_PROTO_HEADERS proto/caffe_protos.proto)
     link_directories(${AMDRPP_LIBRARIES_DIR} ${TurboJpeg_LIBRARIES_DIR} ${PROTOBUF_LIBRARY_DIRS} /usr/local/lib/)
-    
+
     if("${BACKEND}" STREQUAL "HIP" AND HIP_FOUND)
         add_subdirectory(rocAL_hip)
         link_directories(${HIP_PATH}/lib)
         message("-- ${Green}rocAL library is going to be built with HIP support ${ColourReset}")
     endif()
-    
+
     file(GLOB SOURCES "./source/*.cpp")
     add_library(${PROJECT_NAME} SHARED ${SOURCES} ${TF_PROTO_SRCS} ${TF_PROTO_HEADERS} ${CAFFE_PROTO_HEADERS} ${CAFFE_PROTO_SRCS} ${CAFFE2_PROTO_SRCS} ${CAFFE2_PROTO_HEADERS})
-    
+
     if("${BACKEND}" STREQUAL "HIP" AND HIP_FOUND)
         include_directories(${HIP_PATH}/include ${HSA_PATH}/include rocAL_hip)
         target_compile_definitions(${PROJECT_NAME} PRIVATE __HIP_PLATFORM_HCC__)
diff --git a/utilities/rali/rali_unittests/rali_unittests.cpp b/utilities/rali/rali_unittests/rali_unittests.cpp
index 04fe9340c8..62da2116fc 100644
--- a/utilities/rali/rali_unittests/rali_unittests.cpp
+++ b/utilities/rali/rali_unittests/rali_unittests.cpp
@@ -379,12 +379,14 @@ int test(int test_case, const char *path, const char *outName, int rgb, int gpu,
                   << "raliExposure" << std::endl;
         image1 = raliExposure(handle, image0, true);
     }
+    break;
     case 21:
     {
         std::cout << ">>>>>>> Running "
                   << "raliHue" << std::endl;
         image1 = raliHue(handle, image0, true);
     }
+    break;
     case 22:
     {
         std::cout << ">>>>>>> Running "
@@ -712,7 +714,7 @@ int test(int test_case, const char *path, const char *outName, int rgb, int gpu,
             std::string out_filename = std::string(outName) + ".png";   // in case the user specifies non png filename
             if (display_all)
                 out_filename = std::string(outName) + std::to_string(index) + ".png";   // in case the user specifies non png filename
-            
+
             if (color_format == RaliImageColor::RALI_COLOR_RGB24)
             {
                 cv::cvtColor(mat_output, mat_color, CV_RGB2BGR);