From 0808d5745ad26325c46136472789f406f27d1d97 Mon Sep 17 00:00:00 2001 From: Fiona-MCW <70996026+fiona-gladwin@users.noreply.github.com> Date: Mon, 17 Apr 2023 23:41:22 +0530 Subject: [PATCH] rocAL Tensor Video pipeline changes (#19) * Add changes to the video pipeline Change instances of image to tensor Remove video loader module Remove VideoDecoderConfig, VideoReaderConfig Remove VideoStorageType and VideoDecoderType * Add tensor support for Sequence Rearrange Code clean up * Add sequence frame num and time stamps loader All loader API call in master graph * Add tensor support for sequence reader Add sequence batch size variable to change the batch size based on sequence length * Add support to allocate N*F ROI for sequences * Add chnanges to process sequences brightness and CMN * Minor changes * Minor formatting changes * Add check to pass only 5D tensors to SequenceRearrange * Minor changes Move SequenceInfo back to video reader Null ptr check Variable name change * Query params directly from parameters in SequenceRearrange --- .../amd_rpp/include/vx_ext_rpp.h | 2 +- .../amd_rpp/source/Brightness.cpp | 11 +- .../amd_rpp/source/CropMirrorNormalize.cpp | 13 +- .../amd_rpp/source/SequenceRearrange.cpp | 371 ++++++++---------- .../amd_rpp/source/kernel_rpp.cpp | 38 +- .../include/api/rocal_api_augmentation.h | 8 +- .../include/api/rocal_api_data_loaders.h | 174 ++++---- .../augmentations/node_sequence_rearrange.h | 9 +- rocAL/rocAL/include/decoders/image/decoder.h | 4 +- .../include/decoders/video/video_decoder.h | 14 - .../decoders/video/video_decoder_factory.h | 3 +- .../loaders/{image => }/loader_module.h | 8 +- .../include/loaders/video/node_video_loader.h | 6 +- .../video/node_video_loader_single_shard.h | 6 +- .../include/loaders/video/video_loader.h | 23 +- .../loaders/video/video_loader_module.h | 66 ---- .../loaders/video/video_loader_sharded.h | 13 +- .../loaders/video/video_read_and_decode.h | 11 +- rocAL/rocAL/include/pipeline/master_graph.h | 2 +- rocAL/rocAL/include/pipeline/tensor.h | 4 + rocAL/rocAL/include/readers/image/reader.h | 24 +- .../readers/video/video_file_source_reader.h | 2 +- .../include/readers/video/video_reader.h | 58 +-- .../readers/video/video_reader_factory.h | 2 +- .../source/api/rocal_api_augmentation.cpp | 45 +-- .../source/api/rocal_api_data_loaders.cpp | 299 +++++++------- .../node_crop_mirror_normalize.cpp | 2 + .../augmentations/node_sequence_rearrange.cpp | 34 +- .../decoders/video/video_decoder_factory.cpp | 6 +- .../loaders/video/node_video_loader.cpp | 8 +- .../video/node_video_loader_single_shard.cpp | 8 +- .../source/loaders/video/video_loader.cpp | 42 +- .../loaders/video/video_loader_sharded.cpp | 6 +- .../loaders/video/video_read_and_decode.cpp | 35 +- rocAL/rocAL/source/pipeline/master_graph.cpp | 4 + rocAL/rocAL/source/pipeline/node.cpp | 2 + rocAL/rocAL/source/pipeline/tensor.cpp | 8 +- .../video/video_file_source_reader.cpp | 2 +- .../readers/video/video_reader_factory.cpp | 4 +- 39 files changed, 592 insertions(+), 785 deletions(-) rename rocAL/rocAL/include/loaders/{image => }/loader_module.h (91%) delete mode 100644 rocAL/rocAL/include/loaders/video/video_loader_module.h diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index 48c276dca6..cb91c7b5c1 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -126,13 +126,13 @@ extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_ThresholdingbatchPD(v extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_VignettebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array stdDev,vx_uint32 nbatchSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpAffinebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array affine,vx_uint32 nbatchSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpPerspectivebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array perspective,vx_uint32 nbatchSize); -extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SequenceRearrange(vx_graph graph,vx_image pSrc,vx_image pDst, vx_array newOrder,vx_uint32 newSequenceLength, vx_uint32 sequenceLength, vx_uint32 sequenceCount); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Resizetensor(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_uint32 nbatchSize); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Brightness(vx_graph graph, vx_tensor pSrc, vx_tensor srcROI, vx_tensor pDst, vx_array alpha, vx_array beta, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Copy(vx_graph graph, vx_tensor pSrc, vx_tensor pDst); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_CropMirrorNormalize(vx_graph graph, vx_tensor pSrc, vx_tensor srcROI, vx_tensor pDst, vx_array multiplier, vx_array offset, vx_array mirror, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType); extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Nop(vx_graph graph, vx_tensor pSrc, vx_tensor pDst); +extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SequenceRearrange(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_array newOrder, vx_scalar layout); #ifdef __cplusplus } #endif diff --git a/amd_openvx_extensions/amd_rpp/source/Brightness.cpp b/amd_openvx_extensions/amd_rpp/source/Brightness.cpp index 79e427176d..856ed26846 100644 --- a/amd_openvx_extensions/amd_rpp/source/Brightness.cpp +++ b/amd_openvx_extensions/amd_rpp/source/Brightness.cpp @@ -46,8 +46,8 @@ struct BrightnessLocalData { static vx_status VX_CALLBACK refreshBrightness(vx_node node, const vx_reference *parameters, vx_uint32 num, BrightnessLocalData *data) { vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->srcDescPtr->n, sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->srcDescPtr->n, sizeof(vx_float32), data->beta, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0], sizeof(vx_float32), data->alpha, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0], sizeof(vx_float32), data->beta, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { #if ENABLE_HIP @@ -63,15 +63,12 @@ static vx_status VX_CALLBACK refreshBrightness(vx_node node, const vx_reference data->roiPtr = (RpptROI *)data->roiTensorPtr; if((data->inputLayout == 2 || data->inputLayout == 3)) { // For NFCHW and NFHWC formats unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F' - for(int n = data->srcDescPtr->n - 1; n >= 0; n--) { + for(int n = data->inputTensorDims[0] - 1; n >= 0; n--) { unsigned index = n * num_of_frames; for(int f = 0; f < num_of_frames; f++) { data->alpha[index + f] = data->alpha[n]; data->beta[index + f] = data->beta[n]; - data->roiPtr[index + f].xywhROI.xy.x = data->roiPtr[n].xywhROI.xy.x; - data->roiPtr[index + f].xywhROI.xy.y = data->roiPtr[n].xywhROI.xy.y; - data->roiPtr[index + f].xywhROI.roiWidth = data->roiPtr[n].xywhROI.roiWidth; - data->roiPtr[index + f].xywhROI.roiHeight = data->roiPtr[n].xywhROI.roiHeight; + data->roiPtr[index + f].xywhROI = data->roiPtr[n].xywhROI; } } } diff --git a/amd_openvx_extensions/amd_rpp/source/CropMirrorNormalize.cpp b/amd_openvx_extensions/amd_rpp/source/CropMirrorNormalize.cpp index b69fa0e857..fe942899ba 100644 --- a/amd_openvx_extensions/amd_rpp/source/CropMirrorNormalize.cpp +++ b/amd_openvx_extensions/amd_rpp/source/CropMirrorNormalize.cpp @@ -47,9 +47,9 @@ struct CropMirrorNormalizeLocalData { static vx_status VX_CALLBACK refreshCropMirrorNormalize(vx_node node, const vx_reference *parameters, vx_uint32 num, CropMirrorNormalizeLocalData *data) { vx_status status = VX_SUCCESS; - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->srcDescPtr->n * data->srcDescPtr->c, sizeof(vx_float32), data->multiplier, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->srcDescPtr->n * data->srcDescPtr->c, sizeof(vx_float32), data->offset, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->srcDescPtr->n, sizeof(vx_uint32), data->mirror, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->inputTensorDims[0] * data->srcDescPtr->c, sizeof(vx_float32), data->multiplier, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->inputTensorDims[0] * data->srcDescPtr->c, sizeof(vx_float32), data->offset, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->inputTensorDims[0], sizeof(vx_uint32), data->mirror, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { #if ENABLE_HIP @@ -66,7 +66,7 @@ static vx_status VX_CALLBACK refreshCropMirrorNormalize(vx_node node, const vx_r data->roiPtr = (RpptROI *)data->roiTensorPtr; if(data->inputLayout == 2 || data->inputLayout == 3) { // For NFCHW and NFHWC formats unsigned num_of_frames = data->inputTensorDims[1]; // Num of frames 'F' - for(int n = data->srcDescPtr->n - 1; n >= 0; n--) { + for(int n = data->inputTensorDims[0] - 1; n >= 0; n--) { unsigned index = n * num_of_frames; for(int f = 0; f < num_of_frames; f++) { for(int c = 0; c < data->srcDescPtr->c; c++) { @@ -76,10 +76,7 @@ static vx_status VX_CALLBACK refreshCropMirrorNormalize(vx_node node, const vx_r data->offset[dst_ind] = data->offset[src_ind]; } data->mirror[index + f] = data->mirror[n]; - data->roiPtr[index + f].xywhROI.xy.x = data->roiPtr[n].xywhROI.xy.x; - data->roiPtr[index + f].xywhROI.xy.y = data->roiPtr[n].xywhROI.xy.y; - data->roiPtr[index + f].xywhROI.roiWidth = data->roiPtr[n].xywhROI.roiWidth; - data->roiPtr[index + f].xywhROI.roiHeight = data->roiPtr[n].xywhROI.roiHeight; + data->roiPtr[index + f].xywhROI = data->roiPtr[n].xywhROI; } } } diff --git a/amd_openvx_extensions/amd_rpp/source/SequenceRearrange.cpp b/amd_openvx_extensions/amd_rpp/source/SequenceRearrange.cpp index 5862e2e32b..1b9738d639 100644 --- a/amd_openvx_extensions/amd_rpp/source/SequenceRearrange.cpp +++ b/amd_openvx_extensions/amd_rpp/source/SequenceRearrange.cpp @@ -1,20 +1,20 @@ /* -SequenceRearrangeright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a SequenceRearrange +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights -to use, SequenceRearrange, modify, merge, publish, distribute, sublicense, and/or sell +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above SequenceRearrangeright notice and this permission notice shall be included in +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR SequenceRearrangeRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -22,258 +22,212 @@ THE SOFTWARE. #include "internal_publishKernels.h" -struct SequenceRearrangeLocalData -{ - RPPCommonHandle handle; - RppiSize dimensions; +struct SequenceRearrangeLocalData { + RPPCommonHandle * handle; RppPtr_t pSrc; RppPtr_t pDst; - Rpp32u device_type; - vx_uint32 new_sequence_length; - vx_uint32 sequence_length; - vx_uint32 sequence_count; - vx_uint32 *new_order; + Rpp32u deviceType; + vx_uint32 newSequenceLength; + vx_uint32 sequenceLength; + vx_uint32 *newOrder; + Rpp32s layout; + RpptDescPtr srcDescPtr; + RpptDesc srcDesc; + RpptDescPtr dstDescPtr; + RpptDesc dstDesc; #if ENABLE_OPENCL cl_mem cl_pSrc; cl_mem cl_pDst; -#elif ENABLE_HIP - void *hip_pSrc; - void *hip_pDst; #endif }; -static vx_status VX_CALLBACK validateSequenceRearrange(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) -{ +static vx_status VX_CALLBACK refreshSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num, SequenceRearrangeLocalData *data) { + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->newSequenceLength, sizeof(vx_uint32), data->newOrder, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_OPENCL + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_OPENCL, &data->cl_pSrc, sizeof(data->cl_pSrc))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_OPENCL, &data->cl_pDst, sizeof(data->cl_pDst))); +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst))); +#endif + } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); + } + return status; +} + +static vx_status VX_CALLBACK validateSequenceRearrange(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { // check scalar alpha and beta type vx_status status = VX_SUCCESS; vx_enum scalar_type; STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) + if (scalar_type != VX_TYPE_INT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); if (scalar_type != VX_TYPE_UINT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); - vx_parameter param = vxGetParameterByIndex(node, 1); - vx_image image; - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryParameter(param, VX_PARAMETER_ATTRIBUTE_REF, &image, sizeof(vx_image))); - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB) - status = VX_ERROR_INVALID_VALUE; - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_FORMAT, &df_image, sizeof(df_image))); - vx_uint32 height, width; - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_HEIGHT, &height, sizeof(height))); - STATUS_ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width))); - STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_WIDTH, &width, sizeof(width))); - vxReleaseImage(&image); + + // Check for input parameters + size_t num_tensor_dims; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + if(num_tensor_dims != 5) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: SequenceRearrange: tensor: #0 dimensions=%lu (must be equal to 5)\n", num_tensor_dims); + + // Check for output parameters + vx_uint8 tensor_fixed_point_position; + size_t tensor_dims[RPP_MAX_TENSOR_DIMS]; + vx_enum tensor_type; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + if(num_tensor_dims != 5) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: SequenceRearrange: tensor: #1 dimensions=%lu (must be equal to 5)\n", num_tensor_dims); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &tensor_type, sizeof(tensor_type))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position))); + return status; } -static vx_status VX_CALLBACK processSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ +static vx_status VX_CALLBACK processSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num) { SequenceRearrangeLocalData *data = NULL; - vx_status return_status = VX_SUCCESS; + vx_status status = VX_SUCCESS; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - vx_df_image df_image = VX_DF_IMAGE_VIRT; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); + refreshSequenceRearrange(node, parameters, num, data); + if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { #if ENABLE_OPENCL cl_command_queue handle = data->handle.cmdq; - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst))); - unsigned size = data->dimensions.height * data->dimensions.width; - if (df_image == VX_DF_IMAGE_U8) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)); - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_offset = dst_sequence_start_address + (dst_index * elem_size); - auto src_offset = src_sequence_start_address + (src_index * elem_size); - if (clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, src_offset, dst_offset, elem_size, 0, NULL, NULL) != CL_SUCCESS) - return VX_FAILURE; - } - } - } - else if (df_image == VX_DF_IMAGE_RGB) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)) * 3; - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_offset = dst_sequence_start_address + (dst_index * elem_size); - auto src_offset = src_sequence_start_address + (src_index * elem_size); - if (clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, src_offset, dst_offset, elem_size, 0, NULL, NULL) != CL_SUCCESS) + for (int sequence_cnt = 0; sequence_cnt < data->srcDescPtr->n; sequence_cnt++) { + unsigned src_sequence_start_address = sequence_cnt * data->srcDescPtr->strides.nStride * data->sequenceLength; + unsigned dst_sequence_start_address = sequence_cnt * data->dstDescPtr->strides.nStride * data->newSequenceLength; + for (unsigned dst_index = 0; dst_index < (data->newSequenceLength); dst_index++) { + unsigned src_index = data->newOrder[dst_index]; + if (src_index > data->sequenceLength) + ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequenceLength - 1); + auto dst_offset = (unsigned char *)data->cl_pDst + dst_sequence_start_address + (dst_index * data->srcDescPtr->strides.nStride); + auto src_offset = (unsigned char *)data->cl_pSrc + src_sequence_start_address + (src_index * data->dstDescPtr->strides.nStride); + if (clEnqueueCopyBuffer(handle, data->cl_pSrc, data->cl_pDst, src_offset, dst_offset, data->srcDescPtr->strides.nStride, 0, NULL, NULL) != CL_SUCCESS) return VX_FAILURE; - } } } - return_status = VX_SUCCESS; #elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst))); - unsigned size = data->dimensions.height * data->dimensions.width; - if (df_image == VX_DF_IMAGE_U8) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)); - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_address = (unsigned char *)data->hip_pDst + dst_sequence_start_address + (dst_index * elem_size); - auto src_address = (unsigned char *)data->hip_pSrc + src_sequence_start_address + (src_index * elem_size); - hipError_t status = hipMemcpyDtoD(dst_address, src_address, elem_size); + for (int sequence_cnt = 0; sequence_cnt < data->srcDescPtr->n; sequence_cnt++) { + unsigned src_sequence_start_address = sequence_cnt * data->srcDescPtr->strides.nStride * data->sequenceLength; + unsigned dst_sequence_start_address = sequence_cnt * data->dstDescPtr->strides.nStride * data->newSequenceLength; + for (unsigned dst_index = 0; dst_index < (data->newSequenceLength); dst_index++) { + unsigned src_index = data->newOrder[dst_index]; + if (src_index > data->sequenceLength) + ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequenceLength - 1); + auto dst_address = (unsigned char *)data->pDst + dst_sequence_start_address + (dst_index * data->srcDescPtr->strides.nStride); + auto src_address = (unsigned char *)data->pSrc + src_sequence_start_address + (src_index * data->dstDescPtr->strides.nStride); + hipError_t status = hipMemcpyDtoD(dst_address, src_address, data->srcDescPtr->strides.nStride); if (status != hipSuccess) - return VX_FAILURE; - } - } - } - else if (df_image == VX_DF_IMAGE_RGB) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)) * 3; - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_address = (unsigned char *)data->hip_pDst + dst_sequence_start_address + (dst_index * elem_size); - auto src_address = (unsigned char *)data->hip_pSrc + src_sequence_start_address + (src_index * elem_size); - hipError_t status = hipMemcpyDtoD(dst_address, src_address, elem_size); - if (status != hipSuccess) - return VX_FAILURE; - } + return VX_FAILURE; } } - return_status = VX_SUCCESS; #endif - } - else if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8))); - unsigned size = data->dimensions.height * data->dimensions.width; - if (df_image == VX_DF_IMAGE_U8) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)); - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_address = (unsigned char *)data->pDst + dst_sequence_start_address + (dst_index * elem_size); - auto src_address = (unsigned char *)data->pSrc + src_sequence_start_address + (src_index * elem_size); - memcpy(dst_address, src_address, elem_size); - } + } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + for (int sequence_cnt = 0; sequence_cnt < data->srcDescPtr->n; sequence_cnt++) { + unsigned src_sequence_start_address = sequence_cnt * data->srcDescPtr->strides.nStride * data->sequenceLength; + unsigned dst_sequence_start_address = sequence_cnt * data->dstDescPtr->strides.nStride * data->newSequenceLength; + for (unsigned dst_index = 0; dst_index < (data->newSequenceLength); dst_index++) { + unsigned src_index = data->newOrder[dst_index]; + if (src_index > data->sequenceLength) + ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequenceLength - 1); + auto dst_address = (unsigned char *)data->pDst + dst_sequence_start_address + (dst_index * data->srcDescPtr->strides.nStride); + auto src_address = (unsigned char *)data->pSrc + src_sequence_start_address + (src_index * data->dstDescPtr->strides.nStride); + memcpy(dst_address, src_address, data->srcDescPtr->strides.nStride); } } - else if (df_image == VX_DF_IMAGE_RGB) - { - unsigned elem_size = (size / (data->sequence_length * data->sequence_count)) * 3; - for (int sequence_cnt = 0; sequence_cnt < data->sequence_count; sequence_cnt++) - { - unsigned src_sequence_start_address = sequence_cnt * elem_size * data->sequence_length; - unsigned dst_sequence_start_address = sequence_cnt * elem_size * data->new_sequence_length; - for (unsigned dst_index = 0; dst_index < (data->new_sequence_length); dst_index++) - { - unsigned src_index = data->new_order[dst_index]; - if (src_index > data->sequence_length) - ERRMSG(VX_ERROR_INVALID_VALUE, "invalid new order value=%d (must be between 0-%d)\n", src_index, data->sequence_length - 1); - auto dst_address = (unsigned char *)data->pDst + dst_sequence_start_address + (dst_index * elem_size); - auto src_address = (unsigned char *)data->pSrc + src_sequence_start_address + (src_index * elem_size); - memcpy(dst_address, src_address, elem_size); - } - } - } - return_status = VX_SUCCESS; } - return return_status; + return status; } -static vx_status VX_CALLBACK initializeSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ +static vx_status VX_CALLBACK initializeSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num) { SequenceRearrangeLocalData *data = new SequenceRearrangeLocalData; memset(data, 0, sizeof(*data)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream))); -#endif - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->dimensions.height, sizeof(data->dimensions.height))); - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->dimensions.width, sizeof(data->dimensions.width))); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->new_sequence_length, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->sequence_length, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[5], &data->sequence_count, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - data->new_order = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->new_sequence_length); - STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->new_sequence_length, sizeof(vx_uint32), data->new_order, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); -#if ENABLE_OPENCL - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc))); -#elif ENABLE_HIP - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc))); -#else - STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, data->pSrc, sizeof(data->pSrc))); -#endif + + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + + vx_size in_num_of_dims, out_num_of_dims; + size_t in_tensor_dims[RPP_MAX_TENSOR_DIMS], out_tensor_dims[RPP_MAX_TENSOR_DIMS]; + + // Querying for input tensor + data->srcDescPtr = &data->srcDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &in_num_of_dims, sizeof(vx_size))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, in_tensor_dims, sizeof(vx_size) * in_num_of_dims)); + data->srcDescPtr->offsetInBytes = 0; + fillDescriptionPtrfromDims(data->srcDescPtr, data->layout, in_tensor_dims); + + // Querying for output tensor + data->dstDescPtr = &data->dstDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &out_num_of_dims, sizeof(vx_size))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, out_tensor_dims, sizeof(vx_size) * out_num_of_dims)); + data->dstDescPtr->offsetInBytes = 0; + fillDescriptionPtrfromDims(data->dstDescPtr, data->layout, out_tensor_dims); + + data->srcDescPtr->n = in_tensor_dims[0]; + data->sequenceLength = in_tensor_dims[1]; + + data->dstDescPtr->n = out_tensor_dims[0]; + data->newSequenceLength = out_tensor_dims[1]; + data->newOrder = (vx_uint32 *)malloc(sizeof(vx_uint32) * data->newSequenceLength); + refreshSequenceRearrange(node, parameters, num, data); + STATUS_ERROR_CHECK(createGraphHandle(node, &data->handle, data->srcDescPtr->n, data->deviceType)); STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; } -static vx_status VX_CALLBACK uninitializeSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num) -{ +static vx_status VX_CALLBACK uninitializeSequenceRearrange(vx_node node, const vx_reference *parameters, vx_uint32 num) { + SequenceRearrangeLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + STATUS_ERROR_CHECK(releaseGraphHandle(node, data->handle, data->deviceType)); + if(data->newOrder) free(data->newOrder); + delete (data); return VX_SUCCESS; } -vx_status SequenceRearrange_Register(vx_context context) -{ +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) { + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + +// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes +#if ENABLE_OPENCL + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; +#endif + return VX_SUCCESS; +} + +vx_status SequenceRearrange_Register(vx_context context) { vx_status status = VX_SUCCESS; // add kernel to the context with callbacks vx_kernel kernel = vxAddUserKernel(context, "org.rpp.SequenceRearrange", VX_KERNEL_RPP_SEQUENCEREARRANGE, processSequenceRearrange, - 7, + 5, validateSequenceRearrange, initializeSequenceRearrange, uninitializeSequenceRearrange); ERROR_CHECK_OBJECT(kernel); AgoTargetAffinityInfo affinity; vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); -#if ENABLE_OPENCL +#if ENABLE_OPENCL || ENABLE_HIP // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers vx_bool enableBufferAccess = vx_true_e; if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) @@ -281,19 +235,18 @@ vx_status SequenceRearrange_Register(vx_context context) #else vx_bool enableBufferAccess = vx_false_e; #endif - if (kernel) - { - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + + if (kernel) { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); } - if (status != VX_SUCCESS) - { + if (status != VX_SUCCESS) { exit: vxRemoveKernel(kernel); return VX_FAILURE; } return status; diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index 2afe4c2643..f6cddfdabc 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -1852,28 +1852,22 @@ VX_API_CALL vx_node VX_API_CALL vxExtrppNode_NopbatchPD(vx_graph graph, vx_image return node; } -VX_API_CALL vx_node VX_API_CALL vxExtrppNode_SequenceRearrange(vx_graph graph,vx_image pSrc,vx_image pDst, vx_array newOrder, vx_uint32 newSequenceLength, vx_uint32 sequenceLength, vx_uint32 sequenceCount) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if(vxGetStatus((vx_reference)context) == VX_SUCCESS) { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_scalar NEWSEQUENCELENGTH = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &newSequenceLength); - vx_scalar SEQUENCELENGTH = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &sequenceLength); - vx_scalar SEQUENCECOUNT = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &sequenceCount); - vx_reference params[] = { - (vx_reference) pSrc, - (vx_reference) pDst, - (vx_reference) newOrder, - (vx_reference) NEWSEQUENCELENGTH, - (vx_reference) SEQUENCELENGTH, - (vx_reference) SEQUENCECOUNT, - (vx_reference) DEV_TYPE - }; - node = createNode(graph, VX_KERNEL_RPP_SEQUENCEREARRANGE, params, 7); - } - return node; +VX_API_CALL vx_node VX_API_CALL vxExtrppNode_SequenceRearrange(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_array newOrder, vx_scalar layout) { + vx_node node = NULL; + vx_context context = vxGetContext((vx_reference)graph); + if(vxGetStatus((vx_reference)context) == VX_SUCCESS) { + vx_uint32 dev_type = getGraphAffinity(graph); + vx_scalar dev_type_scalar = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); + vx_reference params[] = { + (vx_reference) pSrc, + (vx_reference) pDst, + (vx_reference) newOrder, + (vx_reference) layout, + (vx_reference) dev_type_scalar + }; + node = createNode(graph, VX_KERNEL_RPP_SEQUENCEREARRANGE, params, 5); + } + return node; } VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_Brightness(vx_graph graph, vx_tensor pSrc, vx_tensor srcROI, vx_tensor pDst, vx_array alpha, vx_array beta, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType) diff --git a/rocAL/rocAL/include/api/rocal_api_augmentation.h b/rocAL/rocAL/include/api/rocal_api_augmentation.h index c9cd2e1230..48e70626f6 100644 --- a/rocAL/rocAL/include/api/rocal_api_augmentation.h +++ b/rocAL/rocAL/include/api/rocal_api_augmentation.h @@ -31,13 +31,11 @@ THE SOFTWARE. /// \param context /// \param input /// \param new_order -/// \param new_sequence_length -/// \param sequence_length /// \param is_output /// \return -extern "C" RocalImage ROCAL_API_CALL rocalSequenceRearrange(RocalContext context, RocalImage input, - unsigned int* new_order, unsigned int new_sequence_length, - unsigned int sequence_length, bool is_output ); +extern "C" RocalTensor ROCAL_API_CALL rocalSequenceRearrange(RocalContext p_context, RocalTensor input, + std::vector& new_order, + bool is_output); /// Accepts U8 and RGB24 input. /// \param context diff --git a/rocAL/rocAL/include/api/rocal_api_data_loaders.h b/rocAL/rocAL/include/api/rocal_api_data_loaders.h index 36dd8656b6..aecc766ef2 100644 --- a/rocAL/rocAL/include/api/rocal_api_data_loaders.h +++ b/rocAL/rocAL/include/api/rocal_api_data_loaders.h @@ -85,17 +85,17 @@ extern "C" RocalTensor ROCAL_API_CALL rocalJpegFileSourceSingleShard(RocalCont /// \param loop Determines if the user wants to indefinitely loops through images or not. /// \param step: Frame interval between each sequence. /// \param stride: Frame interval between frames in a sequence. -/// \return Reference to the output image. -extern "C" RocalImage ROCAL_API_CALL rocalSequenceReader(RocalContext context, - const char* source_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - unsigned sequence_length, - bool is_output, - bool shuffle = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0); +/// \return Reference to the output tensor. +extern "C" RocalTensor ROCAL_API_CALL rocalSequenceReader(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + unsigned sequence_length, + bool is_output, + bool shuffle = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0); /// Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. It accepts external sharding information to load a singe shard only. /// \param context Rocal context @@ -109,18 +109,18 @@ extern "C" RocalImage ROCAL_API_CALL rocalSequenceReader(RocalContext context, /// \param loop Determines if the user wants to indefinitely loops through images or not. /// \param step: Frame interval between each sequence. /// \param stride: Frame interval between frames in a sequence. -/// \return Reference to the output image -extern "C" RocalImage ROCAL_API_CALL rocalSequenceReaderSingleShard(RocalContext context, - const char* source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - unsigned sequence_length, - bool is_output, - bool shuffle = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0); +/// \return Reference to the output tensor +extern "C" RocalTensor ROCAL_API_CALL rocalSequenceReaderSingleShard(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + unsigned sequence_length, + bool is_output, + bool shuffle = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0); /// Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. /// If images are not Jpeg compressed they will be ignored. @@ -550,18 +550,18 @@ extern "C" RocalImage ROCAL_API_CALL rocalRawTFRecordSourceSingleShard(RocalCo /// \param stride: Frame interval between frames in a sequence. /// \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. /// \return -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileSource(RocalContext context, - const char* source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned internal_shard_count, - unsigned sequence_length, - bool is_output = false, - bool shuffle = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true); +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileSource(RocalContext context, + const char* source_path, + RocalImageColor color_format, + RocalDecodeDevice rocal_decode_device, + unsigned internal_shard_count, + unsigned sequence_length, + bool is_output = false, + bool shuffle = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true); /// Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. It accepts external sharding information to load a singe shard only. /// \param context Rocal context @@ -579,19 +579,19 @@ extern "C" RocalImage ROCAL_API_CALL rocalVideoFileSource(RocalContext context /// \param stride: Frame interval between frames in a sequence. /// \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. /// \return -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileSourceSingleShard(RocalContext context, - const char* source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned shard_id, - unsigned shard_count, - unsigned sequence_length, - bool shuffle = false, - bool is_output = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true); +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileSourceSingleShard(RocalContext context, + const char* source_path, + RocalImageColor color_format, + RocalDecodeDevice rocal_decode_device, + unsigned shard_id, + unsigned shard_count, + unsigned sequence_length, + bool shuffle = false, + bool is_output = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true); /// Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. /// \param context Rocal context @@ -610,25 +610,25 @@ extern "C" RocalImage ROCAL_API_CALL rocalVideoFileSourceSingleShard(RocalCont /// \param stride: Frame interval between frames in a sequence. /// \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. /// \return -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileResize(RocalContext context, - const char* source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned internal_shard_count, - unsigned sequence_length, - unsigned dest_width, - unsigned dest_height, - bool shuffle = false, - bool is_output = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true, - RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, - std::vector max_size = {}, - unsigned resize_shorter = 0, - unsigned resize_longer = 0, - RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileResize(RocalContext context, + const char* source_path, + RocalImageColor color_format, + RocalDecodeDevice rocal_decode_device, + unsigned internal_shard_count, + unsigned sequence_length, + unsigned dest_width, + unsigned dest_height, + bool shuffle = false, + bool is_output = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true, + RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, + std::vector max_size = {}, + unsigned resize_shorter = 0, + unsigned resize_longer = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); /// Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. It accepts external sharding information to load a singe shard only. /// \param context Rocal context @@ -648,26 +648,26 @@ extern "C" RocalImage ROCAL_API_CALL rocalVideoFileResize(RocalContext context /// \param stride: Frame interval between frames in a sequence. /// \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. /// \return -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileResizeSingleShard(RocalContext context, - const char* source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned shard_id, - unsigned shard_count, - unsigned sequence_length, - unsigned dest_width, - unsigned dest_height, - bool shuffle = false, - bool is_output = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true, - RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, - std::vector max_size = {}, - unsigned resize_shorter = 0, - unsigned resize_longer = 0, - RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileResizeSingleShard(RocalContext context, + const char* source_path, + RocalImageColor color_format, + RocalDecodeDevice rocal_decode_device, + unsigned shard_id, + unsigned shard_count, + unsigned sequence_length, + unsigned dest_width, + unsigned dest_height, + bool shuffle = false, + bool is_output = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true, + RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, + std::vector max_size = {}, + unsigned resize_shorter = 0, + unsigned resize_longer = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); /// Creates CIFAR10 raw data reader and loader. It allocates the resources and objects required to read raw data stored on the file systems. /// \param context Rocal context diff --git a/rocAL/rocAL/include/augmentations/node_sequence_rearrange.h b/rocAL/rocAL/include/augmentations/node_sequence_rearrange.h index 247b7728d5..e5bd1fb0a2 100644 --- a/rocAL/rocAL/include/augmentations/node_sequence_rearrange.h +++ b/rocAL/rocAL/include/augmentations/node_sequence_rearrange.h @@ -26,17 +26,14 @@ THE SOFTWARE. #include "parameter_vx.h" #include "graph.h" -class SequenceRearrangeNode : public Node -{ +class SequenceRearrangeNode : public Node { public: - SequenceRearrangeNode(const std::vector &inputs, const std::vector &outputs); + SequenceRearrangeNode(const std::vector &inputs, const std::vector &outputs); SequenceRearrangeNode() = delete; - void init(unsigned int* new_order, unsigned int new_sequence_length, unsigned int sequence_length, unsigned int sequence_count); + void init(std::vector& new_order); protected: void create_node() override; void update_node() override; private: std::vector _new_order; - unsigned int _new_sequence_length, _sequence_length, _sequence_count; - vx_array _sequence_array; }; diff --git a/rocAL/rocAL/include/decoders/image/decoder.h b/rocAL/rocAL/include/decoders/image/decoder.h index aea8f6c86b..a4b6b245f6 100644 --- a/rocAL/rocAL/include/decoders/image/decoder.h +++ b/rocAL/rocAL/include/decoders/image/decoder.h @@ -35,7 +35,9 @@ enum class DecoderType OPENCV_DEC = 2, //!< for back_up decoding HW_JPEG_DEC = 3, SKIP_DECODE = 4, //!< For skipping decoding in case of uncompressed data from reader - OVX_FFMPEG,//!< Uses FFMPEG to decode video streams, can decode up to 4 video streams simultaneously + OVX_FFMPEG = 5,//!< Uses FFMPEG to decode video streams, can decode up to 4 video streams simultaneously + FFMPEG_SOFTWARE_DECODE = 6, + FFMPEG_HARDWARE_DECODE = 7, }; diff --git a/rocAL/rocAL/include/decoders/video/video_decoder.h b/rocAL/rocAL/include/decoders/video/video_decoder.h index e6549140f1..320ad76179 100644 --- a/rocAL/rocAL/include/decoders/video/video_decoder.h +++ b/rocAL/rocAL/include/decoders/video/video_decoder.h @@ -44,20 +44,6 @@ extern "C" #endif #include "parameter_factory.h" -enum class VideoDecoderType -{ - FFMPEG_SOFTWARE_DECODE = 0, - FFMPEG_HARDWARE_DECODE = 1, -}; - -class VideoDecoderConfig -{ -public: - VideoDecoderConfig() {} - explicit VideoDecoderConfig(VideoDecoderType type) : _type(type) {} - virtual VideoDecoderType type() { return _type; }; - VideoDecoderType _type = VideoDecoderType::FFMPEG_SOFTWARE_DECODE; -}; #ifdef ROCAL_VIDEO class VideoDecoder diff --git a/rocAL/rocAL/include/decoders/video/video_decoder_factory.h b/rocAL/rocAL/include/decoders/video/video_decoder_factory.h index 0b108e465a..07001c16a7 100644 --- a/rocAL/rocAL/include/decoders/video/video_decoder_factory.h +++ b/rocAL/rocAL/include/decoders/video/video_decoder_factory.h @@ -23,7 +23,8 @@ THE SOFTWARE. #pragma once #include #include "video_decoder.h" +#include "decoder.h" #ifdef ROCAL_VIDEO -std::shared_ptr create_video_decoder(VideoDecoderConfig config); +std::shared_ptr create_video_decoder(DecoderConfig config); #endif diff --git a/rocAL/rocAL/include/loaders/image/loader_module.h b/rocAL/rocAL/include/loaders/loader_module.h similarity index 91% rename from rocAL/rocAL/include/loaders/image/loader_module.h rename to rocAL/rocAL/include/loaders/loader_module.h index 37605302a5..fd096b26b4 100644 --- a/rocAL/rocAL/include/loaders/image/loader_module.h +++ b/rocAL/rocAL/include/loaders/loader_module.h @@ -25,7 +25,7 @@ THE SOFTWARE. #include "reader.h" #include "decoder.h" #include "commons.h" -#include "image.h" +#include "tensor.h" #include "circular_buffer.h" #include "meta_data_reader.h" #include "meta_data_graph.h" @@ -46,8 +46,8 @@ class LoaderModule { public: virtual void initialize(ReaderConfig reader_config, DecoderConfig decoder_config, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size) = 0; - virtual void set_output_image(Image* output_image) = 0; - virtual LoaderModuleStatus load_next() = 0; // Loads the next image data into the Image's buffer set by calling into the set_output_image + virtual void set_output(rocalTensor* output_image) = 0; + virtual LoaderModuleStatus load_next() = 0; // Loads the next image data into the Image's buffer set by calling into the set_output virtual void reset() = 0; // Resets the loader to load from the beginning of the media virtual size_t remaining_count() = 0; // Returns the number of available images to be loaded virtual ~LoaderModule()= default; @@ -60,6 +60,8 @@ class LoaderModule // introduce meta data reader virtual void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) = 0; virtual void shut_down() = 0; + virtual std::vector get_sequence_start_frame_number() { return {}; } + virtual std::vector> get_sequence_frame_timestamps() { return {}; } }; using pLoaderModule = std::shared_ptr; \ No newline at end of file diff --git a/rocAL/rocAL/include/loaders/video/node_video_loader.h b/rocAL/rocAL/include/loaders/video/node_video_loader.h index 41625cb2dc..a0b27da6e1 100644 --- a/rocAL/rocAL/include/loaders/video/node_video_loader.h +++ b/rocAL/rocAL/include/loaders/video/node_video_loader.h @@ -30,7 +30,7 @@ THE SOFTWARE. class VideoLoaderNode : public Node { public: - VideoLoaderNode(Image *output, void * device_resources); + VideoLoaderNode(rocalTensor *output, void * device_resources); ~VideoLoaderNode() override; VideoLoaderNode() = delete; /// @@ -39,9 +39,9 @@ class VideoLoaderNode : public Node /// \param load_batch_count Defines the quantum count of the sequences to be loaded. It's usually equal to the user's batch size. /// The loader will repeat sequences if necessary to be able to have sequences in multiples of the load_batch_count, /// for example if there are 10 sequences in the dataset and load_batch_count is 3, the loader repeats 2 sequences as if there are 12 sequences available. - void init(unsigned internal_shard_count, const std::string &source_path, VideoStorageType storage_type, VideoDecoderType decoder_type, DecodeMode decoder_mode, + void init(unsigned internal_shard_count, const std::string &source_path, StorageType storage_type, DecoderType decoder_type, DecodeMode decoder_mode, unsigned sequence_length, unsigned step, unsigned stride, VideoProperties &video_prop, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type); - std::shared_ptr get_loader_module(); + std::shared_ptr get_loader_module(); protected: void create_node() override{}; void update_node() override{}; diff --git a/rocAL/rocAL/include/loaders/video/node_video_loader_single_shard.h b/rocAL/rocAL/include/loaders/video/node_video_loader_single_shard.h index a463ed31d2..f9ff87eb1f 100644 --- a/rocAL/rocAL/include/loaders/video/node_video_loader_single_shard.h +++ b/rocAL/rocAL/include/loaders/video/node_video_loader_single_shard.h @@ -30,7 +30,7 @@ THE SOFTWARE. class VideoLoaderSingleShardNode : public Node { public: - VideoLoaderSingleShardNode(Image *output, void *device_resources); + VideoLoaderSingleShardNode(rocalTensor *output, void *device_resources); ~VideoLoaderSingleShardNode() override; /// \param user_shard_count shard count from user @@ -39,10 +39,10 @@ class VideoLoaderSingleShardNode : public Node /// \param load_batch_count Defines the quantum count of the sequences to be loaded. It's usually equal to the user's batch size. /// The loader will repeat sequences if necessary to be able to have sequences in multiples of the load_batch_count, /// for example if there are 10 sequences in the dataset and load_batch_count is 3, the loader repeats 2 sequences as if there are 12 sequences available. - void init(unsigned shard_id, unsigned shard_count, const std::string &source_path, VideoStorageType storage_type, VideoDecoderType decoder_type, DecodeMode decoder_mode, + void init(unsigned shard_id, unsigned shard_count, const std::string &source_path, StorageType storage_type, DecoderType decoder_type, DecodeMode decoder_mode, unsigned sequence_length, unsigned step, unsigned stride, VideoProperties &video_prop, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type); - std::shared_ptr get_loader_module(); + std::shared_ptr get_loader_module(); protected: void create_node() override {}; void update_node() override {}; diff --git a/rocAL/rocAL/include/loaders/video/video_loader.h b/rocAL/rocAL/include/loaders/video/video_loader.h index caa1092ee8..701c33938e 100644 --- a/rocAL/rocAL/include/loaders/video/video_loader.h +++ b/rocAL/rocAL/include/loaders/video/video_loader.h @@ -34,23 +34,25 @@ THE SOFTWARE. // // VideoLoader runs an internal thread for loading an decoding of sequences asynchronously // it uses a circular buffer to store decoded sequence of frames for the user -class VideoLoader : public VideoLoaderModule +class VideoLoader : public LoaderModule { public: explicit VideoLoader(void * dev_resources); ~VideoLoader() override; - VideoLoaderModuleStatus load_next() override; - void initialize(VideoReaderConfig reader_cfg, VideoDecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; - void set_output_image(Image *output_image) override; + LoaderModuleStatus load_next() override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + void set_output(rocalTensor* output_image) override; size_t remaining_count() override; // returns number of remaining items to be loaded void reset() override; // Resets the loader to load from the beginning Timing timing() override; void start_loading() override; - VideoLoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); - VideoLoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); + LoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); + LoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); std::vector get_id() override; decoded_image_info get_decode_image_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + crop_image_info get_crop_image_info() override { return _crop_img_info; } + void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override {}; std::vector get_sequence_start_frame_number() override; std::vector> get_sequence_frame_timestamps() override; void shut_down() override; @@ -60,14 +62,13 @@ class VideoLoader : public VideoLoaderModule void de_init(); void stop_internal_thread(); std::shared_ptr _video_loader; - VideoLoaderModuleStatus update_output_image(); - VideoLoaderModuleStatus load_routine(); - Image *_output_image; + LoaderModuleStatus update_output_image(); + LoaderModuleStatus load_routine(); + rocalTensor* _output_tensor; std::vector _output_names; //!< frame name/ids that are stored in the _output_image size_t _output_mem_size; bool _internal_thread_running; size_t _batch_size; - size_t _sequence_count; size_t _sequence_length; std::thread _load_thread; RocalMemType _mem_type; @@ -84,5 +85,7 @@ class VideoLoader : public VideoLoaderModule bool _decoder_keep_original = false; std::vector> _sequence_start_framenum_vec; std::vector>> _sequence_frame_timestamps_vec; + crop_image_info _crop_img_info; + size_t _max_decoded_width, _max_decoded_height; }; #endif diff --git a/rocAL/rocAL/include/loaders/video/video_loader_module.h b/rocAL/rocAL/include/loaders/video/video_loader_module.h deleted file mode 100644 index 73e6cdf30b..0000000000 --- a/rocAL/rocAL/include/loaders/video/video_loader_module.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once -#include -#include "video_reader.h" -#include "video_decoder.h" -#include "commons.h" -#include "image.h" -#include "circular_buffer.h" -#include "meta_data_reader.h" -#include "meta_data_graph.h" - -#ifdef ROCAL_VIDEO -enum class VideoLoaderModuleStatus -{ - OK = 0, - DEVICE_BUFFER_SWAP_FAILED, - HOST_BUFFER_SWAP_FAILED, - NO_FILES_TO_READ, - DECODE_FAILED, - NO_MORE_DATA_TO_READ, - NOT_INITIALIZED -}; - -/*! \class VideoLoaderModule The interface defining the API and requirements of loader modules*/ -class VideoLoaderModule -{ -public: - virtual void initialize(VideoReaderConfig reader_config, VideoDecoderConfig decoder_config, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size) = 0; - virtual void set_output_image(Image *output_image) = 0; - virtual VideoLoaderModuleStatus load_next() = 0; // Loads the next sequence of frames into the buffer set by calling into the set_output_image - virtual void reset() = 0; // Resets the loader to load from the beginning of the video files - virtual size_t remaining_count() = 0; // Returns the number of available frames to be loaded - virtual ~VideoLoaderModule() = default; - virtual Timing timing() = 0; // Returns timing info - virtual std::vector get_id() = 0; // returns the id of the last batch of images/frames loaded - virtual void start_loading() = 0; // starts internal loading thread - virtual decoded_image_info get_decode_image_info() = 0; - virtual void set_prefetch_queue_depth(size_t prefetch_queue_depth) = 0; - virtual std::vector get_sequence_start_frame_number() = 0; - virtual std::vector> get_sequence_frame_timestamps() = 0; - virtual void shut_down() = 0; -}; - -using pVideoLoaderModule = std::shared_ptr; -#endif diff --git a/rocAL/rocAL/include/loaders/video/video_loader_sharded.h b/rocAL/rocAL/include/loaders/video/video_loader_sharded.h index 44b79ebcb7..c6d0f87910 100644 --- a/rocAL/rocAL/include/loaders/video/video_loader_sharded.h +++ b/rocAL/rocAL/include/loaders/video/video_loader_sharded.h @@ -28,21 +28,23 @@ THE SOFTWARE. // It improves load and decode performance since each loader loads the sequences in parallel using an internal thread // #ifdef ROCAL_VIDEO -class VideoLoaderSharded : public VideoLoaderModule +class VideoLoaderSharded : public LoaderModule { public: explicit VideoLoaderSharded(void *dev_resources); ~VideoLoaderSharded() override; - VideoLoaderModuleStatus load_next() override; - void initialize(VideoReaderConfig reader_cfg, VideoDecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + LoaderModuleStatus load_next() override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; void shut_down() override; - void set_output_image(Image *output_image) override; + void set_output(rocalTensor* output_image) override; size_t remaining_count() override; void reset() override; void start_loading() override; std::vector get_id() override; decoded_image_info get_decode_image_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + crop_image_info get_crop_image_info() override { return _crop_img_info; } + void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override {}; std::vector get_sequence_start_frame_number() override; std::vector> get_sequence_frame_timestamps() override; Timing timing() override; @@ -55,6 +57,7 @@ class VideoLoaderSharded : public VideoLoaderModule size_t _shard_count = 1; void fast_forward_through_empty_loaders(); size_t _prefetch_queue_depth; // Used for circular buffer's internal buffer - Image *_output_image; + rocalTensor* _output_tensor; + crop_image_info _crop_img_info; }; #endif diff --git a/rocAL/rocAL/include/loaders/video/video_read_and_decode.h b/rocAL/rocAL/include/loaders/video/video_read_and_decode.h index 8f43fb7c92..87fa485084 100644 --- a/rocAL/rocAL/include/loaders/video/video_read_and_decode.h +++ b/rocAL/rocAL/include/loaders/video/video_read_and_decode.h @@ -33,8 +33,10 @@ THE SOFTWARE. #include "ffmpeg_video_decoder.h" #include "video_reader_factory.h" #include "timing_debug.h" -#include "video_loader_module.h" +#include "loader_module.h" #include "video_properties.h" +#include "reader.h" +#include "video_reader.h" #ifdef ROCAL_VIDEO extern "C" { @@ -48,7 +50,7 @@ class VideoReadAndDecode ~VideoReadAndDecode(); size_t count(); void reset(); - void create(VideoReaderConfig reader_config, VideoDecoderConfig decoder_config, int batch_size); + void create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size); void set_video_process_count(size_t video_count) { _video_process_count = (video_count <= _max_video_count) ? video_count : _max_video_count; @@ -66,7 +68,7 @@ class VideoReadAndDecode /// \param sequence_start_framenum_vec is set by the load() function. The starting frame number of the sequences will be updated. /// \param sequence_frame_timestamps_vec is set by the load() function. The timestamps of each of the frames in the sequences will be updated. /// \param output_color_format defines what color format user expects decoder to decode frames into if capable of doing so supported is - VideoLoaderModuleStatus load( + LoaderModuleStatus load( unsigned char *buff, std::vector &names, const size_t max_decoded_width, @@ -102,7 +104,6 @@ class VideoReadAndDecode std::vector _sequence_video_idx; TimingDBG _file_load_time, _decode_time; size_t _batch_size; - size_t _sequence_count; size_t _sequence_length; size_t _stride; size_t _video_count; @@ -111,6 +112,6 @@ class VideoReadAndDecode size_t _max_decoded_height; size_t _max_decoded_stride; AVPixelFormat _out_pix_fmt; - VideoDecoderConfig _video_decoder_config; + DecoderConfig _video_decoder_config; }; #endif diff --git a/rocAL/rocAL/include/pipeline/master_graph.h b/rocAL/rocAL/include/pipeline/master_graph.h index 8b58d0d2ce..e5a03719d5 100644 --- a/rocAL/rocAL/include/pipeline/master_graph.h +++ b/rocAL/rocAL/include/pipeline/master_graph.h @@ -192,7 +192,7 @@ std::shared_ptr MasterGraph::meta_add_node(std::shared_ptr node) /* * Explicit specialization for ImageLoaderNode */ -template<> inline std::shared_ptr MasterGraph::add_node(const std::vector& inputs, const std::vector& outputs) +template<> inline std::shared_ptr MasterGraph::add_node(const std::vector& inputs, const std::vector& outputs) { if(_loader_module) THROW("A loader already exists, cannot have more than one loader") diff --git a/rocAL/rocAL/include/pipeline/tensor.h b/rocAL/rocAL/include/pipeline/tensor.h index 6c63600e6b..e17631c3d1 100644 --- a/rocAL/rocAL/include/pipeline/tensor.h +++ b/rocAL/rocAL/include/pipeline/tensor.h @@ -159,6 +159,10 @@ class rocalTensorInfo { void set_color_format(RocalColorFormat color_format) { _color_format = color_format; } + // Introduce for SequenceReader, as batch size is different in case of sequence reader + void set_sequence_batch_size(unsigned sequence_length) { + _batch_size *= sequence_length; + } size_t get_channels() const { return _channels; } unsigned num_of_dims() const { return _num_of_dims; } unsigned batch_size() const { return _batch_size; } diff --git a/rocAL/rocAL/include/readers/image/reader.h b/rocAL/rocAL/include/readers/image/reader.h index be6eeec6ab..a799a12add 100644 --- a/rocAL/rocAL/include/readers/image/reader.h +++ b/rocAL/rocAL/include/readers/image/reader.h @@ -26,6 +26,7 @@ THE SOFTWARE. #include #include #include "meta_data_reader.h" +#include "video_properties.h" #define CHECK_LMDB_RETURN_STATUS(status) \ do { \ @@ -44,6 +45,7 @@ enum class StorageType COCO_FILE_SYSTEM = 5, SEQUENCE_FILE_SYSTEM = 6, MXNET_RECORDIO = 7, + VIDEO_FILE_SYSTEM = 8, }; struct ReaderConfig @@ -66,15 +68,19 @@ struct ReaderConfig void set_loop(bool loop) { _loop = loop; } void set_meta_data_reader(std::shared_ptr meta_data_reader) { _meta_data_reader = meta_data_reader; } void set_sequence_length(unsigned sequence_length) { _sequence_length = sequence_length; } - void set_frame_step(unsigned step) { _step = step; } - void set_frame_stride(unsigned stride) { _stride = stride; } + void set_frame_step(unsigned step) { _sequence_frame_step = step; } + void set_frame_stride(unsigned stride) { _sequence_frame_stride = stride; } size_t get_shard_count() { return _shard_count; } size_t get_shard_id() { return _shard_id; } size_t get_batch_size() { return _batch_count; } size_t get_sequence_length() { return _sequence_length; } - size_t get_frame_step() { return _step; } - size_t get_frame_stride() { return _stride; } + size_t get_frame_step() { return _sequence_frame_step; } + size_t get_frame_stride() { return _sequence_frame_stride; } std::string path() { return _path; } +#ifdef ROCAL_VIDEO + void set_video_properties(VideoProperties video_prop) { _video_prop = video_prop;} + VideoProperties get_video_properties() { return _video_prop; } +#endif std::string json_path() { return _json_path; } std::map feature_key_map() { return _feature_key_map; } void set_file_prefix(const std::string &prefix) { _file_prefix = prefix; } @@ -89,12 +95,15 @@ struct ReaderConfig size_t _shard_id = 0; size_t _batch_count = 1; //!< The reader will repeat images if necessary to be able to have images in multiples of the _batch_count. size_t _sequence_length = 1; // Video reader module sequence length - size_t _step; - size_t _stride = 1; + size_t _sequence_frame_step; + size_t _sequence_frame_stride = 1; bool _shuffle = false; bool _loop = false; std::string _file_prefix = ""; //!< to read only files with prefix. supported only for cifar10_data_reader and tf_record_reader std::shared_ptr _meta_data_reader = nullptr; +#ifdef ROCAL_VIDEO + VideoProperties _video_prop; +#endif }; // MXNet image recordio struct - used to read the contents from the MXNet recordIO files. @@ -107,8 +116,7 @@ struct ImageRecordIOHeader { * image_id[0] is used to store image id */ }; -class Reader -{ +class Reader { public: enum class Status { diff --git a/rocAL/rocAL/include/readers/video/video_file_source_reader.h b/rocAL/rocAL/include/readers/video/video_file_source_reader.h index 562ebc530d..71a122aba9 100644 --- a/rocAL/rocAL/include/readers/video/video_file_source_reader.h +++ b/rocAL/rocAL/include/readers/video/video_file_source_reader.h @@ -37,7 +37,7 @@ class VideoFileSourceReader : public VideoReader /*! \param desc User provided descriptor containing the files' path. */ - VideoReader::Status initialize(VideoReaderConfig desc) override; + VideoReader::Status initialize(ReaderConfig desc) override; //! Reads the next resource item SequenceInfo get_sequence_info() override; diff --git a/rocAL/rocAL/include/readers/video/video_reader.h b/rocAL/rocAL/include/readers/video/video_reader.h index dcea9e486e..bd870d7ccc 100644 --- a/rocAL/rocAL/include/readers/video/video_reader.h +++ b/rocAL/rocAL/include/readers/video/video_reader.h @@ -26,64 +26,12 @@ THE SOFTWARE. #include #include #include "commons.h" +#include "reader.h" #include "meta_data_reader.h" #include "video_properties.h" #ifdef ROCAL_VIDEO -enum class VideoStorageType -{ - VIDEO_FILE_SYSTEM = 0 -}; - -struct VideoReaderConfig -{ - explicit VideoReaderConfig(VideoStorageType type, std::string path = "", bool shuffle = false, bool loop = false) : - _type(type), _path(path), _shuffle(shuffle), _loop(loop) {} - virtual VideoStorageType type() { return _type; }; - void set_path(const std::string &path) { _path = path; } - void set_shard_id(size_t shard_id) { _shard_id = shard_id; } - void set_shard_count(size_t shard_count) { _shard_count = shard_count; } - /// \param read_batch_count Tells the reader it needs to read the video sequences of load_batch_count. If available video sequences not divisible to load_batch_count, - /// the reader will repeat video sequences to make available sequences an even multiple of this load_batch_count - void set_batch_count(size_t read_batch_count) { _batch_count = read_batch_count; } - /// \param loop if True the reader's available video sequences still the same no matter how many sequences have been read - bool shuffle() { return _shuffle; } - bool loop() { return _loop; } - void set_shuffle(bool shuffle) { _shuffle = shuffle; } - void set_loop(bool loop) { _loop = loop; } - void set_meta_data_reader(std::shared_ptr meta_data_reader) { _meta_data_reader = meta_data_reader; } - void set_sequence_length(unsigned sequence_length) { _sequence_length = sequence_length; } - void set_frame_step(unsigned step) { _video_frame_step = step; } - void set_frame_stride(unsigned stride) { _video_frame_stride = stride; } - void set_total_frames_count(size_t total) { _total_frames_count = total; } - void set_video_properties(VideoProperties video_prop) { _video_prop = video_prop;} - size_t get_shard_count() { return _shard_count; } - size_t get_shard_id() { return _shard_id; } - size_t get_batch_size() { return _batch_count; } - size_t get_sequence_length() { return _sequence_length; } - size_t get_frame_step() { return _video_frame_step; } - size_t get_frame_stride() { return _video_frame_stride; } - size_t get_total_frames_count() { return _total_frames_count; } - VideoProperties get_video_properties() { return _video_prop; } - std::string path() { return _path; } - std::shared_ptr meta_data_reader() { return _meta_data_reader; } -private: - VideoStorageType _type = VideoStorageType::VIDEO_FILE_SYSTEM; - std::string _path = ""; - size_t _shard_count = 1; - size_t _shard_id = 0; - size_t _batch_count = 1; //!< The reader will repeat images if necessary to be able to have images in multiples of the _batch_count. - size_t _sequence_length = 1; // Video reader module sequence length - size_t _video_frame_step; - size_t _video_frame_stride = 1; - VideoProperties _video_prop; - size_t _total_frames_count; - bool _shuffle = false; - bool _loop = false; - std::shared_ptr _meta_data_reader = nullptr; -}; -struct SequenceInfo -{ +struct SequenceInfo { size_t start_frame_number; std::string video_file_name; }; @@ -99,7 +47,7 @@ class VideoReader /*! \param desc User provided descriptor containing the files' path. */ - virtual Status initialize(VideoReaderConfig desc) = 0; + virtual Status initialize(ReaderConfig desc) = 0; //! Reads the next resource item virtual SequenceInfo get_sequence_info() = 0; diff --git a/rocAL/rocAL/include/readers/video/video_reader_factory.h b/rocAL/rocAL/include/readers/video/video_reader_factory.h index d8e6ffd0d5..272763502f 100644 --- a/rocAL/rocAL/include/readers/video/video_reader_factory.h +++ b/rocAL/rocAL/include/readers/video/video_reader_factory.h @@ -26,5 +26,5 @@ THE SOFTWARE. #include "video_reader.h" #ifdef ROCAL_VIDEO -std::shared_ptr create_video_reader(VideoReaderConfig config); +std::shared_ptr create_video_reader(ReaderConfig config); #endif \ No newline at end of file diff --git a/rocAL/rocAL/source/api/rocal_api_augmentation.cpp b/rocAL/rocAL/source/api/rocal_api_augmentation.cpp index 970b62163c..1e58112cd4 100644 --- a/rocAL/rocAL/source/api/rocal_api_augmentation.cpp +++ b/rocAL/rocAL/source/api/rocal_api_augmentation.cpp @@ -32,37 +32,32 @@ THE SOFTWARE. // Calculated from the largest resize shorter dimension in imagenet validation dataset #define MAX_ASPECT_RATIO 6.0f -RocalImage ROCAL_API_CALL -rocalSequenceRearrange( - RocalContext p_context, - RocalImage p_input, - unsigned int* new_order, - unsigned int new_sequence_length, - unsigned int sequence_length, - bool is_output ) -{ - Image* output = nullptr; - if ((p_context == nullptr) || (p_input == nullptr)) { +RocalTensor ROCAL_API_CALL +rocalSequenceRearrange(RocalContext p_context, + RocalTensor input, + std::vector& new_order, + bool is_output) { + rocalTensor* output = nullptr; + if ((p_context == nullptr) || (input == nullptr)) { ERR("Invalid ROCAL context or invalid input image") return output; } auto context = static_cast(p_context); - try - { - if(sequence_length == 0) - THROW("sequence_length passed should be bigger than 0") - auto input = static_cast(p_input); - auto info = ImageInfo(input->info().width(), input->info().height_single(), - context->master_graph->internal_batch_size() * new_sequence_length, - input->info().color_plane_count(), - context->master_graph->mem_type(), - input->info().color_format() ); - output = context->master_graph->create_image(info, is_output); + try { + + if(new_order.size() == 0) + THROW("The new order for the sequence passed should be greater than 0") + rocalTensorInfo output_info = input->info(); + std::vector new_dims; + new_dims = output_info.dims(); + new_dims[1] = new_order.size(); + output_info.set_dims(new_dims); + + output = context->master_graph->create_tensor(output_info, is_output); std::shared_ptr sequence_rearrange_node = context->master_graph->add_node({input}, {output}); - sequence_rearrange_node->init(new_order, new_sequence_length, sequence_length, context->master_graph->internal_batch_size()); + sequence_rearrange_node->init(new_order); } - catch(const std::exception& e) - { + catch(const std::exception& e) { context->capture_error(e.what()); ERR(e.what()) } diff --git a/rocAL/rocAL/source/api/rocal_api_data_loaders.cpp b/rocAL/rocAL/source/api/rocal_api_data_loaders.cpp index cbc6ca9299..aced4f26a7 100644 --- a/rocAL/rocAL/source/api/rocal_api_data_loaders.cpp +++ b/rocAL/rocAL/source/api/rocal_api_data_loaders.cpp @@ -276,7 +276,7 @@ rocalJpegFileSource( return output; } -RocalImage ROCAL_API_CALL +RocalTensor ROCAL_API_CALL rocalSequenceReader( RocalContext p_context, const char* source_path, @@ -287,16 +287,14 @@ rocalSequenceReader( bool shuffle, bool loop, unsigned step, - unsigned stride) -{ - Image* output = nullptr; + unsigned stride) { + rocalTensor* output = nullptr; if (p_context == nullptr) { ERR("Invalid ROCAL context or invalid input image") return output; } auto context = static_cast(p_context); - try - { + try { if(sequence_length == 0) THROW("Sequence length passed should be bigger than 0") // Set sequence batch size and batch ratio in master graph as it varies according to sequence length @@ -320,12 +318,17 @@ rocalSequenceReader( INFO("Internal buffer size width = "+ TOSTR(width)+ " height = "+ TOSTR(height) + " depth = "+ TOSTR(num_of_planes)) - auto info = ImageInfo(width, height, - context->internal_batch_size(), - num_of_planes, - context->master_graph->mem_type(), - color_format ); - output = context->master_graph->create_loader_output_image(info); + std::vector dims = {context->user_batch_size(), sequence_length, height, + width, static_cast(num_of_planes)}; + auto info = rocalTensorInfo(std::move(dims), + context->master_graph->mem_type(), + RocalTensorDataType::UINT8); + info.set_color_format(color_format); + info.set_tensor_layout(RocalTensorlayout::NFHWC); + info.set_sequence_batch_size(sequence_length); + info.set_max_shape(); + + output = context->master_graph->create_loader_output_tensor(info); context->master_graph->add_node({}, {output})->init(internal_shard_count, source_path, "", @@ -342,22 +345,19 @@ rocalSequenceReader( step, stride); context->master_graph->set_loop(loop); - if(is_output) - { - auto actual_output = context->master_graph->create_image(info, is_output); + if(is_output) { + auto actual_output = context->master_graph->create_tensor(info, is_output); context->master_graph->add_node({output}, {actual_output}); } - } - catch(const std::exception& e) - { + } catch(const std::exception& e) { context->capture_error(e.what()); std::cerr << e.what() << '\n'; } return output; } -RocalImage ROCAL_API_CALL +RocalTensor ROCAL_API_CALL rocalSequenceReaderSingleShard( RocalContext p_context, const char* source_path, @@ -369,16 +369,14 @@ rocalSequenceReaderSingleShard( bool shuffle, bool loop, unsigned step, - unsigned stride) -{ - Image* output = nullptr; + unsigned stride) { + rocALTensor* output = nullptr; if (p_context == nullptr) { ERR("Invalid ROCAL context or invalid input image") return output; } auto context = static_cast(p_context); - try - { + try { if(sequence_length == 0) THROW("Sequence length passed should be bigger than 0") // Set sequence batch size and batch ratio in master graph as it varies according to sequence length @@ -405,12 +403,16 @@ rocalSequenceReaderSingleShard( INFO("Internal buffer size width = "+ TOSTR(width)+ " height = "+ TOSTR(height) + " depth = "+ TOSTR(num_of_planes)) - auto info = ImageInfo(width, height, - context->internal_batch_size(), - num_of_planes, - context->master_graph->mem_type(), - color_format ); - output = context->master_graph->create_loader_output_image(info); + std::vector dims = {context->user_batch_size(), sequence_length, height, + width, static_cast(num_of_planes)}; + auto info = rocalTensorInfo(std::move(dims), + context->master_graph->mem_type(), + RocalTensorDataType::UINT8); + info.set_color_format(color_format); + info.set_tensor_layout(RocalTensorlayout::NFHWC); + info.set_sequence_batch_size(sequence_length); + info.set_max_shape(); + output = context->master_graph->create_loader_output_tensor(info); context->master_graph->add_node({}, {output})->init(shard_id, shard_count, source_path, "", @@ -427,15 +429,12 @@ rocalSequenceReaderSingleShard( step, stride); context->master_graph->set_loop(loop); - if(is_output) - { - auto actual_output = context->master_graph->create_image(info, is_output); + if(is_output) { + auto actual_output = context->master_graph->create_tensor(info, is_output); context->master_graph->add_node({output}, {actual_output}); } - } - catch(const std::exception& e) - { + } catch(const std::exception& e) { context->capture_error(e.what()); std::cerr << e.what() << '\n'; } @@ -1954,7 +1953,7 @@ rocalFusedJpegCropSingleShard( return output; } -RocalImage ROCAL_API_CALL +RocalTensor ROCAL_API_CALL rocalVideoFileSource( RocalContext p_context, const char* source_path, @@ -1967,46 +1966,46 @@ rocalVideoFileSource( bool loop, unsigned step, unsigned stride, - bool file_list_frame_num) -{ - Image* output = nullptr; + bool file_list_frame_num) { + rocalTensor* output = nullptr; if (p_context == nullptr) { ERR("Invalid ROCAL context or invalid input image") return output; } auto context = static_cast(p_context); - try - { + try { #ifdef ROCAL_VIDEO if(sequence_length == 0) THROW("Sequence length passed should be bigger than 0") - // Set video loader flag in master_graph - context->master_graph->set_video_loader_flag(); // Set default step and stride values if 0 is passed step = (step == 0)? sequence_length : step; stride = (stride == 0)? 1 : stride; VideoProperties video_prop; - VideoDecoderType decoder_type; + DecoderType decoder_type; find_video_properties(video_prop, source_path, file_list_frame_num); if(rocal_decode_device == RocalDecodeDevice::ROCAL_HW_DECODE) - decoder_type = VideoDecoderType::FFMPEG_HARDWARE_DECODE; + decoder_type = DecoderType::FFMPEG_HARDWARE_DECODE; else - decoder_type = VideoDecoderType::FFMPEG_SOFTWARE_DECODE; + decoder_type = DecoderType::FFMPEG_SOFTWARE_DECODE; auto [color_format, num_of_planes] = convert_color_format(rocal_color_format); auto decoder_mode = convert_decoder_mode(rocal_decode_device); - auto info = ImageInfo(video_prop.width, video_prop.height, - context->internal_batch_size() * sequence_length, - num_of_planes, - context->master_graph->mem_type(), - color_format ); + + std::vector dims = {context->user_batch_size(), sequence_length, video_prop.height, + video_prop.width, static_cast(num_of_planes)}; + auto info = rocalTensorInfo(std::move(dims), + context->master_graph->mem_type(), + RocalTensorDataType::UINT8); + info.set_color_format(color_format); + info.set_tensor_layout(RocalTensorlayout::NFHWC); + info.set_max_shape(); - output = context->master_graph->create_loader_output_image(info); + output = context->master_graph->create_loader_output_tensor(info); context->master_graph->add_node({}, {output})->init(internal_shard_count, source_path, - VideoStorageType::VIDEO_FILE_SYSTEM, + StorageType::VIDEO_FILE_SYSTEM, decoder_type, decoder_mode, sequence_length, @@ -2019,25 +2018,21 @@ rocalVideoFileSource( context->master_graph->mem_type()); context->master_graph->set_loop(loop); - if(is_output) - { - auto actual_output = context->master_graph->create_image(info, is_output); + if(is_output) { + auto actual_output = context->master_graph->create_tensor(info, is_output); context->master_graph->add_node({output}, {actual_output}); } #else THROW("Video decoder is not enabled since ffmpeg is not present") #endif - } - catch(const std::exception& e) - { + } catch(const std::exception& e) { context->capture_error(e.what()); std::cerr << e.what() << '\n'; } return output; - } -RocalImage ROCAL_API_CALL +RocalTensor ROCAL_API_CALL rocalVideoFileSourceSingleShard( RocalContext p_context, const char* source_path, @@ -2051,21 +2046,17 @@ rocalVideoFileSourceSingleShard( bool loop, unsigned step, unsigned stride, - bool file_list_frame_num) -{ - Image* output = nullptr; + bool file_list_frame_num) { + rocalTensor* output = nullptr; if (p_context == nullptr) { - ERR("Invalid ROCAL context or invalid input image") + ERR("Invalid ROCAL context") return output; } auto context = static_cast(p_context); - try - { + try { #ifdef ROCAL_VIDEO if(sequence_length == 0) THROW("Sequence length passed should be bigger than 0") - // Set video loader flag in master_graph - context->master_graph->set_video_loader_flag(); if(shard_count < 1 ) THROW("Shard count should be bigger than 0") @@ -2078,25 +2069,28 @@ rocalVideoFileSourceSingleShard( stride = (stride == 0)? 1 : stride; VideoProperties video_prop; - VideoDecoderType decoder_type; + DecoderType decoder_type; find_video_properties(video_prop, source_path, file_list_frame_num); if(rocal_decode_device == RocalDecodeDevice::ROCAL_HW_DECODE) - decoder_type = VideoDecoderType::FFMPEG_HARDWARE_DECODE; + decoder_type = DecoderType::FFMPEG_HARDWARE_DECODE; else - decoder_type = VideoDecoderType::FFMPEG_SOFTWARE_DECODE; + decoder_type = DecoderType::FFMPEG_SOFTWARE_DECODE; auto [color_format, num_of_planes] = convert_color_format(rocal_color_format); auto decoder_mode = convert_decoder_mode(rocal_decode_device); - auto info = ImageInfo(video_prop.width, video_prop.height, - context->internal_batch_size() * sequence_length, - num_of_planes, - context->master_graph->mem_type(), - color_format ); + std::vector dims = {context->user_batch_size(), sequence_length, video_prop.height, + video_prop.width, static_cast(num_of_planes)}; + auto info = rocalTensorInfo(std::move(dims), + context->master_graph->mem_type(), + RocalTensorDataType::UINT8); + info.set_color_format(color_format); + info.set_tensor_layout(RocalTensorlayout::NFHWC); + info.set_max_shape(); - output = context->master_graph->create_loader_output_image(info); + output = context->master_graph->create_loader_output_tensor(info); context->master_graph->add_node({}, {output})->init(shard_id, shard_count, source_path, - VideoStorageType::VIDEO_FILE_SYSTEM, + StorageType::VIDEO_FILE_SYSTEM, decoder_type, decoder_mode, sequence_length, @@ -2109,17 +2103,14 @@ rocalVideoFileSourceSingleShard( context->master_graph->mem_type()); context->master_graph->set_loop(loop); - if(is_output) - { - auto actual_output = context->master_graph->create_image(info, is_output); + if(is_output) { + auto actual_output = context->master_graph->create_tensor(info, is_output); context->master_graph->add_node({output}, {actual_output}); } #else THROW("Video decoder is not enabled since ffmpeg is not present") #endif - } - catch(const std::exception& e) - { + } catch(const std::exception& e) { context->capture_error(e.what()); std::cerr << e.what() << '\n'; } @@ -2127,7 +2118,7 @@ rocalVideoFileSourceSingleShard( } -RocalImage ROCAL_API_CALL +RocalTensor ROCAL_API_CALL rocalVideoFileResize( RocalContext p_context, const char* source_path, @@ -2149,44 +2140,44 @@ rocalVideoFileResize( unsigned resize_longer, RocalResizeInterpolationType interpolation_type) { - Image* resize_output = nullptr; + rocalTensor* resize_output = nullptr; if (p_context == nullptr) { ERR("Invalid ROCAL context or invalid input image") return resize_output; } auto context = static_cast(p_context); - try - { + try { #ifdef ROCAL_VIDEO if(sequence_length == 0) THROW("Sequence length passed should be bigger than 0") - // Set video loader flag in master_graph - context->master_graph->set_video_loader_flag(); // Set default step and stride values if 0 is passed step = (step == 0)? sequence_length : step; stride = (stride == 0)? 1 : stride; VideoProperties video_prop; - VideoDecoderType decoder_type; + DecoderType decoder_type; find_video_properties(video_prop, source_path, file_list_frame_num); if(rocal_decode_device == RocalDecodeDevice::ROCAL_HW_DECODE) - decoder_type = VideoDecoderType::FFMPEG_HARDWARE_DECODE; + decoder_type = DecoderType::FFMPEG_HARDWARE_DECODE; else - decoder_type = VideoDecoderType::FFMPEG_SOFTWARE_DECODE; + decoder_type = DecoderType::FFMPEG_SOFTWARE_DECODE; auto [color_format, num_of_planes] = convert_color_format(rocal_color_format); auto decoder_mode = convert_decoder_mode(rocal_decode_device); - auto info = ImageInfo(video_prop.width, video_prop.height, - context->internal_batch_size() * sequence_length, - num_of_planes, - context->master_graph->mem_type(), - color_format ); + std::vector dims = {context->user_batch_size(), sequence_length, video_prop.height, + video_prop.width, static_cast(num_of_planes)}; + auto info = rocalTensorInfo(std::move(dims), + context->master_graph->mem_type(), + RocalTensorDataType::UINT8); + info.set_color_format(color_format); + info.set_tensor_layout(RocalTensorlayout::NFHWC); + info.set_max_shape(); - Image* output = context->master_graph->create_loader_output_image(info); + rocalTensor* output = context->master_graph->create_loader_output_tensor(info); context->master_graph->add_node({}, {output})->init(internal_shard_count, source_path, - VideoStorageType::VIDEO_FILE_SYSTEM, + StorageType::VIDEO_FILE_SYSTEM, decoder_type, decoder_mode, sequence_length, @@ -2199,8 +2190,7 @@ rocalVideoFileResize( context->master_graph->mem_type()); context->master_graph->set_loop(loop); - if(dest_width != video_prop.width && dest_height != video_prop.height) - { + if(dest_width != video_prop.width && dest_height != video_prop.height) { if((dest_width | dest_height | resize_longer | resize_shorter) == 0) THROW("Atleast one size 'dest_width' or 'dest_height' or 'resize_shorter' or 'resize_longer' must be specified") if((dest_width | dest_height) && (resize_longer | resize_shorter)) @@ -2244,8 +2234,8 @@ rocalVideoFileResize( } else { // compute the output info width and height wrt the scaling modes and roi passed if(resize_scaling_mode == ROCAL_SCALING_MODE_STRETCH) { - max_out_width = out_width ? out_width : info.width(); - max_out_height = out_height ? out_height : info.height_single(); + max_out_width = out_width ? out_width : info.max_shape()[0]; + max_out_height = out_height ? out_height : info.max_shape()[1]; } else if(resize_scaling_mode == ROCAL_SCALING_MODE_NOT_SMALLER) { max_out_width = (out_width ? out_width : out_height) * MAX_ASPECT_RATIO; max_out_height = (out_height ? out_height : out_width) * MAX_ASPECT_RATIO; @@ -2261,46 +2251,42 @@ rocalVideoFileResize( // set the width and height in the output info // For the resize node, user can create an image with a different width and height - ImageInfo output_info = info; - output_info.width(max_out_width); - output_info.height(max_out_height); + rocalTensorInfo output_info = info; + std::vector out_dims = {context->user_batch_size(), sequence_length, max_out_height, + max_out_width, static_cast(num_of_planes)}; + output_info.set_dims(out_dims); - resize_output = context->master_graph->create_image(output_info, false); + resize_output = context->master_graph->create_tensor(output_info, false); // For the nodes that user provides the output size the dimension of all the images after this node will be fixed and equal to that size - resize_output->reset_image_roi(); + resize_output->reset_tensor_roi(); std::shared_ptr resize_node = context->master_graph->add_node({output}, {resize_output}); resize_node->init(out_width, out_height, resize_scaling_mode, maximum_size, interpolation_type); if (context->master_graph->meta_data_graph()) context->master_graph->meta_add_node(resize_node); - if(is_output) - { - auto actual_output = context->master_graph->create_image(output_info, is_output); + if(is_output) { + auto actual_output = context->master_graph->create_tensor(output_info, is_output); context->master_graph->add_node({resize_output}, {actual_output}); } - } - else{ - if(is_output) - { - auto actual_output = context->master_graph->create_image(info, is_output); + } else { + if(is_output) { + auto actual_output = context->master_graph->create_tensor(info, is_output); context->master_graph->add_node({output}, {actual_output}); } } #else THROW("Video decoder is not enabled since ffmpeg is not present") #endif - } - catch(const std::exception& e) - { + } catch(const std::exception& e) { context->capture_error(e.what()); std::cerr << e.what() << '\n'; } return resize_output; } -RocalImage ROCAL_API_CALL +RocalTensor ROCAL_API_CALL rocalVideoFileResizeSingleShard( RocalContext p_context, const char* source_path, @@ -2323,20 +2309,17 @@ rocalVideoFileResizeSingleShard( unsigned resize_longer, RocalResizeInterpolationType interpolation_type) { - Image* resize_output = nullptr; + rocalTensor* resize_output = nullptr; if (p_context == nullptr) { ERR("Invalid ROCAL context or invalid input image") return resize_output; } auto context = static_cast(p_context); - try - { + try { #ifdef ROCAL_VIDEO if(sequence_length == 0) THROW("Sequence length passed should be bigger than 0") - // Set video loader flag in master_graph - context->master_graph->set_video_loader_flag(); if(shard_count < 1 ) THROW("Shard count should be bigger than 0") @@ -2349,24 +2332,27 @@ rocalVideoFileResizeSingleShard( stride = (stride == 0)? 1 : stride; VideoProperties video_prop; - VideoDecoderType decoder_type; + DecoderType decoder_type; find_video_properties(video_prop, source_path, file_list_frame_num); if(rocal_decode_device == RocalDecodeDevice::ROCAL_HW_DECODE) - decoder_type = VideoDecoderType::FFMPEG_HARDWARE_DECODE; + decoder_type = DecoderType::FFMPEG_HARDWARE_DECODE; else - decoder_type = VideoDecoderType::FFMPEG_SOFTWARE_DECODE; + decoder_type = DecoderType::FFMPEG_SOFTWARE_DECODE; auto [color_format, num_of_planes] = convert_color_format(rocal_color_format); auto decoder_mode = convert_decoder_mode(rocal_decode_device); - auto info = ImageInfo(video_prop.width, video_prop.height, - context->internal_batch_size() * sequence_length, - num_of_planes, - context->master_graph->mem_type(), - color_format ); - - Image* output = context->master_graph->create_loader_output_image(info); + + std::vector dims = {context->user_batch_size(), sequence_length, video_prop.height, + video_prop.width, static_cast(num_of_planes)}; + auto info = rocalTensorInfo(std::move(dims), + context->master_graph->mem_type(), + RocalTensorDataType::UINT8); + info.set_color_format(color_format); + info.set_tensor_layout(RocalTensorlayout::NFHWC); + info.set_max_shape(); + rocalTensor* output = context->master_graph->create_loader_output_tensor(info); context->master_graph->add_node({}, {output})->init(shard_id, shard_count, source_path, - VideoStorageType::VIDEO_FILE_SYSTEM, + StorageType::VIDEO_FILE_SYSTEM, decoder_type, decoder_mode, sequence_length, @@ -2379,8 +2365,7 @@ rocalVideoFileResizeSingleShard( context->master_graph->mem_type()); context->master_graph->set_loop(loop); - if(dest_width != video_prop.width && dest_height != video_prop.height) - { + if(dest_width != video_prop.width && dest_height != video_prop.height) { if((dest_width | dest_height | resize_longer | resize_shorter) == 0) THROW("Atleast one size 'dest_width' or 'dest_height' or 'resize_shorter' or 'resize_longer' must be specified") if((dest_width | dest_height) && (resize_longer | resize_shorter)) @@ -2424,8 +2409,8 @@ rocalVideoFileResizeSingleShard( } else { // compute the output info width and height wrt the scaling modes and roi passed if(resize_scaling_mode == ROCAL_SCALING_MODE_STRETCH) { - max_out_width = out_width ? out_width : info.width(); - max_out_height = out_height ? out_height : info.height_single(); + max_out_width = out_width ? out_width : info.max_shape()[0]; + max_out_height = out_height ? out_height : info.max_shape()[1]; } else if(resize_scaling_mode == ROCAL_SCALING_MODE_NOT_SMALLER) { max_out_width = (out_width ? out_width : out_height) * MAX_ASPECT_RATIO; max_out_height = (out_height ? out_height : out_width) * MAX_ASPECT_RATIO; @@ -2441,38 +2426,34 @@ rocalVideoFileResizeSingleShard( // set the width and height in the output info // For the resize node, user can create an image with a different width and height - ImageInfo output_info = info; - output_info.width(max_out_width); - output_info.height(max_out_height); + rocalTensorInfo output_info = info; + std::vector out_dims = {context->user_batch_size(), sequence_length, max_out_height, + max_out_width, static_cast(num_of_planes)}; + output_info.set_dims(out_dims); - resize_output = context->master_graph->create_image(output_info, false); + resize_output = context->master_graph->create_tensor(output_info, false); // For the nodes that user provides the output size the dimension of all the images after this node will be fixed and equal to that size - resize_output->reset_image_roi(); + resize_output->reset_tensor_roi(); std::shared_ptr resize_node = context->master_graph->add_node({output}, {resize_output}); resize_node->init(out_width, out_height, resize_scaling_mode, maximum_size, interpolation_type); if (context->master_graph->meta_data_graph()) context->master_graph->meta_add_node(resize_node); - if(is_output) - { - auto actual_output = context->master_graph->create_image(output_info, is_output); + if(is_output) { + auto actual_output = context->master_graph->create_tensor(output_info, is_output); context->master_graph->add_node({resize_output}, {actual_output}); } - } - else{ - if(is_output) - { - auto actual_output = context->master_graph->create_image(info, is_output); + } else { + if(is_output) { + auto actual_output = context->master_graph->create_tensor(info, is_output); context->master_graph->add_node({output}, {actual_output}); } } #else THROW("Video decoder is not enabled since ffmpeg is not present") #endif - } - catch(const std::exception& e) - { + } catch(const std::exception& e) { context->capture_error(e.what()); std::cerr << e.what() << '\n'; } diff --git a/rocAL/rocAL/source/augmentations/geometry_augmentations/node_crop_mirror_normalize.cpp b/rocAL/rocAL/source/augmentations/geometry_augmentations/node_crop_mirror_normalize.cpp index 3f61203f49..78545d52b0 100644 --- a/rocAL/rocAL/source/augmentations/geometry_augmentations/node_crop_mirror_normalize.cpp +++ b/rocAL/rocAL/source/augmentations/geometry_augmentations/node_crop_mirror_normalize.cpp @@ -87,6 +87,8 @@ void CropMirrorNormalizeNode::create_node() { vx_size num_of_dims = 2; vx_size stride[num_of_dims]; std::vector crop_tensor_dims = {_batch_size, 4}; + if(_inputs[0]->info().layout() == RocalTensorlayout::NFCHW || _inputs[0]->info().layout() == RocalTensorlayout::NFHWC) + crop_tensor_dims = {_inputs[0]->info().dims()[0] * _inputs[0]->info().dims()[1], 4}; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions stride[0] = sizeof(vx_uint32); stride[1] = stride[0] * crop_tensor_dims[0]; vx_enum mem_type = VX_MEMORY_TYPE_HOST; diff --git a/rocAL/rocAL/source/augmentations/node_sequence_rearrange.cpp b/rocAL/rocAL/source/augmentations/node_sequence_rearrange.cpp index 30ec3b3779..86aa94f699 100644 --- a/rocAL/rocAL/source/augmentations/node_sequence_rearrange.cpp +++ b/rocAL/rocAL/source/augmentations/node_sequence_rearrange.cpp @@ -26,35 +26,29 @@ THE SOFTWARE. #include "exception.h" -SequenceRearrangeNode::SequenceRearrangeNode(const std::vector &inputs, const std::vector &outputs) : - Node(inputs, outputs) -{ -} +SequenceRearrangeNode::SequenceRearrangeNode(const std::vector &inputs, const std::vector &outputs) : + Node(inputs, outputs) { } -void SequenceRearrangeNode::create_node() -{ +void SequenceRearrangeNode::create_node() { if(_node) return; vx_status status; - _sequence_array = vxCreateArray(vxGetContext((vx_reference)_graph->get()), VX_TYPE_UINT32, _new_sequence_length); - status = vxAddArrayItems(_sequence_array, _new_sequence_length, _new_order.data(), sizeof(vx_uint32)); + vx_array sequence_array = vxCreateArray(vxGetContext((vx_reference)_graph->get()), VX_TYPE_UINT32, _new_order.size()); + status = vxAddArrayItems(sequence_array, _new_order.size(), _new_order.data(), sizeof(vx_uint32)); if(status != VX_SUCCESS) - THROW("Adding array items failed: "+ TOSTR(status)) - _node = vxExtrppNode_SequenceRearrange(_graph->get(), _inputs[0]->handle(), _outputs[0]->handle(), _sequence_array, _new_sequence_length, _sequence_length, _sequence_count); + THROW("Adding array items failed: "+ TOSTR(status)); + + int input_layout = (int)_inputs[0]->info().layout(); + vx_scalar in_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &input_layout); + _node = vxExtrppNode_SequenceRearrange(_graph->get(), _inputs[0]->handle(), _outputs[0]->handle(), sequence_array, in_layout_vx); + if((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) THROW("Adding the sequence rearrange (vxExtrppNode_SequenceRearrange) node failed: "+ TOSTR(status)) } -void SequenceRearrangeNode::init(unsigned int* new_order, unsigned int new_sequence_length, unsigned int sequence_length, unsigned int sequence_count) -{ - _new_sequence_length = new_sequence_length; - _sequence_length = sequence_length; - _sequence_count = sequence_count; - _new_order.resize(_new_sequence_length); - std::copy(new_order, new_order + _new_sequence_length, _new_order.begin()); +void SequenceRearrangeNode::init(std::vector& new_order) { + _new_order = new_order; } -void SequenceRearrangeNode::update_node() -{ -} +void SequenceRearrangeNode::update_node() { } diff --git a/rocAL/rocAL/source/decoders/video/video_decoder_factory.cpp b/rocAL/rocAL/source/decoders/video/video_decoder_factory.cpp index ca0856973d..4685172163 100644 --- a/rocAL/rocAL/source/decoders/video/video_decoder_factory.cpp +++ b/rocAL/rocAL/source/decoders/video/video_decoder_factory.cpp @@ -26,13 +26,13 @@ THE SOFTWARE. #include "commons.h" #ifdef ROCAL_VIDEO -std::shared_ptr create_video_decoder(VideoDecoderConfig config) +std::shared_ptr create_video_decoder(DecoderConfig config) { switch (config.type()) { - case VideoDecoderType::FFMPEG_SOFTWARE_DECODE: + case DecoderType::FFMPEG_SOFTWARE_DECODE: return std::make_shared(); - case VideoDecoderType::FFMPEG_HARDWARE_DECODE: + case DecoderType::FFMPEG_HARDWARE_DECODE: return std::make_shared(); default: THROW("Unsupported decoder type " + TOSTR(config.type())); diff --git a/rocAL/rocAL/source/loaders/video/node_video_loader.cpp b/rocAL/rocAL/source/loaders/video/node_video_loader.cpp index 54f5822ac3..57661d0b07 100644 --- a/rocAL/rocAL/source/loaders/video/node_video_loader.cpp +++ b/rocAL/rocAL/source/loaders/video/node_video_loader.cpp @@ -32,7 +32,7 @@ VideoLoaderNode::VideoLoaderNode(Image *output, void *device_resources): _loader_module = std::make_shared(device_resources); } -void VideoLoaderNode::init(unsigned internal_shard_count, const std::string &source_path, VideoStorageType storage_type, VideoDecoderType decoder_type, DecodeMode decoder_mode, +void VideoLoaderNode::init(unsigned internal_shard_count, const std::string &source_path, StorageType storage_type, DecoderType decoder_type, DecodeMode decoder_mode, unsigned sequence_length, unsigned step, unsigned stride, VideoProperties &video_prop, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type) { _decode_mode = decoder_mode; @@ -42,18 +42,18 @@ void VideoLoaderNode::init(unsigned internal_shard_count, const std::string &sou THROW("Shard count should be greater than or equal to one") _loader_module->set_output_image(_outputs[0]); // Set reader and decoder config accordingly for the VideoLoaderNode - auto reader_cfg = VideoReaderConfig(storage_type, source_path, shuffle, loop); + auto reader_cfg = ReaderConfig(storage_type, source_path, shuffle, loop); reader_cfg.set_shard_count(internal_shard_count); reader_cfg.set_batch_count(load_batch_count); reader_cfg.set_sequence_length(sequence_length); reader_cfg.set_frame_step(step); reader_cfg.set_frame_stride(stride); reader_cfg.set_video_properties(video_prop); - _loader_module->initialize(reader_cfg, VideoDecoderConfig(decoder_type), mem_type, _batch_size); + _loader_module->initialize(reader_cfg, DecoderConfig(decoder_type), mem_type, _batch_size); _loader_module->start_loading(); } -std::shared_ptr VideoLoaderNode::get_loader_module() +std::shared_ptr VideoLoaderNode::get_loader_module() { if (!_loader_module) WRN("VideoLoaderNode's loader module is null, not initialized") diff --git a/rocAL/rocAL/source/loaders/video/node_video_loader_single_shard.cpp b/rocAL/rocAL/source/loaders/video/node_video_loader_single_shard.cpp index a8bfc0b155..aaeba7cba9 100644 --- a/rocAL/rocAL/source/loaders/video/node_video_loader_single_shard.cpp +++ b/rocAL/rocAL/source/loaders/video/node_video_loader_single_shard.cpp @@ -30,7 +30,7 @@ VideoLoaderSingleShardNode::VideoLoaderSingleShardNode(Image *output, void *devi _loader_module = std::make_shared(device_resources); } -void VideoLoaderSingleShardNode::init(unsigned shard_id, unsigned shard_count, const std::string &source_path, VideoStorageType storage_type, VideoDecoderType decoder_type, DecodeMode decoder_mode, +void VideoLoaderSingleShardNode::init(unsigned shard_id, unsigned shard_count, const std::string &source_path, StorageType storage_type, DecoderType decoder_type, DecodeMode decoder_mode, unsigned sequence_length, unsigned step, unsigned stride, VideoProperties &video_prop, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type) { _decode_mode = decoder_mode; // for future use @@ -42,7 +42,7 @@ void VideoLoaderSingleShardNode::init(unsigned shard_id, unsigned shard_count, c THROW("Shard is should be smaller than shard count") _loader_module->set_output_image(_outputs[0]); // Set reader and decoder config accordingly for the ImageLoaderNode - auto reader_cfg = VideoReaderConfig(storage_type, source_path, shuffle, loop); + auto reader_cfg = ReaderConfig(storage_type, source_path, shuffle, loop); reader_cfg.set_shard_count(shard_count); reader_cfg.set_shard_id(shard_id); reader_cfg.set_batch_count(load_batch_count); @@ -50,11 +50,11 @@ void VideoLoaderSingleShardNode::init(unsigned shard_id, unsigned shard_count, c reader_cfg.set_frame_step(step); reader_cfg.set_frame_stride(stride); reader_cfg.set_video_properties(video_prop); - _loader_module->initialize(reader_cfg, VideoDecoderConfig(decoder_type), mem_type, _batch_size); + _loader_module->initialize(reader_cfg, DecoderConfig(decoder_type), mem_type, _batch_size); _loader_module->start_loading(); } -std::shared_ptr VideoLoaderSingleShardNode::get_loader_module() +std::shared_ptr VideoLoaderSingleShardNode::get_loader_module() { if (!_loader_module) WRN("VideoLoaderSingleShardNode's loader module is null, not initialized") diff --git a/rocAL/rocAL/source/loaders/video/video_loader.cpp b/rocAL/rocAL/source/loaders/video/video_loader.cpp index d21f514965..51d25ccad4 100644 --- a/rocAL/rocAL/source/loaders/video/video_loader.cpp +++ b/rocAL/rocAL/source/loaders/video/video_loader.cpp @@ -96,7 +96,7 @@ void VideoLoader::de_init() _is_initialized = false; } -VideoLoaderModuleStatus +LoaderModuleStatus VideoLoader::load_next() { return update_output_image(); @@ -119,7 +119,7 @@ void VideoLoader::stop_internal_thread() _load_thread.join(); } -void VideoLoader::initialize(VideoReaderConfig reader_cfg, VideoDecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool decoder_keep_original) +void VideoLoader::initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool decoder_keep_original) { if (_is_initialized) WRN("initialize() function is already called and loader module is initialized") @@ -160,11 +160,11 @@ void VideoLoader::start_loading() _load_thread = std::thread(&VideoLoader::load_routine, this); } -VideoLoaderModuleStatus +LoaderModuleStatus VideoLoader::load_routine() { LOG("Started the internal loader thread"); - VideoLoaderModuleStatus last_load_status = VideoLoaderModuleStatus::OK; + LoaderModuleStatus last_load_status = LoaderModuleStatus::OK; // Initially record number of all the frames that are going to be loaded, this is used to know how many still there while (_internal_thread_running) @@ -173,7 +173,7 @@ VideoLoader::load_routine() if (!_internal_thread_running) break; - auto load_status = VideoLoaderModuleStatus::NO_MORE_DATA_TO_READ; + auto load_status = LoaderModuleStatus::NO_MORE_DATA_TO_READ; { load_status = _video_loader->load(data, _decoded_img_info._image_names, @@ -187,19 +187,19 @@ VideoLoader::load_routine() _sequence_frame_timestamps_vec, _output_image->info().color_format()); - if (load_status == VideoLoaderModuleStatus::OK) + if (load_status == LoaderModuleStatus::OK) { _circ_buff.set_image_info(_decoded_img_info); _circ_buff.push(); _image_counter += _output_image->info().batch_size(); } } - if (load_status != VideoLoaderModuleStatus::OK) + if (load_status != LoaderModuleStatus::OK) { if (last_load_status != load_status) { - if (load_status == VideoLoaderModuleStatus::NO_MORE_DATA_TO_READ || - load_status == VideoLoaderModuleStatus::NO_FILES_TO_READ) + if (load_status == LoaderModuleStatus::NO_MORE_DATA_TO_READ || + load_status == LoaderModuleStatus::NO_FILES_TO_READ) { LOG("Cycled through all images, count " + TOSTR(_image_counter)); } @@ -220,7 +220,7 @@ VideoLoader::load_routine() std::this_thread::sleep_for(std::chrono::seconds(1)); } } - return VideoLoaderModuleStatus::OK; + return LoaderModuleStatus::OK; } bool VideoLoader::is_out_of_data() @@ -228,14 +228,14 @@ bool VideoLoader::is_out_of_data() return (remaining_count() < _sequence_count); } -VideoLoaderModuleStatus +LoaderModuleStatus VideoLoader::update_output_image() { - VideoLoaderModuleStatus status = VideoLoaderModuleStatus::OK; + LoaderModuleStatus status = LoaderModuleStatus::OK; if (is_out_of_data()) - return VideoLoaderModuleStatus::NO_MORE_DATA_TO_READ; + return LoaderModuleStatus::NO_MORE_DATA_TO_READ; if (_stopped) - return VideoLoaderModuleStatus::OK; + return LoaderModuleStatus::OK; // _circ_buff.get_read_buffer_x() is blocking and puts the caller on sleep until new images are written to the _circ_buff //if (_mem_type == RocalMemType::OCL) @@ -244,7 +244,7 @@ VideoLoader::update_output_image() auto data_buffer = _circ_buff.get_read_buffer_dev(); _swap_handle_time.start(); if (_output_image->swap_handle(data_buffer) != 0) - return VideoLoaderModuleStatus ::DEVICE_BUFFER_SWAP_FAILED; + return LoaderModuleStatus ::DEVICE_BUFFER_SWAP_FAILED; _swap_handle_time.end(); } else @@ -252,11 +252,11 @@ VideoLoader::update_output_image() auto data_buffer = _circ_buff.get_read_buffer_host(); _swap_handle_time.start(); if (_output_image->swap_handle(data_buffer) != 0) - return VideoLoaderModuleStatus::HOST_BUFFER_SWAP_FAILED; + return LoaderModuleStatus::HOST_BUFFER_SWAP_FAILED; _swap_handle_time.end(); } if (_stopped) - return VideoLoaderModuleStatus::OK; + return LoaderModuleStatus::OK; _output_decoded_img_info = _circ_buff.get_image_info(); _output_names = _output_decoded_img_info._image_names; _output_image->update_image_roi(_output_decoded_img_info._roi_width, _output_decoded_img_info._roi_height); @@ -273,7 +273,7 @@ Timing VideoLoader::timing() return t; } -VideoLoaderModuleStatus VideoLoader::set_cpu_affinity(cpu_set_t cpu_mask) +LoaderModuleStatus VideoLoader::set_cpu_affinity(cpu_set_t cpu_mask) { if (!_internal_thread_running) THROW("set_cpu_affinity() should be called after start_loading function is called") @@ -284,10 +284,10 @@ VideoLoaderModuleStatus VideoLoader::set_cpu_affinity(cpu_set_t cpu_mask) if (ret != 0) WRN("Error calling pthread_setaffinity_np: " + TOSTR(ret)); #endif - return VideoLoaderModuleStatus::OK; + return LoaderModuleStatus::OK; } -VideoLoaderModuleStatus VideoLoader::set_cpu_sched_policy(struct sched_param sched_policy) +LoaderModuleStatus VideoLoader::set_cpu_sched_policy(struct sched_param sched_policy) { if (!_internal_thread_running) THROW("set_cpu_sched_policy() should be called after start_loading function is called") @@ -297,7 +297,7 @@ VideoLoaderModuleStatus VideoLoader::set_cpu_sched_policy(struct sched_param sch if (ret != 0) WRN("Unsuccessful in setting thread realtime priority for loader thread err = " + TOSTR(ret)) #endif - return VideoLoaderModuleStatus::OK; + return LoaderModuleStatus::OK; } std::vector VideoLoader::get_id() diff --git a/rocAL/rocAL/source/loaders/video/video_loader_sharded.cpp b/rocAL/rocAL/source/loaders/video/video_loader_sharded.cpp index 5f2cf90b12..e2ee4240e8 100644 --- a/rocAL/rocAL/source/loaders/video/video_loader_sharded.cpp +++ b/rocAL/rocAL/source/loaders/video/video_loader_sharded.cpp @@ -61,10 +61,10 @@ void VideoLoaderSharded::fast_forward_through_empty_loaders() increment_loader_idx(); } -VideoLoaderModuleStatus VideoLoaderSharded::load_next() +LoaderModuleStatus VideoLoaderSharded::load_next() { if (!_initialized) - return VideoLoaderModuleStatus::NOT_INITIALIZED; + return LoaderModuleStatus::NOT_INITIALIZED; increment_loader_idx(); @@ -75,7 +75,7 @@ VideoLoaderModuleStatus VideoLoaderSharded::load_next() return ret; } -void VideoLoaderSharded::initialize(VideoReaderConfig reader_cfg, VideoDecoderConfig decoder_cfg, RocalMemType mem_type, +void VideoLoaderSharded::initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size) { if (_initialized) diff --git a/rocAL/rocAL/source/loaders/video/video_read_and_decode.cpp b/rocAL/rocAL/source/loaders/video/video_read_and_decode.cpp index 38271908dd..72b74db3dc 100644 --- a/rocAL/rocAL/source/loaders/video/video_read_and_decode.cpp +++ b/rocAL/rocAL/source/loaders/video/video_read_and_decode.cpp @@ -65,7 +65,7 @@ VideoReadAndDecode::~VideoReadAndDecode() _video_decoder.clear(); } -void VideoReadAndDecode::create(VideoReaderConfig reader_config, VideoDecoderConfig decoder_config, int batch_size) +void VideoReadAndDecode::create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size) { _sequence_length = reader_config.get_sequence_length(); _stride = reader_config.get_frame_stride(); @@ -76,10 +76,9 @@ void VideoReadAndDecode::create(VideoReaderConfig reader_config, VideoDecoderCon set_video_process_count(_video_count); _video_decoder.resize(_video_process_count); _video_names = _video_prop.video_file_names; - _sequence_count = _batch_size / _sequence_length; - _decompressed_buff_ptrs.resize(_sequence_count); - _actual_decoded_width.resize(_sequence_count); - _actual_decoded_height.resize(_sequence_count); + _decompressed_buff_ptrs.resize(_batch_size); + _actual_decoded_width.resize(_batch_size); + _actual_decoded_height.resize(_batch_size); _video_decoder_config = decoder_config; // Initialize the ffmpeg context once for the video files. @@ -142,7 +141,7 @@ void VideoReadAndDecode::decode_sequence(size_t sequence_index) } } -VideoLoaderModuleStatus +LoaderModuleStatus VideoReadAndDecode::load(unsigned char *buff, std::vector &names, const size_t max_decoded_width, @@ -159,13 +158,13 @@ VideoReadAndDecode::load(unsigned char *buff, THROW("Zero image dimension is not valid") if (!buff) THROW("Null pointer passed as output buffer") - if (_video_reader->count_items() < _sequence_count) - return VideoLoaderModuleStatus::NO_MORE_DATA_TO_READ; + if (_video_reader->count_items() < _batch_size) + return LoaderModuleStatus::NO_MORE_DATA_TO_READ; std::vector sequence_start_framenum; std::vector> sequence_frame_timestamps; - sequence_start_framenum.resize(_sequence_count); - sequence_frame_timestamps.resize(_sequence_count); - for (size_t it = 0; it < (_sequence_count); it++) + sequence_start_framenum.resize(_batch_size); + sequence_frame_timestamps.resize(_batch_size); + for (size_t it = 0; it < _batch_size; it++) sequence_frame_timestamps[it].resize(_sequence_length); const auto ret = video_interpret_color_format(output_color_format); const unsigned output_planes = std::get<1>(ret); @@ -181,9 +180,9 @@ VideoReadAndDecode::load(unsigned char *buff, std::vector parallel_decode_sequences; std::vector video_index; size_t parallel_sequence_count = 0; - _sequence_start_frame_num.resize(_sequence_count); - _sequence_video_path.resize(_sequence_count); - for (size_t i = 0; i < _sequence_count; i++) + _sequence_start_frame_num.resize(_batch_size); + _sequence_video_path.resize(_batch_size); + for (size_t i = 0; i < _batch_size; i++) { auto sequence_info = _video_reader->get_sequence_info(); _sequence_start_frame_num[i] = sequence_info.start_frame_number; @@ -255,7 +254,7 @@ VideoReadAndDecode::load(unsigned char *buff, _decode_time.end(); // Debug timing - for (size_t i = 0; i < _sequence_count; i++) + for (size_t i = 0; i < _batch_size; i++) { std::vector substrings1, substrings2; char delim = '/'; @@ -268,9 +267,9 @@ VideoReadAndDecode::load(unsigned char *buff, for (size_t s = 0; s < _sequence_length; s++) { sequence_frame_timestamps[i][s] = convert_framenum_to_timestamp(_sequence_start_frame_num[i] + (s * _stride)); - roi_width[(i * _sequence_length) + s] = _actual_decoded_width[i]; - roi_height[(i * _sequence_length) + s] = _actual_decoded_height[i]; } + roi_width[i] = _actual_decoded_width[i]; + roi_height[i] = _actual_decoded_height[i]; names[i] = video_idx + "#" + file_name + "_" + std::to_string(_sequence_start_frame_num[i]); } sequence_start_framenum_vec.insert(sequence_start_framenum_vec.begin(), sequence_start_framenum); @@ -278,6 +277,6 @@ VideoReadAndDecode::load(unsigned char *buff, _sequence_start_frame_num.clear(); _sequence_video_path.clear(); _sequence_video_idx.clear(); - return VideoLoaderModuleStatus::OK; + return LoaderModuleStatus::OK; } #endif diff --git a/rocAL/rocAL/source/pipeline/master_graph.cpp b/rocAL/rocAL/source/pipeline/master_graph.cpp index 1f38e70076..ddfe4e5fab 100644 --- a/rocAL/rocAL/source/pipeline/master_graph.cpp +++ b/rocAL/rocAL/source/pipeline/master_graph.cpp @@ -539,6 +539,10 @@ void MasterGraph::output_routine() _meta_data_graph->update_box_encoder_meta_data(&_anchors, full_batch_meta_data, _criteria, _offset, _scale, _means, _stds); } _bencode_time.end(); +#ifdef ROCAL_VIDEO + _sequence_start_framenum_vec.insert(_sequence_start_framenum_vec.begin(), _loader_module->get_sequence_start_frame_number()); + _sequence_frame_timestamps_vec.insert(_sequence_frame_timestamps_vec.begin(), _loader_module->get_sequence_frame_timestamps()); +#endif _ring_buffer.set_meta_data(full_batch_image_names, full_batch_meta_data); _ring_buffer.push(); // Image data and metadata is now stored in output the ring_buffer, increases it's level by 1 } diff --git a/rocAL/rocAL/source/pipeline/node.cpp b/rocAL/rocAL/source/pipeline/node.cpp index fd0537bbe9..02cb0a1771 100644 --- a/rocAL/rocAL/source/pipeline/node.cpp +++ b/rocAL/rocAL/source/pipeline/node.cpp @@ -43,6 +43,8 @@ Node::create(std::shared_ptr graph) vx_size num_of_dims = 2; vx_size stride[num_of_dims]; std::vector roi_dims = {_batch_size, 4}; + if(_inputs[0]->info().layout() == RocalTensorlayout::NFCHW || _inputs[0]->info().layout() == RocalTensorlayout::NFHWC) + roi_dims = {_inputs[0]->info().dims()[0] * _inputs[0]->info().dims()[1], 4}; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions stride[0] = sizeof(vx_uint32); stride[0] = sizeof(vx_uint32); stride[1] = stride[0] * roi_dims[0]; vx_enum mem_type = VX_MEMORY_TYPE_HOST; diff --git a/rocAL/rocAL/source/pipeline/tensor.cpp b/rocAL/rocAL/source/pipeline/tensor.cpp index 95e5e25122..d026630aff 100644 --- a/rocAL/rocAL/source/pipeline/tensor.cpp +++ b/rocAL/rocAL/source/pipeline/tensor.cpp @@ -1,6 +1,6 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -108,8 +108,10 @@ bool operator==(const rocalTensorInfo &rhs, const rocalTensorInfo &lhs) { void rocalTensorInfo::reset_tensor_roi_buffers() { - if(!_roi_buf) - allocate_host_or_pinned_mem(&_roi_buf, _batch_size * 4 * sizeof(unsigned), _mem_type); + if(!_roi_buf) { + size_t roi_size = (_layout == RocalTensorlayout::NFCHW || _layout == RocalTensorlayout::NFHWC) ? _dims[0] * _dims[1] : _batch_size; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions + allocate_host_or_pinned_mem((void **)&_roi_buf, roi_size * 4 * sizeof(unsigned), _mem_type); + } if (_is_image) { auto roi = get_roi(); for (unsigned i = 0; i < _batch_size; i++) { diff --git a/rocAL/rocAL/source/readers/video/video_file_source_reader.cpp b/rocAL/rocAL/source/readers/video/video_file_source_reader.cpp index 48cedc3486..31326e3348 100644 --- a/rocAL/rocAL/source/readers/video/video_file_source_reader.cpp +++ b/rocAL/rocAL/source/readers/video/video_file_source_reader.cpp @@ -46,7 +46,7 @@ unsigned VideoFileSourceReader::count_items() return ((ret <= 0) ? 0 : ret); } -VideoReader::Status VideoFileSourceReader::initialize(VideoReaderConfig desc) +VideoReader::Status VideoFileSourceReader::initialize(ReaderConfig desc) { auto ret = VideoReader::Status::OK; _sequence_id = 0; diff --git a/rocAL/rocAL/source/readers/video/video_reader_factory.cpp b/rocAL/rocAL/source/readers/video/video_reader_factory.cpp index 0219dd66d9..0c517b2097 100644 --- a/rocAL/rocAL/source/readers/video/video_reader_factory.cpp +++ b/rocAL/rocAL/source/readers/video/video_reader_factory.cpp @@ -26,9 +26,9 @@ THE SOFTWARE. #include "video_file_source_reader.h" #ifdef ROCAL_VIDEO -std::shared_ptr create_video_reader(VideoReaderConfig config) { +std::shared_ptr create_video_reader(ReaderConfig config) { switch(config.type()) { - case VideoStorageType::VIDEO_FILE_SYSTEM: + case StorageType::VIDEO_FILE_SYSTEM: { auto ret = std::make_shared(); if(ret->initialize(config) != VideoReader::Status::OK)