From d444fb9c20737bdc3471b2bc643ca6d91dd306e7 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Fri, 6 Oct 2023 07:56:26 +0000 Subject: [PATCH 01/33] Adding mask pipeline support for rocAL --- rocAL/include/api/rocal_api_data_transfer.h | 3 +- rocAL/include/api/rocal_api_meta_data.h | 14 +- .../include/meta_data/coco_meta_data_reader.h | 6 +- rocAL/include/meta_data/meta_data.h | 11 +- rocAL/include/meta_data/meta_data_reader.h | 16 +- rocAL/include/pipeline/master_graph.h | 5 +- rocAL/include/pipeline/ring_buffer.h | 8 +- rocAL/include/pipeline/tensor.h | 18 +- .../readers/image/coco_file_source_reader.h | 3 +- rocAL/rocAL_hip/rocal_hip_kernels.cpp | 83 ++++++--- rocAL/rocAL_hip/rocal_hip_kernels.h | 8 +- rocAL/source/api/rocal_api_augmentation.cpp | 16 +- rocAL/source/api/rocal_api_data_transfer.cpp | 4 +- rocAL/source/api/rocal_api_meta_data.cpp | 41 ++++- .../meta_data/coco_meta_data_reader.cpp | 30 ++- .../meta_node_resize_mirror_normalize.cpp | 5 + rocAL/source/pipeline/master_graph.cpp | 174 ++++++++++-------- rocAL/source/pipeline/ring_buffer.cpp | 40 +++- rocAL/source/pipeline/tensor.cpp | 51 +---- .../readers/image/coco_file_source_reader.cpp | 71 ++++++- rocAL_pybind/amd/rocal/pipeline.py | 14 ++ rocAL_pybind/amd/rocal/readers.py | 7 +- rocAL_pybind/amd/rocal/types.py | 2 + rocAL_pybind/rocal_pybind.cpp | 54 ++++++ 24 files changed, 477 insertions(+), 207 deletions(-) diff --git a/rocAL/include/api/rocal_api_data_transfer.h b/rocAL/include/api/rocal_api_data_transfer.h index 0d60e0b94..f94819273 100644 --- a/rocAL/include/api/rocal_api_data_transfer.h +++ b/rocAL/include/api/rocal_api_data_transfer.h @@ -63,7 +63,7 @@ extern "C" RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext rocal_context, RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, - bool reverse_channels, RocalOutputMemType output_mem_type); + bool 
reverse_channels, RocalOutputMemType output_mem_type, int max_height = 0, int max_width = 0); /*! * \brief Sets the output images in the RocalContext @@ -74,7 +74,6 @@ extern "C" RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext rocal_context, */ extern "C" void ROCAL_API_CALL rocalSetOutputs(RocalContext p_context, unsigned int num_of_outputs, std::vector &output_images); - /*! * \brief gives the list of output tensors from rocal context * \ingroup group_rocal_data_transfer diff --git a/rocAL/include/api/rocal_api_meta_data.h b/rocAL/include/api/rocal_api_meta_data.h index dfe961acd..5c5a305dd 100644 --- a/rocAL/include/api/rocal_api_meta_data.h +++ b/rocAL/include/api/rocal_api_meta_data.h @@ -76,9 +76,14 @@ extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReaderDetection(RocalContex * \ingroup group_rocal_meta_data * \param [in] rocal_context rocal context * \param [in] source_path path to the coco json file + * \param [in] mask enable polygon masks + * \param [in] ltrb If set to True, bboxes are returned as [left, top, right, bottom]. If set to False, the bboxes are returned as [x, y, width, height] + * \param [in] is_box_encoder If set to True, bboxes are returned as encoded bboxes using the anchors + * \param [in] avoid_class_remapping If set to True, classes are returned directly. 
Otherwise, classes are mapped to consecutive values + * \param [in] aspect_ratio_grouping If set to True, images are sorted by their aspect ratio and returned * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false, bool avoid_class_remapping = false, bool aspect_ratio_grouping = false); /*! \brief create coco reader key points * \ingroup group_rocal_meta_data @@ -209,6 +214,13 @@ extern "C" RocalTensorList ROCAL_API_CALL rocalGetBoundingBoxCords(RocalContext */ extern "C" void ROCAL_API_CALL rocalGetImageSizes(RocalContext rocal_context, int* buf); +/*! \brief get ROI image sizes + * \ingroup group_rocal_meta_data + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with ROI image size info for the images in the output batch + */ +extern "C" void ROCAL_API_CALL rocalGetROIImageSizes(RocalContext rocal_context, int* buf); + /*! 
\brief create text cifar10 label reader * \ingroup group_rocal_meta_data * \param [in] rocal_context rocal context diff --git a/rocAL/include/meta_data/coco_meta_data_reader.h b/rocAL/include/meta_data/coco_meta_data_reader.h index aec539bed..5e5efc67b 100644 --- a/rocAL/include/meta_data/coco_meta_data_reader.h +++ b/rocAL/include/meta_data/coco_meta_data_reader.h @@ -32,12 +32,12 @@ class COCOMetaDataReader : public MetaDataReader { public: void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; + ImgSize lookup_image_size(const std::string& image_name) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - const std::map>& get_map_content() override { return _map_content; } COCOMetaDataReader(); @@ -45,12 +45,14 @@ class COCOMetaDataReader : public MetaDataReader { pMetaDataBatch _output; std::string _path; int meta_data_reader_type; + bool _avoid_class_remapping; void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size, int image_id = 0); - void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size, MaskCords mask_cords, std::vector polygon_count, std::vector> vertices_count); // To add Mask coordinates to Metadata struct + void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size, MaskCords mask_cords, std::vector polygon_count, std::vector> vertices_count, int image_id = 0); // To add Mask coordinates to Metadata struct bool exists(const std::string& image_name) override; std::map> _map_content; std::map>::iterator _itr; std::map _map_img_sizes; + std::map _map_img_names; std::map::iterator itr; std::map _label_info; std::map::iterator _it_label; diff --git a/rocAL/include/meta_data/meta_data.h b/rocAL/include/meta_data/meta_data.h index 
2d3ba9a26..cf0b9a458 100644 --- a/rocAL/include/meta_data/meta_data.h +++ b/rocAL/include/meta_data/meta_data.h @@ -104,6 +104,7 @@ typedef class MetaDataInfo { int img_id = -1; std::string img_name = ""; ImgSize img_size = {}; + ImgSize img_roi_size = {}; } MetaDataInfo; class MetaData { @@ -121,9 +122,11 @@ class MetaData { virtual JointsData& get_joints_data() = 0; virtual void set_joints_data(JointsData* joints_data) = 0; ImgSize& get_img_size() { return _info.img_size; } + ImgSize& get_img_roi_size() { return _info.img_roi_size; } std::string& get_image_name() { return _info.img_name; } int& get_image_id() { return _info.img_id; } void set_img_size(ImgSize img_size) { _info.img_size = std::move(img_size); } + void set_img_roi_size(ImgSize img_roi_size) { _info.img_roi_size = std::move(img_roi_size); } void set_img_id(int img_id) { _info.img_id = img_id; } void set_img_name(std::string img_name) { _info.img_name = img_name; } void set_metadata_info(MetaDataInfo info) { _info = std::move(info); } @@ -167,13 +170,14 @@ class BoundingBox : public Label { struct PolygonMask : public BoundingBox { public: - PolygonMask(BoundingBoxCords bb_cords, Labels bb_label_ids, ImgSize img_size, MaskCords mask_cords, std::vector polygon_count, std::vector> vertices_count) { + PolygonMask(BoundingBoxCords bb_cords, Labels bb_label_ids, ImgSize img_size, MaskCords mask_cords, std::vector polygon_count, std::vector> vertices_count, int img_id = 0) { _bb_cords = std::move(bb_cords); _label_ids = std::move(bb_label_ids); _info.img_size = std::move(img_size); _mask_cords = std::move(mask_cords); _polygon_count = std::move(polygon_count); _vertices_count = std::move(vertices_count); + _info.img_id = img_id; } std::vector& get_polygon_count() override { return _polygon_count; } std::vector>& get_vertices_count() override { return _vertices_count; } @@ -207,20 +211,24 @@ class MetaDataInfoBatch { std::vector img_ids = {}; std::vector img_names = {}; std::vector img_sizes = {}; + 
std::vector img_roi_sizes = {}; void clear() { img_ids.clear(); img_names.clear(); img_sizes.clear(); + img_roi_sizes.clear(); } void resize(int batch_size) { img_ids.resize(batch_size); img_names.resize(batch_size); img_sizes.resize(batch_size); + img_roi_sizes.resize(batch_size); } void insert(MetaDataInfoBatch& other) { img_sizes.insert(img_sizes.end(), other.img_sizes.begin(), other.img_sizes.end()); img_ids.insert(img_ids.end(), other.img_ids.begin(), other.img_ids.end()); img_names.insert(img_names.end(), other.img_names.begin(), other.img_names.end()); + img_roi_sizes.insert(img_roi_sizes.end(), other.img_roi_sizes.begin(), other.img_roi_sizes.end()); } }; @@ -249,6 +257,7 @@ class MetaDataBatch { std::vector& get_image_id_batch() { return _info_batch.img_ids; } std::vector& get_image_names_batch() { return _info_batch.img_names; } ImgSizes& get_img_sizes_batch() { return _info_batch.img_sizes; } + ImgSizes& get_img_roi_sizes_batch() { return _info_batch.img_roi_sizes; } MetaDataInfoBatch& get_info_batch() { return _info_batch; } void set_metadata_type(MetaDataType metadata_type) { _type = metadata_type; } MetaDataType get_metadata_type() { return _type; } diff --git a/rocAL/include/meta_data/meta_data_reader.h b/rocAL/include/meta_data/meta_data_reader.h index e0a334ade..b16722c4e 100644 --- a/rocAL/include/meta_data/meta_data_reader.h +++ b/rocAL/include/meta_data/meta_data_reader.h @@ -56,16 +56,20 @@ struct MetaDataConfig { unsigned _frame_stride; unsigned _out_img_width; unsigned _out_img_height; + bool _avoid_class_remapping; + bool _aspect_ratio_grouping; public: - MetaDataConfig(const MetaDataType& type, const MetaDataReaderType& reader_type, const std::string& path, const std::map& feature_key_map = std::map(), const std::string file_prefix = std::string(), const unsigned& sequence_length = 3, const unsigned& frame_step = 3, const unsigned& frame_stride = 1) - : _type(type), _reader_type(reader_type), _path(path), _feature_key_map(feature_key_map), 
_file_prefix(file_prefix), _sequence_length(sequence_length), _frame_step(frame_step), _frame_stride(frame_stride) {} + MetaDataConfig(const MetaDataType& type, const MetaDataReaderType& reader_type, const std::string& path, const std::map& feature_key_map = std::map(), const std::string file_prefix = std::string(), const unsigned& sequence_length = 3, const unsigned& frame_step = 3, const unsigned& frame_stride = 1, bool avoid_class_remapping = false) + : _type(type), _reader_type(reader_type), _path(path), _feature_key_map(feature_key_map), _file_prefix(file_prefix), _sequence_length(sequence_length), _frame_step(frame_step), _frame_stride(frame_stride), _avoid_class_remapping(avoid_class_remapping) {} MetaDataConfig() = delete; MetaDataType type() const { return _type; } MetaDataReaderType reader_type() const { return _reader_type; } std::string path() const { return _path; } std::map feature_key_map() const { return _feature_key_map; } std::string file_prefix() const { return _file_prefix; } + bool class_remapping() const { return _avoid_class_remapping; } + bool aspect_ratio_grouping() const { return _aspect_ratio_grouping; } unsigned sequence_length() const { return _sequence_length; } unsigned frame_step() const { return _frame_step; } unsigned frame_stride() const { return _frame_stride; } @@ -73,9 +77,14 @@ struct MetaDataConfig { unsigned out_img_height() const { return _out_img_height; } void set_out_img_width(unsigned out_img_width) { _out_img_width = out_img_width; } void set_out_img_height(unsigned out_img_height) { _out_img_height = out_img_height; } + void set_avoid_class_remapping(bool avoid_class_remapping) { _avoid_class_remapping = avoid_class_remapping; } + void set_aspect_ratio_grouping(bool aspect_ratio_grouping) { _aspect_ratio_grouping = aspect_ratio_grouping; } }; class MetaDataReader { + private: + bool _aspect_ratio_grouping = false; + public: enum class Status { OK = 0 @@ -88,4 +97,7 @@ class MetaDataReader { virtual const std::map>& 
get_map_content() = 0; virtual bool exists(const std::string& image_name) = 0; virtual bool set_timestamp_mode() = 0; + virtual ImgSize lookup_image_size(const std::string& image_name) { return {}; } + void set_aspect_ratio_grouping(bool aspect_ratio_grouping) { _aspect_ratio_grouping = aspect_ratio_grouping; } + bool aspect_ratio_grouping() const { return _aspect_ratio_grouping; } }; diff --git a/rocAL/include/pipeline/master_graph.h b/rocAL/include/pipeline/master_graph.h index 98349ae86..dd5662c93 100644 --- a/rocAL/include/pipeline/master_graph.h +++ b/rocAL/include/pipeline/master_graph.h @@ -82,7 +82,7 @@ class MasterGraph { Status reset(); size_t remaining_count(); MasterGraph::Status to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, float multiplier2, - float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type); + float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, int max_height = 0, int max_width = 0); Status copy_output(unsigned char *out_ptr, size_t out_size_in_bytes); Status copy_out_tensor_planar(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type); @@ -106,7 +106,8 @@ class MasterGraph { Tensor *create_loader_output_tensor(const TensorInfo &info); std::vector create_label_reader(const char *source_path, MetaDataReaderType reader_type); std::vector create_video_label_reader(const char *source_path, MetaDataReaderType reader_type, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, bool file_list_frame_num = true); - std::vector create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType label_type, bool ltrb_bbox = true, 
bool is_box_encoder = false, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0); + std::vector create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType label_type, bool ltrb_bbox = true, bool is_box_encoder = false, + bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0); std::vector create_tf_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type, const std::map feature_key_map); std::vector create_caffe_lmdb_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type); std::vector create_caffe2_lmdb_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type); diff --git a/rocAL/include/pipeline/ring_buffer.h b/rocAL/include/pipeline/ring_buffer.h index fc6ba9e0d..a5fc53183 100644 --- a/rocAL/include/pipeline/ring_buffer.h +++ b/rocAL/include/pipeline/ring_buffer.h @@ -46,12 +46,12 @@ class RingBuffer { ///\param dev ///\param sub_buffer_size ///\param sub_buffer_count - void init(RocalMemType mem_type, void *dev, std::vector &sub_buffer_size); + void init(RocalMemType mem_type, void *dev, std::vector &sub_buffer_size, size_t roi_buffer_size); void initBoxEncoderMetaData(RocalMemType mem_type, size_t encoded_bbox_size, size_t encoded_labels_size); void init_metadata(RocalMemType mem_type, std::vector &sub_buffer_size); void release_gpu_res(); - std::vector get_read_buffers(); - std::vector get_write_buffers(); + std::pair, std::vector> get_read_buffers(); + std::pair, std::vector> get_write_buffers(); std::pair get_box_encode_write_buffers(); std::pair get_box_encode_read_buffers(); MetaDataNamePair &get_meta_data(); @@ -86,6 +86,8 @@ class RingBuffer { std::condition_variable _wait_for_unload; std::vector> _dev_sub_buffer; std::vector> 
_host_sub_buffers; + std::vector> _dev_roi_buffers; + std::vector> _host_roi_buffers; std::vector> _host_meta_data_buffers; std::vector _dev_bbox_buffer; std::vector _dev_labels_buffer; diff --git a/rocAL/include/pipeline/tensor.h b/rocAL/include/pipeline/tensor.h index dd7868e88..54b0bf524 100644 --- a/rocAL/include/pipeline/tensor.h +++ b/rocAL/include/pipeline/tensor.h @@ -84,9 +84,6 @@ class TensorInfo { RocalTensorDataType data_type, RocalTensorlayout layout, RocalColorFormat color_format); - //! Copy constructor - TensorInfo(const TensorInfo& info); - ~TensorInfo(); // Setting properties required for Image / Video void set_roi_type(RocalROIType roi_type) { _roi_type = roi_type; } void set_data_type(RocalTensorDataType data_type) { @@ -211,7 +208,7 @@ class TensorInfo { RocalROIType roi_type() const { return _roi_type; } RocalTensorDataType data_type() const { return _data_type; } RocalTensorlayout layout() const { return _layout; } - RocalROI* get_roi() const { return (RocalROI*)_roi_buf; } + RocalROI* get_roi() const { return (RocalROI*)_roi.get(); } RocalColorFormat color_format() const { return _color_format; } Type type() const { return _type; } uint64_t data_type_size() { @@ -221,6 +218,14 @@ class TensorInfo { bool is_image() const { return _is_image; } void set_metadata() { _is_metadata = true; } bool is_metadata() const { return _is_metadata; } + void set_roi_ptr(unsigned* roi_ptr) { + auto deleter = [&](unsigned* ptr) {}; // Empty destructor used, since memory is handled by the pipeline + _roi.reset(roi_ptr, deleter); + } + void copy_roi(void* roi_buffer) { + if (_roi != nullptr && roi_buffer != nullptr) + memcpy((void*)roi_buffer, (const void*)_roi.get(), _batch_size * sizeof(RocalROI)); + } private: Type _type = Type::UNKNOWN; //!< tensor type, whether is virtual tensor, created from handle or is a regular tensor @@ -233,7 +238,8 @@ class TensorInfo { RocalTensorDataType _data_type = RocalTensorDataType::FP32; //!< tensor data type 
RocalTensorlayout _layout = RocalTensorlayout::NONE; //!< layout of the tensor RocalColorFormat _color_format; //!< color format of the image - void* _roi_buf = nullptr; + unsigned* _roi_buf = nullptr; + std::shared_ptr _roi; uint64_t _data_type_size = tensor_data_size(_data_type); uint64_t _data_size = 0; std::vector _max_shape; //!< stores the the width and height dimensions in the tensor @@ -275,6 +281,8 @@ class Tensor : public rocalTensor { void create_roi_tensor_from_handle(void** handle); void update_tensor_roi(const std::vector& width, const std::vector& height); void reset_tensor_roi() { _info.reset_tensor_roi_buffers(); } + void set_roi(unsigned* roi_ptr) { _info.set_roi_ptr(roi_ptr); } + void copy_roi(void* roi_buffer) { _info.copy_roi(roi_buffer); } vx_tensor get_roi_tensor() { return _vx_roi_handle; } // create_from_handle() no internal memory allocation is done here since // tensor's handle should be swapped with external buffers before usage diff --git a/rocAL/include/readers/image/coco_file_source_reader.h b/rocAL/include/readers/image/coco_file_source_reader.h index 928fea12f..0e3a11bb8 100644 --- a/rocAL/include/readers/image/coco_file_source_reader.h +++ b/rocAL/include/readers/image/coco_file_source_reader.h @@ -76,7 +76,8 @@ class COCOFileSourceReader : public Reader { DIR *_src_dir; DIR *_sub_dir; struct dirent *_entity; - std::vector _file_names; + std::vector _file_names, _sorted_file_names; + std::vector _aspect_ratios; std::vector _files; unsigned _curr_file_idx; FILE *_current_fPtr; diff --git a/rocAL/rocAL_hip/rocal_hip_kernels.cpp b/rocAL/rocAL_hip/rocal_hip_kernels.cpp index d238d0366..449a8672c 100644 --- a/rocAL/rocAL_hip/rocal_hip_kernels.cpp +++ b/rocAL/rocAL_hip/rocal_hip_kernels.cpp @@ -32,6 +32,7 @@ Hip_CopyInt8ToNHWC_fp32( void *output_tensor, unsigned int dst_buf_offset, uint4 nchw, + uint2 outDims, float3 multiplier, float3 offset, unsigned int reverse_channels) { @@ -40,19 +41,21 @@ Hip_CopyInt8ToNHWC_fp32( const int W = 
nchw.w; const int H = nchw.z; const int C = nchw.y; + const int maxOutH = outDims.x; + const int maxOutW = outDims.y; const int img_offset = C * W * H; + const int out_img_offset = C * maxOutW * maxOutH; - if ((x >= W) || (y >= H)) + if ((x >= maxOutW) || (y >= maxOutH)) return; for (unsigned int n = 0; n < nchw.x; n++) { unsigned int srcIdx = (y * W + x) * C; // src is RGB - unsigned int dstIdx = (y * W + x) * C; + unsigned int dstIdx = (y * maxOutW + x) * C; // copy float3 pixels to dst if (C == 3) { float3 dst; - const uchar *inp_img = &inp_image_u8[n * img_offset]; - float *out_tensor = (float *)((float *)output_tensor + dst_buf_offset + n * img_offset); + float *out_tensor = (float *)((float *)output_tensor + dst_buf_offset + n * out_img_offset); if (reverse_channels) dst = make_float3((float)inp_img[srcIdx + 2], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx]) * multiplier + offset; else @@ -62,7 +65,7 @@ Hip_CopyInt8ToNHWC_fp32( out_tensor[dstIdx + 2] = dst.z; } else { const uchar *inp_img = &inp_image_u8[n * img_offset + dst_buf_offset]; - float *out_tensor = (float *)output_tensor + dst_buf_offset + n * img_offset; + float *out_tensor = (float *)output_tensor + dst_buf_offset + n * out_img_offset; out_tensor[dstIdx] = (float)inp_img[srcIdx] * multiplier.x + offset.x; } } @@ -74,6 +77,7 @@ Hip_CopyInt8ToNHWC_fp16( void *output_tensor, unsigned int dst_buf_offset, uint4 nchw, + uint2 outDims, float3 multiplier, float3 offset, const unsigned int reverse_channels) { @@ -82,16 +86,19 @@ Hip_CopyInt8ToNHWC_fp16( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; + const int maxOutH = outDims.x; + const int maxOutW = outDims.y; const int img_offset = C * W * H; + const int out_img_offset = C * maxOutW * maxOutH; - if ((x >= W) || (y >= H)) + if ((x >= maxOutW) || (y >= maxOutH)) return; for (unsigned int n = 0; n < nchw.x; n++) { - __half *out_tensor = (__half *)output_tensor + dst_buf_offset + n * img_offset; + __half *out_tensor = (__half 
*)output_tensor + dst_buf_offset + n * out_img_offset; unsigned int srcIdx = (y * W + x) * C; // copy float3 pixels to dst if (C == 3) { - unsigned int dstIdx = y * W + x * 3; + unsigned int dstIdx = y * maxOutW + x * 3; const uchar *inp_img = &inp_image_u8[n * img_offset]; float3 dst; if (reverse_channels) @@ -102,9 +109,9 @@ Hip_CopyInt8ToNHWC_fp16( out_tensor[dstIdx + 1] = __float2half(dst.y); out_tensor[dstIdx + 2] = __float2half(dst.z); } else { - unsigned int dstIdx = y * W + x; + unsigned int dstIdx = y * maxOutW + x; const uchar *inp_img = &inp_image_u8[n * img_offset]; - float *out_tensor = (float *)output_tensor + n * img_offset; + float *out_tensor = (float *)output_tensor + n * out_img_offset; out_tensor[dstIdx] = __float2half((float)inp_img[srcIdx] * multiplier.x + offset.x); } } @@ -116,6 +123,7 @@ Hip_CopyInt8ToNCHW_fp32( void *output_tensor, unsigned int dst_buf_offset, uint4 nchw, + uint2 outDims, float3 multiplier, float3 offset, unsigned int reverse_channels) { @@ -124,16 +132,20 @@ Hip_CopyInt8ToNCHW_fp32( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; + const int maxOutH = outDims.x; + const int maxOutW = outDims.y; const int img_offset = C * W * H; + const int out_img_offset = C * maxOutW * maxOutH; - if ((x >= W) || (y >= H)) + if ((x >= maxOutW) || (y >= maxOutH)) return; for (unsigned int n = 0; n < nchw.x; n++) { unsigned int srcIdx = (y * W + x) * C; - unsigned int dstIdx = y * W + x; + unsigned int dstIdx = y * maxOutW + x; // copy float3 pixels to dst const uchar *inp_img = &inp_image_u8[n * img_offset]; - float *out_tensor = (float *)output_tensor + n * img_offset + dst_buf_offset; + float *out_tensor = (float *)output_tensor + n * out_img_offset + dst_buf_offset; + unsigned int stride = maxOutW * maxOutH; if (C == 3) { float3 dst; if (reverse_channels) @@ -141,8 +153,8 @@ Hip_CopyInt8ToNCHW_fp32( else dst = make_float3((float)inp_img[srcIdx], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx + 2]) * multiplier + 
offset; out_tensor[dstIdx] = dst.x; - out_tensor[dstIdx + W * H] = dst.y; - out_tensor[dstIdx + W * H * 2] = dst.z; + out_tensor[dstIdx + stride] = dst.y; + out_tensor[dstIdx + stride * 2] = dst.z; } else { out_tensor[dstIdx] = (float)inp_img[srcIdx] * multiplier.x + offset.x; } @@ -155,6 +167,7 @@ Hip_CopyInt8ToNCHW_fp16( void *output_tensor, unsigned int dst_buf_offset, uint4 nchw, + uint2 outDims, float3 multiplier, float3 offset, const unsigned int reverse_channels) { @@ -163,16 +176,20 @@ Hip_CopyInt8ToNCHW_fp16( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; + const int maxOutH = outDims.x; + const int maxOutW = outDims.y; const int img_offset = C * W * H; + const int out_img_offset = C * maxOutW * maxOutH; - if ((x >= W) || (y >= H)) + if ((x >= maxOutW) || (y >= maxOutH)) return; for (unsigned int n = 0; n < nchw.x; n++) { - __half *out_tensor = (__half *)output_tensor + n * img_offset + dst_buf_offset; + __half *out_tensor = (__half *)output_tensor + n * out_img_offset + dst_buf_offset; const uchar *inp_img = &inp_image_u8[n * img_offset]; unsigned int srcIdx = (y * W + x) * C; // copy float3 pixels to dst - unsigned int dstIdx = y * W + x; + unsigned int dstIdx = y * maxOutW + x; + unsigned int stride = maxOutW * maxOutH; if (C == 3) { float3 dst; if (reverse_channels) @@ -180,8 +197,8 @@ Hip_CopyInt8ToNCHW_fp16( else dst = make_float3((float)inp_img[srcIdx], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx + 2]) * multiplier + offset; out_tensor[dstIdx] = __float2half(dst.x); - out_tensor[dstIdx + W * H] = __float2half(dst.y); - out_tensor[dstIdx + W * H * 2] = __float2half(dst.z); + out_tensor[dstIdx + stride] = __float2half(dst.y); + out_tensor[dstIdx + stride * 2] = __float2half(dst.z); } else { out_tensor[dstIdx] = __float2half((float)inp_img[srcIdx] * multiplier.x + offset.x); } @@ -204,15 +221,22 @@ int HipExecCopyInt8ToNHWC( float offset1, float offset2, unsigned int reverse_channels, - unsigned int fp16) { + unsigned int fp16, 
+ const unsigned max_output_height, + const unsigned max_output_width) { int localThreads_x = 16, localThreads_y = 16; + uint2 outDims; + if ((max_output_height == 0) || (max_output_width == 0)) + outDims = make_uint2(h, w); + else + outDims = make_uint2(max_output_height, max_output_width); int globalThreads_x = w, globalThreads_y = h; if (!fp16) { hipLaunchKernelGGL(Hip_CopyInt8ToNHWC_fp32, dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), dim3(localThreads_x, localThreads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, - make_uint4(n, c, h, w), + make_uint4(n, c, h, w), outDims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), reverse_channels); } else { @@ -220,7 +244,7 @@ int HipExecCopyInt8ToNHWC( dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), dim3(localThreads_x, localThreads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, - make_uint4(n, c, h, w), + make_uint4(n, c, h, w), outDims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), reverse_channels); } @@ -243,15 +267,22 @@ int HipExecCopyInt8ToNCHW( float offset1, float offset2, unsigned int reverse_channels, - unsigned int fp16) { + unsigned int fp16, + const unsigned max_output_height, + const unsigned max_output_width) { int localThreads_x = 16, localThreads_y = 16; + uint2 outDims; + if ((max_output_height == 0) || (max_output_width == 0)) + outDims = make_uint2(h, w); + else + outDims = make_uint2(max_output_height, max_output_width); int globalThreads_x = w, globalThreads_y = h; if (!fp16) { hipLaunchKernelGGL(Hip_CopyInt8ToNCHW_fp32, dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), dim3(localThreads_x, localThreads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, - make_uint4(n, c, h, 
w), + make_uint4(n, c, h, w), outDims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), reverse_channels); } else { @@ -259,7 +290,7 @@ int HipExecCopyInt8ToNCHW( dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), dim3(localThreads_x, localThreads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, - make_uint4(n, c, h, w), + make_uint4(n, c, h, w), outDims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), reverse_channels); } diff --git a/rocAL/rocAL_hip/rocal_hip_kernels.h b/rocAL/rocAL_hip/rocal_hip_kernels.h index 9c6d81884..0db801f59 100644 --- a/rocAL/rocAL_hip/rocal_hip_kernels.h +++ b/rocAL/rocAL_hip/rocal_hip_kernels.h @@ -38,7 +38,9 @@ int HipExecCopyInt8ToNHWC( float offset1, float offset2, unsigned int reverse_channels, - unsigned int fp16); + unsigned int fp16, + const unsigned max_output_height = 0, + const unsigned max_output_width = 0); int HipExecCopyInt8ToNCHW( hipStream_t stream, @@ -56,4 +58,6 @@ int HipExecCopyInt8ToNCHW( float offset1, float offset2, unsigned int reverse_channels, - unsigned int fp16); + unsigned int fp16, + const unsigned max_output_height = 0, + const unsigned max_output_width = 0); diff --git a/rocAL/source/api/rocal_api_augmentation.cpp b/rocAL/source/api/rocal_api_augmentation.cpp index b4ca8b42e..4137c50b6 100644 --- a/rocAL/source/api/rocal_api_augmentation.cpp +++ b/rocAL/source/api/rocal_api_augmentation.cpp @@ -554,16 +554,20 @@ RocalTensor ROCAL_API_CALL try { if ((dest_width | dest_height | resize_longer | resize_shorter) == 0) THROW("Atleast one size 'dest_width' or 'dest_height' or 'resize_shorter' or 'resize_longer' must be specified") - if ((dest_width | dest_height) && (resize_longer | resize_shorter)) + if ((dest_width | dest_height) && (resize_longer | resize_shorter) && (scaling_mode != RocalResizeScalingMode::ROCAL_SCALING_MODE_MIN_MAX)) THROW("Only 
one method of specifying size can be used \ndest_width and/or dest_height\nresize_shorter\nresize_longer") - if (resize_longer && resize_shorter) - THROW("'resize_longer' and 'resize_shorter' cannot be passed together. They are mutually exclusive.") + if (resize_longer && resize_shorter && scaling_mode != RocalResizeScalingMode::ROCAL_SCALING_MODE_MIN_MAX) + THROW("'resize_longer' and 'resize_shorter' can only be passed together for min max scaling mode") unsigned out_width, out_height; RocalResizeScalingMode resize_scaling_mode; // Change the scaling mode if resize_shorter or resize_longer is specified - if (resize_shorter) { + if (scaling_mode == RocalResizeScalingMode::ROCAL_SCALING_MODE_MIN_MAX) { + resize_scaling_mode = scaling_mode; + out_width = dest_width; + out_height = dest_height; + } else if (resize_shorter) { resize_scaling_mode = RocalResizeScalingMode::ROCAL_SCALING_MODE_NOT_SMALLER; out_width = out_height = resize_shorter; } else if (resize_longer) { @@ -609,6 +613,10 @@ RocalTensor ROCAL_API_CALL max_out_height = maximum_size[1] ? maximum_size[1] : max_out_height; } } + if (scaling_mode == RocalResizeScalingMode::ROCAL_SCALING_MODE_MIN_MAX) { + // For Min Max scaling mode, both min size and max size are passed as resize_shorter and resize_longer values + maximum_size = {resize_shorter, resize_longer}; + } RocalTensorlayout op_tensor_layout = static_cast(output_layout); RocalTensorDataType op_tensor_datatype = static_cast(output_datatype); diff --git a/rocAL/source/api/rocal_api_data_transfer.cpp b/rocAL/source/api/rocal_api_data_transfer.cpp index 189328f6e..a3e3088cf 100644 --- a/rocAL/source/api/rocal_api_data_transfer.cpp +++ b/rocAL/source/api/rocal_api_data_transfer.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. 
RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext p_context, void* out_ptr, RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, - bool reverse_channels, RocalOutputMemType output_mem_type) { + bool reverse_channels, RocalOutputMemType output_mem_type, int max_height, int max_width) { auto context = static_cast(p_context); try { if (tensor_format != ROCAL_NHWC && tensor_format != ROCAL_NCHW) @@ -42,7 +42,7 @@ rocalToTensor(RocalContext p_context, void* out_ptr, RocalTensorLayout tensor_fo auto tensor_layout = (tensor_format == ROCAL_NHWC) ? RocalTensorlayout::NHWC : RocalTensorlayout::NCHW; auto tensor_output_data_type = (tensor_output_type == ROCAL_FP32) ? RocalTensorDataType::FP32 : RocalTensorDataType::FP16; context->master_graph->to_tensor(out_ptr, tensor_layout, multiplier0, multiplier1, multiplier2, - offset0, offset1, offset2, reverse_channels, tensor_output_data_type, output_mem_type); + offset0, offset1, offset2, reverse_channels, tensor_output_data_type, output_mem_type, max_height, max_width); } catch (const std::exception& e) { context->capture_error(e.what()); ERR(e.what()) diff --git a/rocAL/source/api/rocal_api_meta_data.cpp b/rocAL/source/api/rocal_api_meta_data.cpp index 1fa4768a7..0eaf89958 100644 --- a/rocAL/source/api/rocal_api_meta_data.cpp +++ b/rocAL/source/api/rocal_api_meta_data.cpp @@ -71,14 +71,14 @@ RocalMetaData RocalMetaData ROCAL_API_CALL - rocalCreateCOCOReader(RocalContext p_context, const char* source_path, bool is_output, bool mask, bool ltrb, bool is_box_encoder) { + rocalCreateCOCOReader(RocalContext p_context, const char* source_path, bool is_output, bool mask, bool ltrb, bool is_box_encoder, bool avoid_class_remapping, bool aspect_ratio_grouping) { if (!p_context) THROW("Invalid rocal context passed to rocalCreateCOCOReader") auto context = static_cast(p_context); if (mask) { - return 
context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::PolygonMask, ltrb, is_box_encoder); + return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::PolygonMask, ltrb, is_box_encoder, avoid_class_remapping, aspect_ratio_grouping); } - return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::BoundingBox, ltrb, is_box_encoder); + return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::BoundingBox, ltrb, is_box_encoder, avoid_class_remapping, aspect_ratio_grouping); } RocalMetaData @@ -200,8 +200,7 @@ void if (context->user_batch_size() != meta_data_batch_size) THROW("meta data batch size is wrong " + TOSTR(meta_data_batch_size) + " != " + TOSTR(context->user_batch_size())) for (unsigned int i = 0; i < meta_data_batch_size; i++) { - std::string str_id = meta_data.first[i].erase(0, meta_data.first[i].find_first_not_of('0')); - buf[i] = stoi(str_id); + buf[i] = meta_data.second->get_image_id_batch()[i]; } } @@ -360,6 +359,32 @@ void } } +void + ROCAL_API_CALL + rocalGetROIImageSizes(RocalContext p_context, int* buf) { + if (!p_context) { + THROW("Invalid rocal context passed to rocalGetROIImageSizes") + return; + } + auto context = static_cast(p_context); + try { + auto meta_data = context->master_graph->meta_data(); + if (!meta_data.second) { /* must be checked before any dereference of meta_data.second */ + WRN("No label has been loaded for this output image") + return; + } + + size_t meta_data_batch_size = meta_data.second->get_img_roi_sizes_batch().size(); + for (unsigned i = 0; i < meta_data_batch_size; i++) { + memcpy(buf, &(meta_data.second->get_img_roi_sizes_batch()[i]), sizeof(ImgSize)); + buf += 2; + } + } catch (const std::exception& e) { + context->capture_error(e.what()); + std::cerr << e.what() <<
'\n'; + } +} + RocalMetaData ROCAL_API_CALL rocalCreateTextCifar10LabelReader(RocalContext p_context, const char* source_path, const char* file_prefix) { @@ -396,10 +421,8 @@ void } } -void - ROCAL_API_CALL - rocalBoxEncoder(RocalContext p_context, std::vector& anchors, float criteria, - std::vector& means, std::vector& stds, bool offset, float scale) { +void ROCAL_API_CALL rocalBoxEncoder(RocalContext p_context, std::vector& anchors, float criteria, + std::vector& means, std::vector& stds, bool offset, float scale) { if (!p_context) THROW("Invalid rocal context passed to rocalBoxEncoder") auto context = static_cast(p_context); diff --git a/rocAL/source/meta_data/coco_meta_data_reader.cpp b/rocAL/source/meta_data/coco_meta_data_reader.cpp index 66b34157f..d0ddea904 100644 --- a/rocAL/source/meta_data/coco_meta_data_reader.cpp +++ b/rocAL/source/meta_data/coco_meta_data_reader.cpp @@ -33,6 +33,8 @@ using namespace std; void COCOMetaDataReader::init(const MetaDataConfig &cfg, pMetaDataBatch meta_data_batch) { _path = cfg.path(); + _avoid_class_remapping = cfg.class_remapping(); + this->set_aspect_ratio_grouping(cfg.aspect_ratio_grouping()); _output = meta_data_batch; _output->set_metadata_type(cfg.type()); } @@ -41,6 +43,13 @@ bool COCOMetaDataReader::exists(const std::string &image_name) { return _map_content.find(image_name) != _map_content.end(); } +ImgSize COCOMetaDataReader::lookup_image_size(const std::string &image_name) { + auto it = _map_content.find(image_name); + if (_map_content.end() == it) + THROW("ERROR: Given name not present in the map " + image_name) + return it->second->get_img_size(); +} + void COCOMetaDataReader::lookup(const std::vector &image_names) { if (image_names.empty()) { WRN("No image names passed") @@ -67,7 +76,7 @@ void COCOMetaDataReader::lookup(const std::vector &image_names) { } } -void COCOMetaDataReader::add(std::string image_name, BoundingBoxCords bb_coords, Labels bb_labels, ImgSize image_size, MaskCords mask_cords, std::vector 
polygon_count, std::vector> vertices_count) { +void COCOMetaDataReader::add(std::string image_name, BoundingBoxCords bb_coords, Labels bb_labels, ImgSize image_size, MaskCords mask_cords, std::vector polygon_count, std::vector> vertices_count, int image_id) { if (exists(image_name)) { auto it = _map_content.find(image_name); it->second->get_bb_cords().push_back(bb_coords[0]); @@ -77,7 +86,7 @@ void COCOMetaDataReader::add(std::string image_name, BoundingBoxCords bb_coords, it->second->get_vertices_count().push_back(vertices_count[0]); return; } - pMetaDataPolygonMask info = std::make_shared(bb_coords, bb_labels, image_size, mask_cords, polygon_count, vertices_count); + pMetaDataPolygonMask info = std::make_shared(bb_coords, bb_labels, image_size, mask_cords, polygon_count, vertices_count, image_id); _map_content.insert(pair>(image_name, info)); } @@ -163,6 +172,7 @@ void COCOMetaDataReader::read_all(const std::string &path) { parser.EnterObject(); while (const char *key = parser.NextObjectKey()) { if (0 == std::strcmp(key, "images")) { + int image_id; RAPIDJSON_ASSERT(parser.PeekType() == kArrayType); parser.EnterArray(); while (parser.NextArrayValue()) { @@ -178,10 +188,13 @@ void COCOMetaDataReader::read_all(const std::string &path) { img_size.h = parser.GetInt(); } else if (0 == std::strcmp(internal_key, "file_name")) { image_name = parser.GetString(); + } else if (0 == std::strcmp(internal_key, "id")) { + image_id = parser.GetInt(); } else { parser.SkipValue(); } } + _map_img_names.insert(pair(image_id, image_name)); _map_img_sizes.insert(pair(image_name, img_size)); img_size = {}; } @@ -256,12 +269,9 @@ void COCOMetaDataReader::read_all(const std::string &path) { parser.SkipValue(); } } - char buffer[13]; - sprintf(buffer, "%012d", id); - string str(buffer); - std::string file_name = str + ".jpg"; - auto it = _map_img_sizes.find(file_name); + auto itr = _map_img_names.find(id); + auto it = _map_img_sizes.find(itr->second); ImgSize image_size = it->second; // 
Convert to "ltrb" format if ((_output->get_metadata_type() == MetaDataType::PolygonMask) && iscrowd == 0) { box.l = bbox[0]; @@ -272,7 +282,7 @@ void COCOMetaDataReader::read_all(const std::string &path) { bb_labels.push_back(label); polygon_count.push_back(polygon_size); vertices_count.push_back(vertices_array); - add(file_name, bb_coords, bb_labels, image_size, mask, polygon_count, vertices_count); + add(itr->second, bb_coords, bb_labels, image_size, mask, polygon_count, vertices_count, id); mask.clear(); polygon_size = 0; polygon_count.clear(); @@ -287,7 +297,7 @@ void COCOMetaDataReader::read_all(const std::string &path) { box.b = (bbox[1] + bbox[3]); bb_coords.push_back(box); bb_labels.push_back(label); - add(file_name, bb_coords, bb_labels, image_size, id); + add(itr->second, bb_coords, bb_labels, image_size, id); bb_coords.clear(); bb_labels.clear(); } @@ -303,7 +313,7 @@ void COCOMetaDataReader::read_all(const std::string &path) { Labels continuous_label_id; for (unsigned int i = 0; i < bb_coords.size(); i++) { auto _it_label = _label_info.find(bb_labels[i]); - int cnt_idx = _it_label->second; + int cnt_idx = _avoid_class_remapping ? 
_it_label->first : _it_label->second; continuous_label_id.push_back(cnt_idx); } elem.second->set_labels(continuous_label_id); diff --git a/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp b/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp index a2f2db643..b3deb4199 100644 --- a/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp +++ b/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp @@ -75,6 +75,11 @@ void ResizeMirrorNormalizeMetaNode::update_parameters(pMetaDataBatch input_meta_ bb_coords.push_back(coords_buf[j]); bb_labels.push_back(labels_buf[j]); } + // get roi width and height of output image + auto img_roi_size = input_meta_data->get_img_roi_sizes_batch()[i]; + img_roi_size.w = output_roi[i].x2; + img_roi_size.h = output_roi[i].y2; + output_meta_data->get_img_roi_sizes_batch()[i] = img_roi_size; output_meta_data->get_bb_cords_batch()[i] = bb_coords; output_meta_data->get_labels_batch()[i] = bb_labels; } diff --git a/rocAL/source/pipeline/master_graph.cpp b/rocAL/source/pipeline/master_graph.cpp index f36139122..a11618074 100644 --- a/rocAL/source/pipeline/master_graph.cpp +++ b/rocAL/source/pipeline/master_graph.cpp @@ -263,9 +263,9 @@ MasterGraph::build() { THROW("No output tensors are there, cannot create the pipeline") #if ENABLE_HIP || ENABLE_OPENCL - _ring_buffer.init(_mem_type, (void *)_device.resources(), _internal_tensor_list.data_size()); + _ring_buffer.init(_mem_type, (void *)_device.resources(), _internal_tensor_list.data_size(), _user_batch_size * sizeof(RocalROI)); #else - _ring_buffer.init(_mem_type, nullptr, _internal_tensor_list.data_size()); + _ring_buffer.init(_mem_type, nullptr, _internal_tensor_list.data_size(), _user_batch_size * sizeof(RocalROI)); #endif if (_is_box_encoder) _ring_buffer.initBoxEncoderMetaData(_mem_type, _user_batch_size * _num_anchors * 4 * sizeof(float), _user_batch_size * _num_anchors * sizeof(int)); create_single_graph(); @@ -452,7 +452,7 @@ MasterGraph::timing() { 
MasterGraph::Status MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, - float multiplier2, float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type) { + float multiplier2, float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, int max_height, int max_width) { if (no_more_processed_data()) return MasterGraph::Status::NO_MORE_DATA; @@ -474,6 +474,10 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier const size_t h = dims[1]; const size_t w = dims[2]; const size_t single_output_tensor_size = output_tensor_info.data_size(); + if ((max_height == 0) || (max_width == 0)) { + max_height = h; + max_width = w; + } #if ENABLE_OPENCL if (output_tensor_info.mem_type() == RocalMemType::OCL) { @@ -491,7 +495,7 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier cl_kernel kernel = _device["utility"][kernel_name]; auto queue = _device.resources()->cmd_queue; unsigned dest_buf_offset = 0; - auto output_buffers = _ring_buffer.get_read_buffers(); + auto output_buffers = _ring_buffer.get_read_buffers().first; if (_output_tensor_buffer == nullptr) { size_t size = output_tensor_info.data_size() * sizeof(cl_float); @@ -548,7 +552,7 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier if (output_tensor_info.mem_type() == RocalMemType::HIP) { unsigned int fp16 = (output_data_type == RocalTensorDataType::FP16); - auto output_buffers = _ring_buffer.get_read_buffers(); + auto output_buffers = _ring_buffer.get_read_buffers().first; unsigned dest_buf_offset = 0; // copy hip buffer to out_ptr // todo:: add callback routing to exchange memory pointer to avoid extra copy @@ -556,11 +560,11 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier auto 
img_buffer = out_tensor; if (format == RocalTensorlayout::NHWC) { HipExecCopyInt8ToNHWC(_device.resources()->hip_stream, (const void *)img_buffer, out_ptr, dest_buf_offset, n, c, h, w, - multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16); + multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_height, max_width); } else { HipExecCopyInt8ToNCHW(_device.resources()->hip_stream, (const void *)img_buffer, out_ptr, dest_buf_offset, n, c, h, w, - multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16); + multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_height, max_width); } dest_buf_offset += single_output_tensor_size; } @@ -569,7 +573,7 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier if (output_mem_type == RocalOutputMemType::ROCAL_MEMCPY_GPU) { unsigned int fp16 = (output_data_type == RocalTensorDataType::FP16); - auto output_buffers = _ring_buffer.get_read_buffers(); + auto output_buffers = _ring_buffer.get_read_buffers().first; unsigned dest_buf_offset = 0; if (_output_tensor_buffer == nullptr) { @@ -593,11 +597,11 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier if (format == RocalTensorlayout::NHWC) { HipExecCopyInt8ToNHWC(_device.resources()->hip_stream, (const void *)_output_tensor_buffer, out_ptr, dest_buf_offset, n, c, h, w, - multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16); + multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_height, max_width); } else { HipExecCopyInt8ToNCHW(_device.resources()->hip_stream, (const void *)_output_tensor_buffer, out_ptr, dest_buf_offset, n, c, h, w, - multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16); + multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, 
reverse_channels, fp16, max_height, max_width); } dest_buf_offset += single_output_tensor_size; } @@ -610,15 +614,17 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier float offset[3] = {offset0, offset1, offset2}; size_t dest_buf_offset_start = 0; - auto output_buffers = _ring_buffer.get_read_buffers(); + auto output_buffers = _ring_buffer.get_read_buffers().first; auto num_threads = _cpu_num_threads * 2; for (auto &&out_tensor : output_buffers) { unsigned int single_tensor_size = w * c * h; - auto channel_size = w * h; + unsigned int channel_size = max_width * max_height; + unsigned int output_single_tensor_size = max_height * max_width * c; + unsigned int input_width_stride = w * c; #pragma omp parallel for num_threads(num_threads) - for (unsigned int batchCount = 0; batchCount < n; batchCount++) { - size_t dest_buf_offset = dest_buf_offset_start + single_tensor_size * batchCount; - auto in_buffer = (unsigned char *)out_tensor + single_tensor_size * batchCount; + for (unsigned int batch_count = 0; batch_count < n; batch_count++) { + size_t dest_buf_offset = dest_buf_offset_start + output_single_tensor_size * batch_count; + auto in_buffer = (unsigned char *)out_tensor + single_tensor_size * batch_count; if (format == RocalTensorlayout::NHWC) { if (output_data_type == RocalTensorDataType::FP32) { @@ -669,34 +675,37 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier __m256 padd0 = _mm256_set1_ps(offset0); __m256 padd1 = _mm256_set1_ps(offset1); __m256 padd2 = _mm256_set1_ps(offset2); - unsigned int alignedLength = (channel_size & ~7); // multiple of 8 - unsigned int i = 0; + int alignedLength = (max_width & ~7); // multiple of 8 __m256 fR, fG, fB; - for (; i < alignedLength; i += 8) { - __m256i pix0 = _mm256_loadu_si256((const __m256i *)in_buffer); - pix0 = _mm256_permutevar8x32_epi32(pix0, _mm256_setr_epi32(0, 1, 2, 3, 3, 4, 5, 6)); - fB = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_R)); - fG 
= _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_G)); - fR = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_B)); - fB = _mm256_mul_ps(fB, pmul0); - fG = _mm256_mul_ps(fG, pmul1); - fR = _mm256_mul_ps(fR, pmul2); - fB = _mm256_add_ps(fB, padd0); - fG = _mm256_add_ps(fG, padd1); - fR = _mm256_add_ps(fR, padd2); - _mm256_storeu_ps(B_buf, fB); - _mm256_storeu_ps(G_buf, fG); - _mm256_storeu_ps(R_buf, fR); - B_buf += 8; - G_buf += 8; - R_buf += 8; - in_buffer += 24; - } - for (; i < channel_size; i++, in_buffer += 3) { - *B_buf++ = (in_buffer[0] * multiplier0) + offset0; - *G_buf++ = (in_buffer[1] * multiplier1) + offset1; - *R_buf++ = (in_buffer[2] * multiplier2) + offset1; + for (int row = 0; row < max_height; row++) { + unsigned char *in_buffer_row = reinterpret_cast(in_buffer) + (row * input_width_stride); + int col = 0; + for (; col < alignedLength; col += 8) { + __m256i pix0 = _mm256_loadu_si256((const __m256i *)in_buffer_row); + pix0 = _mm256_permutevar8x32_epi32(pix0, _mm256_setr_epi32(0, 1, 2, 3, 3, 4, 5, 6)); + fB = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_R)); + fG = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_G)); + fR = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_B)); + fB = _mm256_mul_ps(fB, pmul0); + fG = _mm256_mul_ps(fG, pmul1); + fR = _mm256_mul_ps(fR, pmul2); + fB = _mm256_add_ps(fB, padd0); + fG = _mm256_add_ps(fG, padd1); + fR = _mm256_add_ps(fR, padd2); + _mm256_storeu_ps(B_buf, fB); + _mm256_storeu_ps(G_buf, fG); + _mm256_storeu_ps(R_buf, fR); + B_buf += 8; + G_buf += 8; + R_buf += 8; + in_buffer_row += 24; + } + for (; col < max_width; col++, in_buffer_row += 3) { + *B_buf++ = (in_buffer_row[0] * multiplier0) + offset0; + *G_buf++ = (in_buffer_row[1] * multiplier1) + offset1; + *R_buf++ = (in_buffer_row[2] * multiplier2) + offset2; + } } #else for (unsigned channel_idx = 0; channel_idx < c; channel_idx++) { @@ -733,35 +742,38 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier __m256 padd0
= _mm256_set1_ps(offset0); __m256 padd1 = _mm256_set1_ps(offset1); __m256 padd2 = _mm256_set1_ps(offset2); - unsigned int alignedLength = (channel_size & ~7); // multiple of 8 - unsigned int i = 0; + int alignedLength = (max_width & ~7); // multiple of 8 __m256 fR, fG, fB; __m128i tempR, tempG, tempB; - for (; i < alignedLength; i += 8) { - __m256i pix0 = _mm256_loadu_si256((const __m256i *)in_buffer); - pix0 = _mm256_permutevar8x32_epi32(pix0, _mm256_setr_epi32(0, 1, 2, 3, 3, 4, 5, 6)); - fB = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_R)); - fG = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_G)); - fR = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_B)); - fB = _mm256_fmadd_ps(fB, pmul0, padd0); - fG = _mm256_fmadd_ps(fG, pmul1, padd1); - fR = _mm256_fmadd_ps(fR, pmul2, padd2); - tempB = _mm256_cvtps_ph(fB, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); - tempG = _mm256_cvtps_ph(fG, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); - tempR = _mm256_cvtps_ph(fR, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); - _mm_storeu_si128((__m128i *)B_buf_16, tempB); - _mm_storeu_si128((__m128i *)G_buf_16, tempG); - _mm_storeu_si128((__m128i *)R_buf_16, tempR); - B_buf_16 += 8; - G_buf_16 += 8; - R_buf_16 += 8; - in_buffer += 24; - } - for (; i < channel_size; i++, in_buffer += 3) { - *B_buf_16++ = (half)(in_buffer[0] * multiplier0) + offset0; - *G_buf_16++ = (half)(in_buffer[1] * multiplier1) + offset1; - *R_buf_16++ = (half)(in_buffer[2] * multiplier2) + offset2; + for (int row = 0; row < max_height; row++) { + unsigned char *in_buffer_row = reinterpret_cast(in_buffer) + (row * input_width_stride); + int col = 0; + for (; col < alignedLength; col += 8) { + __m256i pix0 = _mm256_loadu_si256((const __m256i *)in_buffer_row); + pix0 = _mm256_permutevar8x32_epi32(pix0, _mm256_setr_epi32(0, 1, 2, 3, 3, 4, 5, 6)); + fB = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_R)); + fG = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_G)); + fR = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, 
mask_B)); + fB = _mm256_fmadd_ps(fB, pmul0, padd0); + fG = _mm256_fmadd_ps(fG, pmul1, padd1); + fR = _mm256_fmadd_ps(fR, pmul2, padd2); + tempB = _mm256_cvtps_ph(fB, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + tempG = _mm256_cvtps_ph(fG, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + tempR = _mm256_cvtps_ph(fR, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + _mm_storeu_si128((__m128i *)B_buf_16, tempB); + _mm_storeu_si128((__m128i *)G_buf_16, tempG); + _mm_storeu_si128((__m128i *)R_buf_16, tempR); + B_buf_16 += 8; + G_buf_16 += 8; + R_buf_16 += 8; + in_buffer_row += 24; + } + for (; col < max_width; col++, in_buffer_row += 3) { + *B_buf_16++ = (half)(in_buffer_row[0] * multiplier0) + offset0; + *G_buf_16++ = (half)(in_buffer_row[1] * multiplier1) + offset1; + *R_buf_16++ = (half)(in_buffer_row[2] * multiplier2) + offset2; + } } #else for (unsigned channel_idx = 0; channel_idx < c; channel_idx++) { @@ -808,7 +820,7 @@ MasterGraph::copy_output(unsigned char *out_ptr, size_t out_size_in_bytes) { // to avoid unnecessary sequence of synchronizations // get_read_buffers() calls block_if_empty() internally and blocks if buffers are empty until a new batch is processed - auto output_buffers = _ring_buffer.get_read_buffers(); + auto output_buffers = _ring_buffer.get_read_buffers().first; auto out_image_idx = output_buffers.size(); for (auto &&output_handle : output_buffers) { bool sync_flag = (--out_image_idx == 0) ? 
CL_TRUE : CL_FALSE; @@ -831,7 +843,7 @@ MasterGraph::copy_output(unsigned char *out_ptr, size_t out_size_in_bytes) { // get_read_buffers() calls block_if_empty() internally and blocks if buffers are empty until a new batch is processed size_t dest_buf_offset = 0; - auto output_buffers = _ring_buffer.get_read_buffers(); + auto output_buffers = _ring_buffer.get_read_buffers().first; for (auto &&output_handle : output_buffers) { hipError_t err = hipMemcpyDtoHAsync((void *)(out_ptr + dest_buf_offset), output_handle, size, _device.resources()->hip_stream); if (err) { @@ -846,7 +858,7 @@ MasterGraph::copy_output(unsigned char *out_ptr, size_t out_size_in_bytes) { } else { #endif // get_read_buffer is blocking if _ring_buffer is empty, and blocks this thread till internal processing thread process a new batch and store in the _ring_buffer - auto output_buffer = _ring_buffer.get_read_buffers()[0]; + auto output_buffer = _ring_buffer.get_read_buffers().first[0]; memcpy(out_ptr, output_buffer, size); #if ENABLE_OPENCL || ENABLE_HIP } @@ -857,10 +869,13 @@ MasterGraph::copy_output(unsigned char *out_ptr, size_t out_size_in_bytes) { TensorList * MasterGraph::get_output_tensors() { - auto output_ptr = _ring_buffer.get_read_buffers(); - for (unsigned i = 0; i < _internal_tensor_list.size(); i++) + auto read_buffers = _ring_buffer.get_read_buffers(); + auto output_ptr = read_buffers.first; + auto roi_ptr = read_buffers.second; + for (unsigned i = 0; i < _internal_tensor_list.size(); i++) { _output_tensor_list[i]->set_mem_handle(output_ptr[i]); - + _output_tensor_list[i]->set_roi(roi_ptr[i]); + } return &_output_tensor_list; } @@ -880,6 +895,7 @@ void MasterGraph::output_routine() { _rb_block_if_full_time.start(); // _ring_buffer.get_write_buffers() is blocking and blocks here until user uses processed image by calling run() and frees space in the ring_buffer auto write_buffers = _ring_buffer.get_write_buffers(); + auto write_output_buffers = write_buffers.first; 
_rb_block_if_full_time.end(); // Swap handles on the input tensor, so that new tensor is loaded to be processed @@ -904,7 +920,7 @@ void MasterGraph::output_routine() { // Swap handles on the output tensor, so that new processed tensor will be written to the a new buffer for (size_t idx = 0; idx < _internal_tensor_list.size(); idx++) - _internal_tensor_list[idx]->swap_handle(write_buffers[idx]); + _internal_tensor_list[idx]->swap_handle(write_output_buffers[idx]); if (!_processing) break; @@ -931,6 +947,10 @@ void MasterGraph::output_routine() { _process_time.start(); _graph->process(); _process_time.end(); + + auto write_roi_buffers = write_buffers.second; // Obtain ROI buffers from ring buffer + for (size_t idx = 0; idx < _internal_tensor_list.size(); idx++) + _internal_tensor_list[idx]->copy_roi(write_roi_buffers[idx]); // Copy ROI from internal tensor's buffer to ring buffer _bencode_time.start(); if (_is_box_encoder) { auto bbox_encode_write_buffers = _ring_buffer.get_box_encode_write_buffers(); @@ -984,13 +1004,15 @@ void MasterGraph::stop_processing() { _output_thread.join(); } -std::vector MasterGraph::create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType metadata_type, bool ltrb_bbox, bool is_box_encoder, float sigma, unsigned pose_output_width, unsigned pose_output_height) { +std::vector MasterGraph::create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType metadata_type, bool ltrb_bbox, bool is_box_encoder, bool avoid_class_remapping, bool aspect_ratio_grouping, float sigma, unsigned pose_output_width, unsigned pose_output_height) { if (_meta_data_reader) THROW("A metadata reader has already been created") if (_augmented_meta_data) THROW("Metadata output already defined, there can only be a single output for metadata augmentation"); MetaDataConfig config(metadata_type, reader_type, source_path, std::map(), std::string()); + 
config.set_avoid_class_remapping(avoid_class_remapping); + config.set_aspect_ratio_grouping(aspect_ratio_grouping); config.set_out_img_width(pose_output_width); config.set_out_img_height(pose_output_height); _meta_data_graph = create_meta_data_graph(config); @@ -1409,7 +1431,7 @@ MasterGraph::copy_out_tensor_planar(void *out_ptr, RocalTensorlayout format, flo float offset[3] = {offset0, offset1, offset2}; size_t dest_buf_offset = 0; - auto output_buffers = _ring_buffer.get_read_buffers(); + auto output_buffers = _ring_buffer.get_read_buffers().first; for (auto &&out_tensor : output_buffers) { for (unsigned batch = 0; batch < n; batch++) { diff --git a/rocAL/source/pipeline/ring_buffer.cpp b/rocAL/source/pipeline/ring_buffer.cpp index 4dad4e7a5..9d7a798e9 100644 --- a/rocAL/source/pipeline/ring_buffer.cpp +++ b/rocAL/source/pipeline/ring_buffer.cpp @@ -22,11 +22,11 @@ THE SOFTWARE. #include "ring_buffer.h" -#include - RingBuffer::RingBuffer(unsigned buffer_depth) : BUFF_DEPTH(buffer_depth), _dev_sub_buffer(buffer_depth), _host_sub_buffers(buffer_depth), + _dev_roi_buffers(buffer_depth), + _host_roi_buffers(buffer_depth), _dev_bbox_buffer(buffer_depth), _dev_labels_buffer(buffer_depth) { reset(); @@ -50,11 +50,13 @@ void RingBuffer::block_if_full() { _wait_for_unload.wait(lock); } } -std::vector RingBuffer::get_read_buffers() { + +std::pair, std::vector> RingBuffer::get_read_buffers() { block_if_empty(); if ((_mem_type == RocalMemType::OCL) || (_mem_type == RocalMemType::HIP)) - return _dev_sub_buffer[_read_ptr]; - return _host_sub_buffers[_read_ptr]; + return std::make_pair(_dev_sub_buffer[_read_ptr], _dev_roi_buffers[_read_ptr]); + + return std::make_pair(_host_sub_buffers[_read_ptr], _host_roi_buffers[_read_ptr]); } std::pair RingBuffer::get_box_encode_read_buffers() { @@ -64,12 +66,12 @@ std::pair RingBuffer::get_box_encode_read_buffers() { return std::make_pair(_host_meta_data_buffers[_read_ptr][1], _host_meta_data_buffers[_read_ptr][0]); } -std::vector 
RingBuffer::get_write_buffers() { +std::pair, std::vector> RingBuffer::get_write_buffers() { block_if_full(); if ((_mem_type == RocalMemType::OCL) || (_mem_type == RocalMemType::HIP)) - return _dev_sub_buffer[_write_ptr]; + return std::make_pair(_dev_sub_buffer[_write_ptr], _dev_roi_buffers[_write_ptr]); - return _host_sub_buffers[_write_ptr]; + return std::make_pair(_host_sub_buffers[_write_ptr], _host_roi_buffers[_write_ptr]); } std::pair RingBuffer::get_box_encode_write_buffers() { @@ -109,7 +111,7 @@ void RingBuffer::unblock_writer() { _wait_for_unload.notify_all(); } -void RingBuffer::init(RocalMemType mem_type, void *devres, std::vector &sub_buffer_size) { +void RingBuffer::init(RocalMemType mem_type, void *devres, std::vector &sub_buffer_size, size_t roi_buffer_size) { _mem_type = mem_type; _dev = devres; _sub_buffer_size = sub_buffer_size; @@ -152,6 +154,7 @@ void RingBuffer::init(RocalMemType mem_type, void *devres, std::vector & for (size_t buffIdx = 0; buffIdx < BUFF_DEPTH; buffIdx++) { _dev_sub_buffer[buffIdx].resize(sub_buffer_count); + _dev_roi_buffers[buffIdx].resize(sub_buffer_count); for (unsigned sub_idx = 0; sub_idx < sub_buffer_count; sub_idx++) { hipError_t err = hipMalloc(&_dev_sub_buffer[buffIdx][sub_idx], _sub_buffer_size[sub_idx]); // printf("allocated HIP device buffer <%d, %d, %d, %p>\n", buffIdx, sub_idx, _sub_buffer_size[sub_idx], _dev_sub_buffer[buffIdx][sub_idx]); @@ -160,6 +163,11 @@ void RingBuffer::init(RocalMemType mem_type, void *devres, std::vector & THROW("hipMalloc of size " + TOSTR(_sub_buffer_size[sub_idx]) + " index " + TOSTR(sub_idx) + " failed " + TOSTR(err)); } + err = hipHostMalloc((void **)&_dev_roi_buffers[buffIdx][sub_idx], roi_buffer_size, hipHostMallocDefault); // Allocate HIP page locked ROI buffers + if (err != hipSuccess || !_dev_roi_buffers[buffIdx][sub_idx]) { + _dev_roi_buffers.clear(); + THROW("hipHostMalloc of size " + TOSTR(roi_buffer_size) + " failed " + TOSTR(err)) + } } } } else { @@ -167,8 +175,11 @@ 
void RingBuffer::init(RocalMemType mem_type, void *devres, std::vector & for (size_t buffIdx = 0; buffIdx < BUFF_DEPTH; buffIdx++) { // a minimum of extra MEM_ALIGNMENT is allocated _host_sub_buffers[buffIdx].resize(sub_buffer_count); - for (size_t sub_buff_idx = 0; sub_buff_idx < sub_buffer_count; sub_buff_idx++) + _host_roi_buffers[buffIdx].resize(sub_buffer_count); + for (size_t sub_buff_idx = 0; sub_buff_idx < sub_buffer_count; sub_buff_idx++) { _host_sub_buffers[buffIdx][sub_buff_idx] = aligned_alloc(MEM_ALIGNMENT, MEM_ALIGNMENT * (_sub_buffer_size[sub_buff_idx] / MEM_ALIGNMENT + 1)); + _host_roi_buffers[buffIdx][sub_buff_idx] = static_cast(malloc(roi_buffer_size)); // Allocate HOST ROI buffers + } } #if ENABLE_OPENCL || ENABLE_HIP } @@ -287,6 +298,11 @@ void RingBuffer::release_gpu_res() { // printf("Error Freeing device buffer <%d, %d, %p>\n", buffIdx, sub_buf_idx, _dev_sub_buffer[buffIdx][sub_buf_idx]); ERR("Could not release hip memory in the ring buffer") } + if (_dev_roi_buffers[buffIdx][sub_buf_idx]) { + if (hipHostFree((void *)_dev_roi_buffers[buffIdx][sub_buf_idx]) != hipSuccess) { + ERR("Could not release hip memory for ROI in the ring buffer") + } + } } if (_host_meta_data_buffers.size() != 0) { for (unsigned sub_buf_idx = 0; sub_buf_idx < _host_meta_data_buffers[buffIdx].size(); sub_buf_idx++) { @@ -297,6 +313,7 @@ void RingBuffer::release_gpu_res() { } _dev_sub_buffer.clear(); _host_meta_data_buffers.clear(); + _dev_roi_buffers.clear(); } #elif ENABLE_OPENCL if (_mem_type == RocalMemType::OCL) { @@ -325,6 +342,8 @@ RingBuffer::~RingBuffer() { for (unsigned sub_buf_idx = 0; sub_buf_idx < _host_sub_buffers[buffIdx].size(); sub_buf_idx++) { if (_host_sub_buffers[buffIdx][sub_buf_idx]) free(_host_sub_buffers[buffIdx][sub_buf_idx]); + if (_host_roi_buffers[buffIdx][sub_buf_idx]) + free(_host_roi_buffers[buffIdx][sub_buf_idx]); } if (_host_meta_data_buffers.size() != 0) { for (unsigned sub_buf_idx = 0; sub_buf_idx < 
_host_meta_data_buffers[buffIdx].size(); sub_buf_idx++) { @@ -335,6 +354,7 @@ RingBuffer::~RingBuffer() { } _host_sub_buffers.clear(); _host_meta_data_buffers.clear(); + _host_roi_buffers.clear(); } } diff --git a/rocAL/source/pipeline/tensor.cpp b/rocAL/source/pipeline/tensor.cpp index 2cebfd0e0..08e73b5ee 100644 --- a/rocAL/source/pipeline/tensor.cpp +++ b/rocAL/source/pipeline/tensor.cpp @@ -107,9 +107,14 @@ bool operator==(const TensorInfo &rhs, const TensorInfo &lhs) { } void TensorInfo::reset_tensor_roi_buffers() { - if (!_roi_buf) { - size_t roi_size = (_layout == RocalTensorlayout::NFCHW || _layout == RocalTensorlayout::NFHWC) ? _dims[0] * _dims[1] : _batch_size; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions - allocate_host_or_pinned_mem((void **)&_roi_buf, roi_size * 4 * sizeof(unsigned), _mem_type); + size_t roi_size = (_layout == RocalTensorlayout::NFCHW || _layout == RocalTensorlayout::NFHWC) ? _dims[0] * _dims[1] : _batch_size; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions + allocate_host_or_pinned_mem((void **)&_roi_buf, roi_size * 4 * sizeof(unsigned), _mem_type); + if (_mem_type == RocalMemType::HIP) { +#if ENABLE_HIP + _roi.reset(_roi_buf, hipHostFree); +#endif + } else { + _roi.reset(_roi_buf, free); } if (_is_image) { auto roi = get_roi(); @@ -172,46 +177,6 @@ TensorInfo::TensorInfo(std::vector dims, set_max_shape(); } -TensorInfo::TensorInfo(const TensorInfo &other) { - _type = other._type; - _num_of_dims = other._num_of_dims; - _dims = other._dims; - _strides = other._strides; - _batch_size = other._batch_size; - _mem_type = other._mem_type; - _roi_type = other._roi_type; - _data_type = other._data_type; - _layout = other._layout; - _color_format = other._color_format; - _data_type_size = other._data_type_size; - _data_size = other._data_size; - _max_shape = other._max_shape; - _is_image = other._is_image; - _is_metadata = other._is_metadata; - _channels = 
other._channels; - if (!other.is_metadata()) { // For Metadata ROI buffer is not required - allocate_host_or_pinned_mem(&_roi_buf, _batch_size * 4 * sizeof(unsigned), _mem_type); - memcpy((void *)_roi_buf, (const void *)other.get_roi(), _batch_size * 4 * sizeof(unsigned)); - } -} - -TensorInfo::~TensorInfo() { - if (!_is_metadata) { - if (_mem_type == RocalMemType::HIP) { -#if ENABLE_HIP - if (_roi_buf) { - hipError_t err = hipHostFree(_roi_buf); - if (err != hipSuccess) - ERR("hipHostFree failed " + TOSTR(err)); - } -#endif - } else { - if (_roi_buf) free(_roi_buf); - } - _roi_buf = nullptr; - } -} - void Tensor::update_tensor_roi(const std::vector &width, const std::vector &height) { if (_info.is_image()) { diff --git a/rocAL/source/readers/image/coco_file_source_reader.cpp b/rocAL/source/readers/image/coco_file_source_reader.cpp index 636421a29..22eb63b02 100644 --- a/rocAL/source/readers/image/coco_file_source_reader.cpp +++ b/rocAL/source/readers/image/coco_file_source_reader.cpp @@ -85,9 +85,53 @@ Reader::Status COCOFileSourceReader::initialize(ReaderConfig desc) { replicate_last_batch_to_pad_partial_shard(); } } - // shuffle dataset if set - if (ret == Reader::Status::OK && _shuffle) - std::random_shuffle(_file_names.begin(), _file_names.end()); + + if (_meta_data_reader && _meta_data_reader->aspect_ratio_grouping()) { + // calculate the aspect ratio for each file and create a pair of + std::vector> file_aspect_ratio_pair(_file_names.size()); + for (size_t i = 0; i < _file_names.size(); i++) { + auto filename = _file_names[i]; + std::string base_filename = filename.substr(filename.find_last_of("/\\") + 1); + auto img_size = _meta_data_reader->lookup_image_size(base_filename); + auto aspect_ratio = static_cast(img_size.h) / img_size.w; + file_aspect_ratio_pair[i] = std::make_pair(filename, aspect_ratio); + _aspect_ratios.push_back(aspect_ratio); + }; + + // sort the pairs according to aspect ratios + std::sort(file_aspect_ratio_pair.begin(), 
file_aspect_ratio_pair.end(), [](auto &lop, auto &rop) { return lop.second < rop.second; }); + + // extract sorted file_names + std::transform(file_aspect_ratio_pair.begin(), file_aspect_ratio_pair.end(), std::back_inserter(_sorted_file_names), [](auto &pair) { return pair.first; }); + // extract sorted aspect ratios + _aspect_ratios.clear(); + std::transform(file_aspect_ratio_pair.begin(), file_aspect_ratio_pair.end(), std::back_inserter(_aspect_ratios), [](auto &pair) { return pair.second; }); + + // Copy the sorted file_names to _file_names vector to be used in sharding + _file_names = _sorted_file_names; + // Calculate the mid element which divides the aspect ratios into two groups (<=1.0 and >1.0) + auto mid = std::upper_bound(_aspect_ratios.begin(), _aspect_ratios.end(), 1.0f) - _aspect_ratios.begin(); + + // shuffle dataset if set + if (ret == Reader::Status::OK && _shuffle) { + // Shuffle within groups using the mid element as the limit - [start, mid) and [mid, last) + std::random_shuffle(_file_names.begin(), _file_names.begin() + mid); + std::random_shuffle(_file_names.begin() + mid, _file_names.end()); + std::vector shuffled_filenames; + int split_count = _file_names.size() / _batch_count; // Number of batches for this shard + std::vector indexes(split_count); + std::iota(indexes.begin(), indexes.end(), 0); + // Shuffle the index vector and use the index to fetch batch size elements for decoding + std::random_shuffle(indexes.begin(), indexes.end()); + for (auto const idx : indexes) + shuffled_filenames.insert(shuffled_filenames.end(), _file_names.begin() + idx * _batch_count, _file_names.begin() + idx * _batch_count + _batch_count); + _file_names = shuffled_filenames; + } + } else { + // shuffle dataset if set + if (ret == Reader::Status::OK && _shuffle) + std::random_shuffle(_file_names.begin(), _file_names.end()); + } return ret; } @@ -173,8 +217,27 @@ int COCOFileSourceReader::release() { } void COCOFileSourceReader::reset() { - if (_shuffle) + if 
(_meta_data_reader && _meta_data_reader->aspect_ratio_grouping()) { + _file_names = _sorted_file_names; + // Calculate the mid element which divides the aspect ratios into two groups (<=1.0 and >1.0) + auto mid = std::upper_bound(_aspect_ratios.begin(), _aspect_ratios.end(), 1.0f) - _aspect_ratios.begin(); + if (_shuffle) { + // Shuffle within groups using the mid element as the limit - [start, mid) and [mid, last) + std::random_shuffle(_file_names.begin(), _file_names.begin() + mid); + std::random_shuffle(_file_names.begin() + mid, _file_names.end()); + std::vector shuffled_filenames; + int split_count = _file_names.size() / _batch_count; // Number of batches for this shard + std::vector indexes(split_count); + std::iota(indexes.begin(), indexes.end(), 0); + // Shuffle the index vector and use the index to fetch batch size elements for decoding + std::random_shuffle(indexes.begin(), indexes.end()); + for (auto const idx : indexes) + shuffled_filenames.insert(shuffled_filenames.end(), _file_names.begin() + idx * _batch_count, _file_names.begin() + idx * _batch_count + _batch_count); + _file_names = shuffled_filenames; + } + } else if (_shuffle) { std::random_shuffle(_file_names.begin(), _file_names.end()); + } _read_counter = 0; _curr_file_idx = 0; } diff --git a/rocAL_pybind/amd/rocal/pipeline.py b/rocAL_pybind/amd/rocal/pipeline.py index dca380e09..6c2dc579d 100644 --- a/rocAL_pybind/amd/rocal/pipeline.py +++ b/rocAL_pybind/amd/rocal/pipeline.py @@ -146,6 +146,11 @@ def define_graph(self): def get_handle(self): return self._handle + def copyToExternalTensor(self, array, multiplier, offset, reverse_channels, tensor_format, tensor_dtype, max_height=0, max_width=0): + + b.rocalToTensor(self._handle, ctypes.c_void_p(array.data_ptr()), tensor_format, tensor_dtype, + multiplier[0], multiplier[1], multiplier[2], offset[0], offset[1], offset[2], (1 if reverse_channels else 0), self._output_memory_type, max_height, max_width) + def get_one_hot_encoded_labels(self, array, 
device): if device == "cpu": if (isinstance(array, np.ndarray)): @@ -212,6 +217,12 @@ def get_bounding_box_labels(self): def get_bounding_box_cords(self): return b.getBoundingBoxCords(self._handle) + def get_mask_count(self, array): + return b.getMaskCount(self._handle, array) + + def get_mask_coordinates(self, array_count, array): + return b.getMaskCoordinates(self._handle, array_count, array) + def get_image_labels(self): return b.getImageLabels(self._handle) @@ -224,6 +235,9 @@ def get_encoded_boxes_and_lables(self, batch_size, num_anchors): def get_img_sizes(self, array): return b.getImgSizes(self._handle, array) + def get_roi_img_sizes(self, array): + return b.getROIImgSizes(self._handle, array) + def get_image_name_length(self, idx): return b.getImageNameLen(self._handle, idx) diff --git a/rocAL_pybind/amd/rocal/readers.py b/rocAL_pybind/amd/rocal/readers.py index d669d588f..9f28a1996 100644 --- a/rocAL_pybind/amd/rocal/readers.py +++ b/rocAL_pybind/amd/rocal/readers.py @@ -29,7 +29,7 @@ def coco(annotations_file='', ltrb=True, masks=False, ratio=False, avoid_class_remapping=False, - pixelwise_masks=False, is_box_encoder=False, is_box_iou_matcher=False, stick_to_shard=False, pad_last_batch=False): + pixelwise_masks=False, is_box_encoder=False, is_box_iou_matcher=False, aspect_ratio_grouping=False, stick_to_shard=False, pad_last_batch=False): """!Creates a COCOReader node. @param annotations_file Path to the COCO annotations file. @@ -40,6 +40,7 @@ def coco(annotations_file='', ltrb=True, masks=False, ratio=False, avoid_class_r @param pixelwise_masks Whether to read mask data and generate pixel-wise masks. @param is_box_encoder Whether to enable box encoder in the pipeline. @param is_box_iou_matcher Whether to enable box IOU matcher in the pipeline. + @param aspect_ratio_grouping Whether to enable aspect ratio grouping in the pipeline. @param stick_to_shard Determines whether the reader should stick to a data shard instead of going through the entire dataset. 
@param pad_last_batch If set to True, pads the shard by repeating the last sample. @@ -54,7 +55,9 @@ def coco(annotations_file='', ltrb=True, masks=False, ratio=False, avoid_class_r "is_output": True, "mask": masks, "ltrb": ltrb, - "is_box_encoder": is_box_encoder} + "is_box_encoder": is_box_encoder, + "avoid_class_remapping": avoid_class_remapping, + "aspect_ratio_grouping": aspect_ratio_grouping} meta_data = b.cocoReader( Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) return (meta_data, labels, bboxes) diff --git a/rocAL_pybind/amd/rocal/types.py b/rocAL_pybind/amd/rocal/types.py index 6edd9a9c9..4409f5b19 100644 --- a/rocAL_pybind/amd/rocal/types.py +++ b/rocAL_pybind/amd/rocal/types.py @@ -79,6 +79,7 @@ from rocal_pybind.types import SCALING_MODE_STRETCH from rocal_pybind.types import SCALING_MODE_NOT_SMALLER from rocal_pybind.types import SCALING_MODE_NOT_LARGER +from rocal_pybind.types import SCALING_MODE_MIN_MAX # RocalResizeInterpolationType from rocal_pybind.types import NEAREST_NEIGHBOR_INTERPOLATION @@ -141,6 +142,7 @@ SCALING_MODE_STRETCH: ("SCALING_MODE_STRETCH", SCALING_MODE_STRETCH), SCALING_MODE_NOT_SMALLER: ("SCALING_MODE_NOT_SMALLER", SCALING_MODE_NOT_SMALLER), SCALING_MODE_NOT_LARGER: ("SCALING_MODE_NOT_LARGER", SCALING_MODE_NOT_LARGER), + SCALING_MODE_MIN_MAX: ("SCALING_MODE_MIN_MAX", SCALING_MODE_MIN_MAX), } diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index 3c83772ad..e774f1f21 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -84,6 +84,18 @@ py::object wrapper_image_name(RocalContext context, int array_len) { return py::bytes(s); } +py::object wrapper_copy_to_tensor(RocalContext context, py::object p, + RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type, float multiplier0, + float multiplier1, float multiplier2, float offset0, float offset1, float offset2, + bool reverse_channels, RocalOutputMemType output_mem_type, int max_height, int 
max_width) { + auto ptr = ctypes_void_ptr(p); + // call pure C++ function + int status = rocalToTensor(context, ptr, tensor_format, tensor_output_type, multiplier0, + multiplier1, multiplier2, offset0, offset1, offset2, + reverse_channels, output_mem_type, max_height, max_width); + return py::cast(Py_None); +} + std::unordered_map rocalToPybindLayout = { {0, "NHWC"}, {1, "NCHW"}, @@ -290,6 +302,7 @@ PYBIND11_MODULE(rocal_pybind, m) { .value("SCALING_MODE_STRETCH", ROCAL_SCALING_MODE_STRETCH) .value("SCALING_MODE_NOT_SMALLER", ROCAL_SCALING_MODE_NOT_SMALLER) .value("SCALING_MODE_NOT_LARGER", ROCAL_SCALING_MODE_NOT_LARGER) + .value("SCALING_MODE_MIN_MAX", ROCAL_SCALING_MODE_MIN_MAX) .export_values(); py::enum_(types_m, "RocalResizeInterpolationType", "Decode size policies") .value("NEAREST_NEIGHBOR_INTERPOLATION", ROCAL_NEAREST_NEIGHBOR_INTERPOLATION) @@ -362,6 +375,11 @@ PYBIND11_MODULE(rocal_pybind, m) { int *ptr = static_cast(buf.ptr); rocalGetImageSizes(context, ptr); }); + m.def("getROIImgSizes", [](RocalContext context, py::array_t array) { + auto buf = array.request(); + int *ptr = static_cast(buf.ptr); + rocalGetROIImageSizes(context, ptr); + }); // rocal_api_parameter.h m.def("setSeed", &rocalSetSeed); m.def("getSeed", &rocalGetSeed); @@ -382,6 +400,7 @@ PYBIND11_MODULE(rocal_pybind, m) { m.def("getIntValue", &rocalGetIntValue); m.def("getFloatValue", &rocalGetFloatValue); // rocal_api_data_transfer.h + m.def("rocalToTensor", &wrapper_copy_to_tensor); m.def("getOutputTensors", [](RocalContext context) { rocalTensorList *output_tensor_list = rocalGetOutputTensors(context); py::list list; @@ -435,6 +454,41 @@ PYBIND11_MODULE(rocal_pybind, m) { } return boxes_list; }); + m.def("getMaskCount", [](RocalContext context, py::array_t array) { + auto buf = array.mutable_data(); + unsigned count = rocalGetMaskCount(context, buf); // total number of polygons in complete batch + return count; + }); + m.def("getMaskCoordinates", [](RocalContext context, py::array_t 
polygon_size, py::array_t mask_count) { + auto buf = polygon_size.request(); + int *polygon_size_ptr = static_cast(buf.ptr); + // call pure C++ function + rocalTensorList *mask_data = rocalGetMaskCoordinates(context, polygon_size_ptr); + rocalTensorList *bbox_labels = rocalGetBoundingBoxLabel(context); + py::list complete_list; + int poly_cnt = 0; + int prev_object_cnt = 0; + auto mask_count_buf = mask_count.request(); + int *mask_count_ptr = static_cast(mask_count_buf.ptr); + for (int i = 0; i < bbox_labels->size(); i++) { // nbatchSize + float *mask_buffer = static_cast(mask_data->at(i)->buffer()); + py::list poly_batch_list; + for (unsigned j = prev_object_cnt; j < bbox_labels->at(i)->dims().at(0) + prev_object_cnt; j++) { + py::list single_image; + for (int k = 0; k < mask_count_ptr[j]; k++) { + py::list polygons_buffer; + for (int l = 0; l < polygon_size_ptr[poly_cnt]; l++) + polygons_buffer.append(mask_buffer[l]); + mask_buffer += polygon_size_ptr[poly_cnt++]; + single_image.append(polygons_buffer); + } + poly_batch_list.append(single_image); + } + prev_object_cnt += bbox_labels->at(i)->dims().at(0); + complete_list.append(poly_batch_list); + } + return complete_list; + }); // Will be enabled when IOU matcher changes are introduced in C++ // m.def("getMatchedIndices", [](RocalContext context) { // rocalTensorList *matches = rocalGetMatchedIndices(context); From 416b6f516f4d7cc58f90706a0377a7d9a5633c6f Mon Sep 17 00:00:00 2001 From: fgladwin Date: Sun, 15 Oct 2023 13:13:13 -0400 Subject: [PATCH 02/33] Fix build issue --- rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp b/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp index 8a24dc68c..d2229d22a 100644 --- a/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp +++ b/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp @@ -77,8 +77,8 @@ void 
ResizeMirrorNormalizeMetaNode::update_parameters(pMetaDataBatch input_meta_ } // get roi width and height of output image auto img_roi_size = input_meta_data->get_img_roi_sizes_batch()[i]; - img_roi_size.w = output_roi[i].x2; - img_roi_size.h = output_roi[i].y2; + img_roi_size.w = output_roi[i].xywh.w; + img_roi_size.h = output_roi[i].xywh.h; output_meta_data->get_img_roi_sizes_batch()[i] = img_roi_size; output_meta_data->get_bb_cords_batch()[i] = bb_coords; output_meta_data->get_labels_batch()[i] = bb_labels; From d2f8fab28be9b6d9d03b88fa1b7c4bf47ea9013e Mon Sep 17 00:00:00 2001 From: fgladwin Date: Sun, 15 Oct 2023 13:32:16 -0400 Subject: [PATCH 03/33] Add box IOU matcher changes Add pybind changes for IOU matcher Remove BoundingBoxCordf --- rocAL/include/api/rocal_api_meta_data.h | 22 ++- rocAL/include/meta_data/bounding_box_graph.h | 18 +-- rocAL/include/meta_data/meta_data_graph.h | 1 + rocAL/include/pipeline/master_graph.h | 11 +- rocAL/source/api/rocal_api_meta_data.cpp | 32 ++++- rocAL/source/meta_data/bounding_box_graph.cpp | 128 +++++++++++++----- rocAL/source/pipeline/master_graph.cpp | 43 +++++- rocAL_pybind/amd/rocal/fn.py | 2 +- rocAL_pybind/amd/rocal/readers.py | 3 +- rocAL_pybind/rocal_pybind.cpp | 25 ++-- 10 files changed, 217 insertions(+), 68 deletions(-) diff --git a/rocAL/include/api/rocal_api_meta_data.h b/rocAL/include/api/rocal_api_meta_data.h index 5c5a305dd..d339944c7 100644 --- a/rocAL/include/api/rocal_api_meta_data.h +++ b/rocAL/include/api/rocal_api_meta_data.h @@ -81,9 +81,10 @@ extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReaderDetection(RocalContex * \param [in] is_box_encoder If set to True, bboxes are returned as encoded bboxes using the anchors * \param [in] avoid_class_remapping If set to True, classes are returned directly. 
Otherwise, classes are mapped to consecutive values * \param [in] aspect_ratio_grouping If set to True, images are sorted by their aspect ratio and returned + * \param [in] is_box_iou_matcher If set to True, box iou matcher which returns matched indices is enabled in the pipeline * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false, bool avoid_class_remapping = false, bool aspect_ratio_grouping = false); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false, bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, bool is_box_iou_matcher = false); /*! \brief create coco reader key points * \ingroup group_rocal_meta_data @@ -296,4 +297,23 @@ extern "C" void ROCAL_API_CALL rocalGetImageId(RocalContext p_context, int* buf) */ extern "C" void ROCAL_API_CALL rocalGetJointsDataPtr(RocalContext p_context, RocalJointsData** joints_data); +/*! \brief API to enable box IOU matcher and pass required params to pipeline + * \ingroup group_rocal_meta_data + * \param [in] p_context rocAL context + * \param [in] anchors The anchors / ground truth bounding box coordinates + * \param [in] criteria Threshold IoU for matching bounding boxes with anchors. + * \param [in] high_threshold The max threshold for IOU + * \param [in] low_threshold The min threshold for IOU + * \param [in] allow_low_quality_matches bool value when set to true allows low quality matches + */ +extern "C" void ROCAL_API_CALL rocalBoxIouMatcher(RocalContext p_context, std::vector& anchors, float criteria, + float high_threshold, float low_threshold, bool allow_low_quality_matches = true); + +/*! 
\brief API to return the matched idices for the bounding box and anchors + * \ingroup group_rocal_meta_data + * \param [in] rocal_context rocAL context + * \return RocalTensorList of matched indices + */ +extern "C" RocalTensorList ROCAL_API_CALL rocalGetMatchedIndices(RocalContext rocal_context); + #endif // MIVISIONX_ROCAL_API_META_DATA_H diff --git a/rocAL/include/meta_data/bounding_box_graph.h b/rocAL/include/meta_data/bounding_box_graph.h index 76e2cf5fe..6591e536f 100644 --- a/rocAL/include/meta_data/bounding_box_graph.h +++ b/rocAL/include/meta_data/bounding_box_graph.h @@ -26,22 +26,7 @@ THE SOFTWARE. #include "meta_data_graph.h" #include "meta_node.h" -typedef struct { - float xc; - float yc; - float w; - float h; -} BoundingBoxCord_xcycwh; -typedef struct { - float l; - float t; - float r; - float b; -} BoundingBoxCord_ltrb; -typedef union { - BoundingBoxCord_xcycwh xcycwh; - BoundingBoxCord_ltrb ltrb; -} BoundingBoxCordf; // Union comprises of float bbox cords of ltrb/xcycwh type +typedef struct { float xc; float yc; float w; float h; } BoundingBoxCord_xcycwh; class BoundingBoxGraph : public MetaDataGraph { public: @@ -49,4 +34,5 @@ class BoundingBoxGraph : public MetaDataGraph { void update_meta_data(pMetaDataBatch meta_data, decoded_image_info decode_image_info) override; void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decoded_image_info, crop_image_info crop_image_info) override; void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) override; + void update_box_iou_matcher(std::vector *anchors, int *matches_idx_buffer, pMetaDataBatch full_batch_meta_data, float criteria, float high_threshold, float low_threshold, bool allow_low_quality_matches) override; }; diff --git a/rocAL/include/meta_data/meta_data_graph.h 
b/rocAL/include/meta_data/meta_data_graph.h index b66c5d15a..735b3b9ab 100644 --- a/rocAL/include/meta_data/meta_data_graph.h +++ b/rocAL/include/meta_data/meta_data_graph.h @@ -37,5 +37,6 @@ class MetaDataGraph { virtual void update_meta_data(pMetaDataBatch meta_data, decoded_image_info decoded_image_info) = 0; virtual void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decoded_image_info, crop_image_info crop_image_info) = 0; virtual void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) = 0; + virtual void update_box_iou_matcher(std::vector *anchors, int *matches_idx_buffer, pMetaDataBatch full_batch_meta_data, float criteria, float high_threshold, float low_threshold, bool allow_low_quality_matches) = 0; std::list> _meta_nodes; }; diff --git a/rocAL/include/pipeline/master_graph.h b/rocAL/include/pipeline/master_graph.h index dd5662c93..28d442e34 100644 --- a/rocAL/include/pipeline/master_graph.h +++ b/rocAL/include/pipeline/master_graph.h @@ -50,6 +50,7 @@ THE SOFTWARE. 
#define BBOX_COUNT 4 #define MAX_NUM_ANCHORS 8732 // Num of bbox achors used in SSD training #define MAX_MASK_BUFFER 10000 +#define MAX_ANCHORS 120087 // Num of bbox achors used in Retinanet training #if ENABLE_SIMD #if _WIN32 @@ -107,18 +108,20 @@ class MasterGraph { std::vector create_label_reader(const char *source_path, MetaDataReaderType reader_type); std::vector create_video_label_reader(const char *source_path, MetaDataReaderType reader_type, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, bool file_list_frame_num = true); std::vector create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType label_type, bool ltrb_bbox = true, bool is_box_encoder = false, - bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0); + bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, bool is_box_iou_matcher = false, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0); std::vector create_tf_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type, const std::map feature_key_map); std::vector create_caffe_lmdb_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type); std::vector create_caffe2_lmdb_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type); std::vector create_cifar10_label_reader(const char *source_path, const char *file_prefix); std::vector create_mxnet_label_reader(const char *source_path, bool is_output); void box_encoder(std::vector &anchors, float criteria, const std::vector &means, const std::vector &stds, bool offset, float scale); + void box_iou_matcher(std::vector &anchors, float criteria, float high_threshold, float low_threshold, bool allow_low_quality_matches); void 
create_randombboxcrop_reader(RandomBBoxCrop_MetaDataReaderType reader_type, RandomBBoxCrop_MetaDataType label_type, bool all_boxes_overlap, bool no_crop, FloatParam *aspect_ratio, bool has_shape, int crop_width, int crop_height, int num_attempts, FloatParam *scaling, int total_num_attempts, int64_t seed = 0); const std::pair &meta_data(); TensorList *labels_meta_data(); TensorList *bbox_meta_data(); TensorList *mask_meta_data(); + TensorList *matched_index_meta_data(); void set_loop(bool val) { _loop = val; } void set_output(Tensor *output_tensor); size_t calculate_cpu_num_threads(size_t shard_count); @@ -164,6 +167,7 @@ class MasterGraph { TensorList _labels_tensor_list; TensorList _bbox_tensor_list; TensorList _mask_tensor_list; + TensorList _matches_tensor_list; std::vector _meta_data_buffer_size; #if ENABLE_HIP DeviceManagerHip _device; //!< Keeps the device related constructs needed for running on GPU @@ -204,6 +208,11 @@ class MasterGraph { bool _offset; // Returns normalized offsets ((encoded_bboxes*scale - anchors*scale) - mean) / stds in EncodedBBoxes that use std and the mean and scale arguments if offset="True" std::vector _means, _stds; //_means: [x y w h] mean values for normalization _stds: [x y w h] standard deviations for offset normalization. 
bool _augmentation_metanode = false; + // box IoU matcher variables + bool _is_box_iou_matcher = false; // bool variable to set the box iou matcher + float _high_threshold = 0.5f; // Max IoU threshold + float _low_threshold = 0.4f; // Min IoU threshold + bool _allow_low_quality_matches = true; // Set to true to include low quality matches in matched idx generation #if ENABLE_HIP BoxEncoderGpu *_box_encoder_gpu = nullptr; #endif diff --git a/rocAL/source/api/rocal_api_meta_data.cpp b/rocAL/source/api/rocal_api_meta_data.cpp index 0eaf89958..34beb9a5f 100644 --- a/rocAL/source/api/rocal_api_meta_data.cpp +++ b/rocAL/source/api/rocal_api_meta_data.cpp @@ -71,14 +71,14 @@ RocalMetaData RocalMetaData ROCAL_API_CALL - rocalCreateCOCOReader(RocalContext p_context, const char* source_path, bool is_output, bool mask, bool ltrb, bool is_box_encoder, bool avoid_class_remapping, bool aspect_ratio_grouping) { + rocalCreateCOCOReader(RocalContext p_context, const char* source_path, bool is_output, bool mask, bool ltrb, bool is_box_encoder, bool avoid_class_remapping, bool aspect_ratio_grouping, bool is_box_iou_matcher) { if (!p_context) THROW("Invalid rocal context passed to rocalCreateCOCOReader") auto context = static_cast(p_context); if (mask) { - return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::PolygonMask, ltrb, is_box_encoder, avoid_class_remapping, aspect_ratio_grouping); + return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::PolygonMask, ltrb, is_box_encoder, avoid_class_remapping, aspect_ratio_grouping, is_box_iou_matcher); } - return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::BoundingBox, ltrb, is_box_encoder, avoid_class_remapping, aspect_ratio_grouping); + return 
context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::BoundingBox, ltrb, is_box_encoder, avoid_class_remapping, aspect_ratio_grouping, is_box_iou_matcher); } RocalMetaData @@ -88,7 +88,7 @@ RocalMetaData THROW("Invalid rocal context passed to rocalCreateCOCOReaderKeyPoints") auto context = static_cast(p_context); - return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_KEY_POINTS_META_DATA_READER, MetaDataType::KeyPoints, sigma, pose_output_width, pose_output_height); + return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_KEY_POINTS_META_DATA_READER, MetaDataType::KeyPoints, false, false, false, false, false, sigma, pose_output_width, pose_output_height); } RocalMetaData @@ -488,3 +488,27 @@ void *joints_data = (RocalJointsData*)(&(meta_data.second->get_joints_data_batch())); } + +void + ROCAL_API_CALL + rocalBoxIouMatcher(RocalContext p_context, + std::vector& anchors, + float criteria, float high_threshold, + float low_threshold, + bool allow_low_quality_matches) { + if (!p_context) + THROW("Invalid rocal context passed to rocalBoxIouMatcher") + auto context = static_cast(p_context); + context->master_graph->box_iou_matcher(anchors, criteria, high_threshold, + low_threshold, + allow_low_quality_matches); +} + +RocalTensorList + ROCAL_API_CALL + rocalGetMatchedIndices(RocalContext p_context) { + if (!p_context) + THROW("Invalid rocal context passed to rocalGetMatchedIndices") + auto context = static_cast(p_context); + return context->master_graph->matched_index_meta_data(); +} diff --git a/rocAL/source/meta_data/bounding_box_graph.cpp b/rocAL/source/meta_data/bounding_box_graph.cpp index 2c6b2d105..9e7d19d72 100644 --- a/rocAL/source/meta_data/bounding_box_graph.cpp +++ b/rocAL/source/meta_data/bounding_box_graph.cpp @@ -58,13 +58,13 @@ void
BoundingBoxGraph::update_meta_data(pMetaDataBatch input_meta_data, decoded_ } } -inline float ssd_BBoxIntersectionOverUnion(const BoundingBoxCord &box1, const float &box1_area, const BoundingBoxCordf &box2) { - float xA = std::max(static_cast(box1.l), box2.ltrb.l); - float yA = std::max(static_cast(box1.t), box2.ltrb.t); - float xB = std::min(static_cast(box1.r), box2.ltrb.r); - float yB = std::min(static_cast(box1.b), box2.ltrb.b); +inline float ssd_BBoxIntersectionOverUnion(const BoundingBoxCord &box1, const float &box1_area, const BoundingBoxCord &box2) { + float xA = std::max(static_cast(box1.l), box2.l); + float yA = std::max(static_cast(box1.t), box2.t); + float xB = std::min(static_cast(box1.r), box2.r); + float yB = std::min(static_cast(box1.b), box2.b); float intersection_area = std::max((float)0.0, xB - xA) * std::max((float)0.0, yB - yA); - float box2_area = (box2.ltrb.b - box2.ltrb.t) * (box2.ltrb.r - box2.ltrb.l); + float box2_area = (box2.b - box2.t) * (box2.r - box2.l); return (float)(intersection_area / (box1_area + box2_area - intersection_area)); } @@ -116,7 +116,7 @@ void BoundingBoxGraph::update_random_bbox_meta_data(pMetaDataBatch input_meta_da } } -inline void calculate_ious_for_box(float *ious, BoundingBoxCord &box, BoundingBoxCordf *anchors, unsigned int num_anchors) { +inline void calculate_ious_for_box(float *ious, BoundingBoxCord &box, BoundingBoxCord *anchors, unsigned int num_anchors) { float box_area = (box.b - box.t) * (box.r - box.l); ious[0] = ssd_BBoxIntersectionOverUnion(box, box_area, anchors[0]); @@ -149,13 +149,13 @@ inline int find_best_box_for_anchor(unsigned anchor_idx, const std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) { #pragma omp parallel for for (int i = 0; i < full_batch_meta_data->size(); i++) { - BoundingBoxCordf *bbox_anchors = reinterpret_cast(anchors->data()); + 
BoundingBoxCord *bbox_anchors = reinterpret_cast(anchors->data()); auto bb_count = full_batch_meta_data->get_labels_batch()[i].size(); int *bb_labels = full_batch_meta_data->get_labels_batch()[i].data(); BoundingBoxCord *bb_coords = reinterpret_cast(full_batch_meta_data->get_bb_cords_batch()[i].data()); unsigned anchors_size = anchors->size() / 4; // divide the anchors_size by 4 to get the total number of anchors int *encoded_labels = encoded_labels_data + (i * anchors_size); - BoundingBoxCordf *encoded_bb = reinterpret_cast(encoded_boxes_data + (i * anchors_size * 4)); + BoundingBoxCord_xcycwh *encoded_bb = reinterpret_cast(encoded_boxes_data + (i * anchors_size * 4)); // Calculate Ious // ious size - bboxes count x anchors count std::vector ious(bb_count * anchors_size); @@ -167,36 +167,36 @@ void BoundingBoxGraph::update_box_encoder_meta_data(std::vector *anchors, float half_scale = 0.5 * scale; // Depending on the matches ->place the best bbox instead of the corresponding anchor_idx in anchor for (unsigned anchor_idx = 0; anchor_idx < anchors_size; anchor_idx++) { - BoundingBoxCordf box_bestidx, anchor_xcyxwh; - BoundingBoxCordf *p_anchor = &bbox_anchors[anchor_idx]; + BoundingBoxCord_xcycwh box_bestidx, anchor_xcyxwh; + BoundingBoxCord *p_anchor = &bbox_anchors[anchor_idx]; const auto best_idx = find_best_box_for_anchor(anchor_idx, ious, bb_count, anchors_size); // Filter matches by criteria if (ious[(best_idx * anchors_size) + anchor_idx] > criteria) // Its a match { // Convert the "ltrb" format to "xcycwh" if (offset) { - box_bestidx.xcycwh.xc = (bb_coords[best_idx].l + bb_coords[best_idx].r) * half_scale; // xc - box_bestidx.xcycwh.yc = (bb_coords[best_idx].t + bb_coords[best_idx].b) * half_scale; // yc - box_bestidx.xcycwh.w = (bb_coords[best_idx].r - bb_coords[best_idx].l) * scale; // w - box_bestidx.xcycwh.h = (bb_coords[best_idx].b - bb_coords[best_idx].t) * scale; // h + box_bestidx.xc = (bb_coords[best_idx].l + bb_coords[best_idx].r) * half_scale; // 
xc + box_bestidx.yc = (bb_coords[best_idx].t + bb_coords[best_idx].b) * half_scale; // yc + box_bestidx.w = (bb_coords[best_idx].r - bb_coords[best_idx].l) * scale; // w + box_bestidx.h = (bb_coords[best_idx].b - bb_coords[best_idx].t) * scale; // h // Convert the "ltrb" format to "xcycwh" - anchor_xcyxwh.xcycwh.xc = (p_anchor->ltrb.l + p_anchor->ltrb.r) * half_scale; // xc - anchor_xcyxwh.xcycwh.yc = (p_anchor->ltrb.t + p_anchor->ltrb.b) * half_scale; // yc - anchor_xcyxwh.xcycwh.w = (p_anchor->ltrb.r - p_anchor->ltrb.l) * scale; // w - anchor_xcyxwh.xcycwh.h = (p_anchor->ltrb.b - p_anchor->ltrb.t) * scale; // h + anchor_xcyxwh.xc = (p_anchor->l + p_anchor->r) * half_scale; // xc + anchor_xcyxwh.yc = (p_anchor->t + p_anchor->b) * half_scale; // yc + anchor_xcyxwh.w = (p_anchor->r - p_anchor->l) * scale; // w + anchor_xcyxwh.h = (p_anchor->b - p_anchor->t) * scale; // h // Reference for offset calculation between the Ground Truth bounding boxes & anchor boxes in format // https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Object-Detection#predictions-vis-%C3%A0-vis-priors - box_bestidx.xcycwh.xc = ((box_bestidx.xcycwh.xc - anchor_xcyxwh.xcycwh.xc) / anchor_xcyxwh.xcycwh.w - means[0]) * inv_stds[0]; - box_bestidx.xcycwh.yc = ((box_bestidx.xcycwh.yc - anchor_xcyxwh.xcycwh.yc) / anchor_xcyxwh.xcycwh.h - means[1]) * inv_stds[1]; - box_bestidx.xcycwh.w = (std::log(box_bestidx.xcycwh.w / anchor_xcyxwh.xcycwh.w) - means[2]) * inv_stds[2]; - box_bestidx.xcycwh.h = (std::log(box_bestidx.xcycwh.h / anchor_xcyxwh.xcycwh.h) - means[3]) * inv_stds[3]; + box_bestidx.xc = ((box_bestidx.xc - anchor_xcyxwh.xc) / anchor_xcyxwh.w - means[0]) * inv_stds[0]; + box_bestidx.yc = ((box_bestidx.yc - anchor_xcyxwh.yc) / anchor_xcyxwh.h - means[1]) * inv_stds[1]; + box_bestidx.w = (std::log(box_bestidx.w / anchor_xcyxwh.w) - means[2]) * inv_stds[2]; + box_bestidx.h = (std::log(box_bestidx.h / anchor_xcyxwh.h) - means[3]) * inv_stds[3]; encoded_bb[anchor_idx] = box_bestidx; 
encoded_labels[anchor_idx] = bb_labels[best_idx]; } else { - box_bestidx.xcycwh.xc = 0.5 * (bb_coords[best_idx].l + bb_coords[best_idx].r); // xc - box_bestidx.xcycwh.yc = 0.5 * (bb_coords[best_idx].t + bb_coords[best_idx].b); // yc - box_bestidx.xcycwh.w = bb_coords[best_idx].r - bb_coords[best_idx].l; // w - box_bestidx.xcycwh.h = bb_coords[best_idx].b - bb_coords[best_idx].t; // h + box_bestidx.xc = 0.5 * (bb_coords[best_idx].l + bb_coords[best_idx].r); // xc + box_bestidx.yc = 0.5 * (bb_coords[best_idx].t + bb_coords[best_idx].b); // yc + box_bestidx.w = bb_coords[best_idx].r - bb_coords[best_idx].l; // w + box_bestidx.h = bb_coords[best_idx].b - bb_coords[best_idx].t; // h encoded_bb[anchor_idx] = box_bestidx; encoded_labels[anchor_idx] = bb_labels[best_idx]; } @@ -207,13 +207,79 @@ void BoundingBoxGraph::update_box_encoder_meta_data(std::vector *anchors, encoded_labels[anchor_idx] = 0; } else { // Convert the "ltrb" format to "xcycwh" - encoded_bb[anchor_idx].xcycwh.xc = 0.5 * (p_anchor->ltrb.l + p_anchor->ltrb.r); // xc - encoded_bb[anchor_idx].xcycwh.yc = 0.5 * (p_anchor->ltrb.t + p_anchor->ltrb.b); // yc - encoded_bb[anchor_idx].xcycwh.w = (-p_anchor->ltrb.l + p_anchor->ltrb.r); // w - encoded_bb[anchor_idx].xcycwh.h = (-p_anchor->ltrb.t + p_anchor->ltrb.b); // h + encoded_bb[anchor_idx].xc = 0.5 * (p_anchor->l + p_anchor->r); // xc + encoded_bb[anchor_idx].yc = 0.5 * (p_anchor->t + p_anchor->b); // yc + encoded_bb[anchor_idx].w = (-p_anchor->l + p_anchor->r); // w + encoded_bb[anchor_idx].h = (-p_anchor->t + p_anchor->b); // h encoded_labels[anchor_idx] = 0; } } } } } + +void BoundingBoxGraph::update_box_iou_matcher(std::vector *anchors, int *matches_idx_buffer, + pMetaDataBatch full_batch_meta_data, float criteria, float high_threshold, + float low_threshold, bool allow_low_quality_matches) { + auto bb_coords_batch = full_batch_meta_data->get_bb_cords_batch(); + unsigned anchors_size = anchors->size() / 4; // divide the anchors_size by 4 to get the total 
number of anchors + BoundingBoxCord *bbox_anchors = reinterpret_cast(anchors->data()); + + std::vector matches(full_batch_meta_data->size()); + for (int i = 0; i < full_batch_meta_data->size(); i++) { + matches[i] = reinterpret_cast(matches_idx_buffer + i * anchors_size); + } + +#pragma omp parallel for + for (int i = 0; i < full_batch_meta_data->size(); i++) { + auto bb_coords = bb_coords_batch[i]; + auto bb_count = bb_coords.size(); + + std::vector matched_vals(anchors_size, -1.0); + std::vector low_quality_preds(anchors_size, -1); + + // Calculate IoU's, The number of IoU Values calculated will be (bb_count x anchors_size) + for (unsigned bb_idx = 0; bb_idx < bb_count; bb_idx++) { + BoundingBoxCord box = bb_coords[bb_idx]; + float box_area = (box.b - box.t) * (box.r - box.l); + float best_bbox_iou = -1.0f; + std::vector bbox_iou(anchors_size); // IoU value for bbox mapped with each anchor + for (unsigned int anchor_idx = 0; anchor_idx < anchors_size; anchor_idx++) { + float iou_val = ssd_BBoxIntersectionOverUnion(box, box_area, bbox_anchors[anchor_idx]); + bbox_iou[anchor_idx] = iou_val; + + // Find col maximum in (bb_count x anchors_size) IoU values calculated + if (iou_val > matched_vals[anchor_idx]) { + matched_vals[anchor_idx] = iou_val; + matches[i][anchor_idx] = static_cast(bb_idx); + } + + // Find row maximum in (bb_count x anchors_size) IoU values calculated + if (allow_low_quality_matches) { + if (iou_val > best_bbox_iou) best_bbox_iou = iou_val; + } + } + + if (allow_low_quality_matches) { + for (unsigned int anchor_idx = 0; anchor_idx < anchors_size; anchor_idx++) { // if the element is found + if (fabs(bbox_iou[anchor_idx] - best_bbox_iou) < 1e-6) + low_quality_preds[anchor_idx] = anchor_idx; + } + } + } + + // Update matched indices based on thresholds and low quality matches + for (uint pred_idx = 0; pred_idx < anchors_size; pred_idx++) { + if (!(allow_low_quality_matches && low_quality_preds[pred_idx] != -1)) { + if (matched_vals[pred_idx] < 
low_threshold) { + matches[i][pred_idx] = -1; + } else if ((matched_vals[pred_idx] < high_threshold)) { + matches[i][pred_idx] = -2; + } + } + } + + matched_vals.clear(); + low_quality_preds.clear(); + } +} diff --git a/rocAL/source/pipeline/master_graph.cpp b/rocAL/source/pipeline/master_graph.cpp index ed65248b1..baa5326d1 100644 --- a/rocAL/source/pipeline/master_graph.cpp +++ b/rocAL/source/pipeline/master_graph.cpp @@ -962,6 +962,10 @@ void MasterGraph::output_routine() { #endif _meta_data_graph->update_box_encoder_meta_data(&_anchors, output_meta_data, _criteria, _offset, _scale, _means, _stds, (float *)bbox_encode_write_buffers.first, (int *)bbox_encode_write_buffers.second); } + if (_is_box_iou_matcher) { + int *matches_write_buffer = reinterpret_cast(_ring_buffer.get_meta_write_buffers()[2]); + _meta_data_graph->update_box_iou_matcher(&_anchors, matches_write_buffer, output_meta_data, _criteria, _high_threshold, _low_threshold, _allow_low_quality_matches); + } _bencode_time.end(); #ifdef ROCAL_VIDEO _sequence_start_framenum_vec.insert(_sequence_start_framenum_vec.begin(), _loader_module->get_sequence_start_frame_number()); @@ -1004,7 +1008,8 @@ void MasterGraph::stop_processing() { _output_thread.join(); } -std::vector MasterGraph::create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType metadata_type, bool ltrb_bbox, bool is_box_encoder, bool avoid_class_remapping, bool aspect_ratio_grouping, float sigma, unsigned pose_output_width, unsigned pose_output_height) { +std::vector MasterGraph::create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType metadata_type, bool ltrb_bbox, bool is_box_encoder, + bool avoid_class_remapping, bool aspect_ratio_grouping, bool is_box_iou_matcher, float sigma, unsigned pose_output_width, unsigned pose_output_height) { if (_meta_data_reader) THROW("A metadata reader has already been created") if 
(_augmented_meta_data) @@ -1039,6 +1044,13 @@ std::vector MasterGraph::create_coco_meta_data_reader(const c default_mask_info.set_metadata(); _meta_data_buffer_size.emplace_back(_user_batch_size * default_mask_info.data_size()); } + if (is_box_iou_matcher) { + _is_box_iou_matcher = true; + dims = {MAX_ANCHORS}; + default_matches_info = TensorInfo(std::move(dims), _mem_type, RocalTensorDataType::INT32); // Create default matches info + default_matches_info.set_metadata(); + _meta_data_buffer_size.emplace_back(_user_batch_size * default_matches_info.data_size()); + } for (unsigned i = 0; i < _user_batch_size; i++) // Create rocALTensorList for each metadata { @@ -1050,12 +1062,18 @@ std::vector MasterGraph::create_coco_meta_data_reader(const c auto mask_info = default_mask_info; _mask_tensor_list.push_back(new Tensor(mask_info)); } + if(is_box_iou_matcher) { + auto matches_info = default_matches_info; + _matches_tensor_list.push_back(new Tensor(matches_info)); + } } _ring_buffer.init_metadata(RocalMemType::HOST, _meta_data_buffer_size); _metadata_output_tensor_list.emplace_back(&_labels_tensor_list); _metadata_output_tensor_list.emplace_back(&_bbox_tensor_list); if (metadata_type == MetaDataType::PolygonMask) _metadata_output_tensor_list.emplace_back(&_mask_tensor_list); + if(is_box_iou_matcher) + _metadata_output_tensor_list.emplace_back(&_matches_tensor_list); return _metadata_output_tensor_list; } @@ -1347,6 +1365,18 @@ const std::pair &MasterGraph::meta_data() { return _ring_buffer.get_meta_data(); } +void MasterGraph::box_iou_matcher(std::vector &anchors, float criteria, + float high_threshold, float low_threshold, + bool allow_low_quality_matches) { + if (!_is_box_iou_matcher) + THROW("Box IOU matcher variable not set cannot return matched idx") + _num_anchors = anchors.size() / 4; + _anchors = anchors; + _high_threshold = high_threshold; + _low_threshold = low_threshold; + _allow_low_quality_matches = allow_low_quality_matches; +} + size_t 
MasterGraph::bounding_box_batch_count(pMetaDataBatch meta_data_batch) { size_t size = 0; for (unsigned i = 0; i < _user_batch_size; i++) @@ -1396,6 +1426,17 @@ TensorList *MasterGraph::mask_meta_data() { return &_mask_tensor_list; } +TensorList *MasterGraph::matched_index_meta_data() { + if (_ring_buffer.level() == 0) + THROW("No meta data has been loaded") + auto meta_data_buffers = reinterpret_cast(_ring_buffer.get_meta_read_buffers()[2]); // Get matches buffer from ring buffer + for (unsigned i = 0; i < _matches_tensor_list.size(); i++) { + _matches_tensor_list[i]->set_mem_handle(reinterpret_cast(meta_data_buffers)); + meta_data_buffers += _matches_tensor_list[i]->info().data_size(); + } + return &_matches_tensor_list; +} + void MasterGraph::notify_user_thread() { if (_output_routine_finished_processing) return; diff --git a/rocAL_pybind/amd/rocal/fn.py b/rocAL_pybind/amd/rocal/fn.py index a5b60b62c..9e45cefe0 100644 --- a/rocAL_pybind/amd/rocal/fn.py +++ b/rocAL_pybind/amd/rocal/fn.py @@ -1051,7 +1051,7 @@ def box_iou_matcher(*inputs, anchors, criteria=0.5, high_threshold=0.5, # pybind call arguments kwargs_pybind = {"anchors": anchors, "criteria": criteria, "high_threshold": high_threshold, "low_threshold": low_threshold, "allow_low_quality_matches": allow_low_quality_matches} - box_iou_matcher = b.BoxIOUMatcher( + box_iou_matcher = b.boxIouMatcher( Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) Pipeline._current_pipeline._box_iou_matcher = True return (box_iou_matcher, []) diff --git a/rocAL_pybind/amd/rocal/readers.py b/rocAL_pybind/amd/rocal/readers.py index 9f28a1996..70e5a25f3 100644 --- a/rocAL_pybind/amd/rocal/readers.py +++ b/rocAL_pybind/amd/rocal/readers.py @@ -57,7 +57,8 @@ def coco(annotations_file='', ltrb=True, masks=False, ratio=False, avoid_class_r "ltrb": ltrb, "is_box_encoder": is_box_encoder, "avoid_class_remapping": avoid_class_remapping, - "aspect_ratio_grouping": aspect_ratio_grouping} + "aspect_ratio_grouping": 
aspect_ratio_grouping, + "is_box_iou_matcher": is_box_iou_matcher} meta_data = b.cocoReader( Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) return (meta_data, labels, bboxes) diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index 468be6d99..979cdee0e 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -384,7 +384,7 @@ PYBIND11_MODULE(rocal_pybind, m) { // rocal_api_meta_data.h m.def("randomBBoxCrop", &rocalRandomBBoxCrop); m.def("boxEncoder", &rocalBoxEncoder); - // m.def("BoxIOUMatcher", &rocalBoxIOUMatcher); // Will be enabled when IOU matcher changes are introduced in C++ + m.def("boxIouMatcher", &rocalBoxIouMatcher); m.def("getImgSizes", [](RocalContext context, py::array_t array) { auto buf = array.request(); int *ptr = static_cast(buf.ptr); @@ -504,17 +504,18 @@ PYBIND11_MODULE(rocal_pybind, m) { } return complete_list; }); - // Will be enabled when IOU matcher changes are introduced in C++ - // m.def("getMatchedIndices", [](RocalContext context) { - // rocalTensorList *matches = rocalGetMatchedIndices(context); - // return py::array(py::buffer_info( - // (int *)(matches->at(0)->buffer()), - // sizeof(int), - // py::format_descriptor::format(), - // 1, - // {matches->size() * 120087}, - // {sizeof(int) })); - // }, py::return_value_policy::reference); + m.def( + "getMatchedIndices", [](RocalContext context) { + rocalTensorList *matches = rocalGetMatchedIndices(context); + return py::array(py::buffer_info( + static_cast(matches->at(0)->buffer()), + sizeof(int), + py::format_descriptor::format(), + 1, + {matches->size() * matches->at(0)->dims().at(0)}, + {sizeof(int)})); + }, + py::return_value_policy::reference); m.def("rocalGetEncodedBoxesAndLables", [](RocalContext context, uint batch_size, uint num_anchors) { auto vec_pair_labels_boxes = rocalGetEncodedBoxesAndLables(context, batch_size * num_anchors); auto labels_buf_ptr = static_cast(vec_pair_labels_boxes[0]->at(0)->buffer()); From 
36f852ca5a2f8e155f05a594296cbf4342642260 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Mon, 16 Oct 2023 08:09:21 +0000 Subject: [PATCH 04/33] Fixing build issues --- .../augmentations/node_sequence_rearrange.cpp | 47 ++++++++----------- .../meta_node_resize_mirror_normalize.cpp | 4 +- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/rocAL/source/augmentations/node_sequence_rearrange.cpp b/rocAL/source/augmentations/node_sequence_rearrange.cpp index f5e7234e6..d95579484 100644 --- a/rocAL/source/augmentations/node_sequence_rearrange.cpp +++ b/rocAL/source/augmentations/node_sequence_rearrange.cpp @@ -20,41 +20,34 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include #include "node_sequence_rearrange.h" -#include "exception.h" +#include +#include + +#include "exception.h" -SequenceRearrangeNode::SequenceRearrangeNode(const std::vector &inputs, const std::vector &outputs) : - Node(inputs, outputs) -{ -} +SequenceRearrangeNode::SequenceRearrangeNode(const std::vector &inputs, const std::vector &outputs) : Node(inputs, outputs) {} -void SequenceRearrangeNode::create_node() -{ - if(_node) +void SequenceRearrangeNode::create_node() { + if (_node) return; vx_status status; - _sequence_array = vxCreateArray(vxGetContext((vx_reference)_graph->get()), VX_TYPE_UINT32, _new_sequence_length); - status = vxAddArrayItems(_sequence_array, _new_sequence_length, _new_order.data(), sizeof(vx_uint32)); - if(status != VX_SUCCESS) - THROW("Adding array items failed: "+ TOSTR(status)) - _node = vxExtrppNode_SequenceRearrangebatchPD(_graph->get(), _inputs[0]->handle(), _outputs[0]->handle(), _sequence_array, _new_sequence_length, _sequence_length, _sequence_count); - if((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) - THROW("Adding the sequence rearrange (vxExtrppNode_SequenceRearrange) node failed: "+ TOSTR(status)) + vx_array sequence_array = 
vxCreateArray(vxGetContext((vx_reference)_graph->get()), VX_TYPE_UINT32, _new_order.size()); + status = vxAddArrayItems(sequence_array, _new_order.size(), _new_order.data(), sizeof(vx_uint32)); + if (status != VX_SUCCESS) + THROW("Adding array items failed: " + TOSTR(status)); + int input_layout = (int)_inputs[0]->info().layout(); + vx_scalar input_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &input_layout); + _node = vxExtRppSequenceRearrange(_graph->get(), _inputs[0]->handle(), _outputs[0]->handle(), sequence_array, input_layout_vx); + + if ((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) + THROW("Adding the sequence rearrange (vxExtRppSequenceRearrange) node failed: " + TOSTR(status)) } -void SequenceRearrangeNode::init(unsigned int* new_order, unsigned int new_sequence_length, unsigned int sequence_length, unsigned int sequence_count) -{ - _new_sequence_length = new_sequence_length; - _sequence_length = sequence_length; - _sequence_count = sequence_count; - _new_order.resize(_new_sequence_length); - std::copy(new_order, new_order + _new_sequence_length, _new_order.begin()); +void SequenceRearrangeNode::init(std::vector &new_order) { + _new_order = new_order; } -void SequenceRearrangeNode::update_node() -{ -} \ No newline at end of file +void SequenceRearrangeNode::update_node() {} diff --git a/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp b/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp index 8a24dc68c..d2229d22a 100644 --- a/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp +++ b/rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp @@ -77,8 +77,8 @@ void ResizeMirrorNormalizeMetaNode::update_parameters(pMetaDataBatch input_meta_ } // get roi width and height of output image auto img_roi_size = input_meta_data->get_img_roi_sizes_batch()[i]; - img_roi_size.w = output_roi[i].x2; - img_roi_size.h = output_roi[i].y2; + img_roi_size.w = output_roi[i].xywh.w; + 
img_roi_size.h = output_roi[i].xywh.h; output_meta_data->get_img_roi_sizes_batch()[i] = img_roi_size; output_meta_data->get_bb_cords_batch()[i] = bb_coords; output_meta_data->get_labels_batch()[i] = bb_labels; From c87cd2096c18bb66e5e5f8011b4ecf43c7e11afd Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Wed, 25 Oct 2023 14:22:26 +0000 Subject: [PATCH 05/33] Resolving review comments --- rocAL/include/meta_data/meta_data_reader.h | 6 +++--- rocAL/include/pipeline/master_graph.h | 2 +- rocAL/rocAL_hip/rocal_hip_kernels.cpp | 12 ++++++------ rocAL/source/api/rocal_api_meta_data.cpp | 6 ++++-- rocAL/source/pipeline/master_graph.cpp | 10 +++++----- rocAL_pybind/amd/rocal/pipeline.py | 3 +-- rocAL_pybind/rocal_pybind.cpp | 12 ++++++------ 7 files changed, 26 insertions(+), 25 deletions(-) diff --git a/rocAL/include/meta_data/meta_data_reader.h b/rocAL/include/meta_data/meta_data_reader.h index b16722c4e..bdddca51b 100644 --- a/rocAL/include/meta_data/meta_data_reader.h +++ b/rocAL/include/meta_data/meta_data_reader.h @@ -60,8 +60,8 @@ struct MetaDataConfig { bool _aspect_ratio_grouping; public: - MetaDataConfig(const MetaDataType& type, const MetaDataReaderType& reader_type, const std::string& path, const std::map& feature_key_map = std::map(), const std::string file_prefix = std::string(), const unsigned& sequence_length = 3, const unsigned& frame_step = 3, const unsigned& frame_stride = 1, bool avoid_class_remapping = false) - : _type(type), _reader_type(reader_type), _path(path), _feature_key_map(feature_key_map), _file_prefix(file_prefix), _sequence_length(sequence_length), _frame_step(frame_step), _frame_stride(frame_stride), _avoid_class_remapping(avoid_class_remapping) {} + MetaDataConfig(const MetaDataType& type, const MetaDataReaderType& reader_type, const std::string& path, const std::map& feature_key_map = std::map(), const std::string file_prefix = std::string(), const unsigned& sequence_length = 3, const unsigned& frame_step = 3, const unsigned& 
frame_stride = 1) + : _type(type), _reader_type(reader_type), _path(path), _feature_key_map(feature_key_map), _file_prefix(file_prefix), _sequence_length(sequence_length), _frame_step(frame_step), _frame_stride(frame_stride) {} MetaDataConfig() = delete; MetaDataType type() const { return _type; } MetaDataReaderType reader_type() const { return _reader_type; } @@ -83,7 +83,7 @@ struct MetaDataConfig { class MetaDataReader { private: - bool _aspect_ratio_grouping = false; + bool _aspect_ratio_grouping; public: enum class Status { diff --git a/rocAL/include/pipeline/master_graph.h b/rocAL/include/pipeline/master_graph.h index dd5662c93..4ed84bb6e 100644 --- a/rocAL/include/pipeline/master_graph.h +++ b/rocAL/include/pipeline/master_graph.h @@ -82,7 +82,7 @@ class MasterGraph { Status reset(); size_t remaining_count(); MasterGraph::Status to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, float multiplier2, - float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, int max_height = 0, int max_width = 0); + float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, uint max_height = 0, uint max_width = 0); Status copy_output(unsigned char *out_ptr, size_t out_size_in_bytes); Status copy_out_tensor_planar(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type); diff --git a/rocAL/rocAL_hip/rocal_hip_kernels.cpp b/rocAL/rocAL_hip/rocal_hip_kernels.cpp index 449a8672c..4637ff257 100644 --- a/rocAL/rocAL_hip/rocal_hip_kernels.cpp +++ b/rocAL/rocAL_hip/rocal_hip_kernels.cpp @@ -136,6 +136,7 @@ Hip_CopyInt8ToNCHW_fp32( const int maxOutW = outDims.y; const int img_offset = C * W * H; const int out_img_offset = C * maxOutW * 
maxOutH; + unsigned int cstride = maxOutW * maxOutH; if ((x >= maxOutW) || (y >= maxOutH)) return; @@ -145,7 +146,6 @@ Hip_CopyInt8ToNCHW_fp32( // copy float3 pixels to dst const uchar *inp_img = &inp_image_u8[n * img_offset]; float *out_tensor = (float *)output_tensor + n * out_img_offset + dst_buf_offset; - unsigned int stride = maxOutW * maxOutH; if (C == 3) { float3 dst; if (reverse_channels) @@ -153,8 +153,8 @@ Hip_CopyInt8ToNCHW_fp32( else dst = make_float3((float)inp_img[srcIdx], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx + 2]) * multiplier + offset; out_tensor[dstIdx] = dst.x; - out_tensor[dstIdx + stride] = dst.y; - out_tensor[dstIdx + stride * 2] = dst.z; + out_tensor[dstIdx + cstride] = dst.y; + out_tensor[dstIdx + cstride * 2] = dst.z; } else { out_tensor[dstIdx] = (float)inp_img[srcIdx] * multiplier.x + offset.x; } @@ -180,6 +180,7 @@ Hip_CopyInt8ToNCHW_fp16( const int maxOutW = outDims.y; const int img_offset = C * W * H; const int out_img_offset = C * maxOutW * maxOutH; + unsigned int cstride = maxOutW * maxOutH; if ((x >= maxOutW) || (y >= maxOutH)) return; @@ -189,7 +190,6 @@ Hip_CopyInt8ToNCHW_fp16( unsigned int srcIdx = (y * W + x) * C; // copy float3 pixels to dst unsigned int dstIdx = y * maxOutW + x; - unsigned int stride = maxOutW * maxOutH; if (C == 3) { float3 dst; if (reverse_channels) @@ -197,8 +197,8 @@ Hip_CopyInt8ToNCHW_fp16( else dst = make_float3((float)inp_img[srcIdx], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx + 2]) * multiplier + offset; out_tensor[dstIdx] = __float2half(dst.x); - out_tensor[dstIdx + stride] = __float2half(dst.y); - out_tensor[dstIdx + stride * 2] = __float2half(dst.z); + out_tensor[dstIdx + cstride] = __float2half(dst.y); + out_tensor[dstIdx + cstride * 2] = __float2half(dst.z); } else { out_tensor[dstIdx] = __float2half((float)inp_img[srcIdx] * multiplier.x + offset.x); } diff --git a/rocAL/source/api/rocal_api_meta_data.cpp b/rocAL/source/api/rocal_api_meta_data.cpp index 0eaf89958..ffb68391e 
100644 --- a/rocAL/source/api/rocal_api_meta_data.cpp +++ b/rocAL/source/api/rocal_api_meta_data.cpp @@ -421,8 +421,10 @@ void } } -void ROCAL_API_CALL rocalBoxEncoder(RocalContext p_context, std::vector& anchors, float criteria, - std::vector& means, std::vector& stds, bool offset, float scale) { +void + ROCAL_API_CALL + rocalBoxEncoder(RocalContext p_context, std::vector& anchors, float criteria, + std::vector& means, std::vector& stds, bool offset, float scale) { if (!p_context) THROW("Invalid rocal context passed to rocalBoxEncoder") auto context = static_cast(p_context); diff --git a/rocAL/source/pipeline/master_graph.cpp b/rocAL/source/pipeline/master_graph.cpp index ed65248b1..72c763406 100644 --- a/rocAL/source/pipeline/master_graph.cpp +++ b/rocAL/source/pipeline/master_graph.cpp @@ -452,7 +452,7 @@ MasterGraph::timing() { MasterGraph::Status MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, - float multiplier2, float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, int max_height, int max_width) { + float multiplier2, float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, uint max_height, uint max_width) { if (no_more_processed_data()) return MasterGraph::Status::NO_MORE_DATA; @@ -678,9 +678,9 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier int alignedLength = (max_width & ~7); // multiple of 8 __m256 fR, fG, fB; - for (int row = 0; row < max_height; row++) { + for (uint row = 0; row < max_height; row++) { unsigned char *in_buffer_row = reinterpret_cast(in_buffer) + (row * input_width_stride); - int col = 0; + uint col = 0; for (; col < alignedLength; col += 8) { __m256i pix0 = _mm256_loadu_si256((const __m256i *)in_buffer_row); pix0 = _mm256_permutevar8x32_epi32(pix0, _mm256_setr_epi32(0, 1, 2, 3, 
3, 4, 5, 6)); @@ -746,9 +746,9 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier __m256 fR, fG, fB; __m128i tempR, tempG, tempB; - for (int row = 0; row < max_height; row++) { + for (uint row = 0; row < max_height; row++) { unsigned char *in_buffer_row = reinterpret_cast(in_buffer) + (row * input_width_stride); - int col = 0; + uint col = 0; for (; col < alignedLength; col += 8) { __m256i pix0 = _mm256_loadu_si256((const __m256i *)in_buffer_row); pix0 = _mm256_permutevar8x32_epi32(pix0, _mm256_setr_epi32(0, 1, 2, 3, 3, 4, 5, 6)); diff --git a/rocAL_pybind/amd/rocal/pipeline.py b/rocAL_pybind/amd/rocal/pipeline.py index 6c2dc579d..b0ee8f440 100644 --- a/rocAL_pybind/amd/rocal/pipeline.py +++ b/rocAL_pybind/amd/rocal/pipeline.py @@ -147,9 +147,8 @@ def get_handle(self): return self._handle def copyToExternalTensor(self, array, multiplier, offset, reverse_channels, tensor_format, tensor_dtype, max_height=0, max_width=0): - b.rocalToTensor(self._handle, ctypes.c_void_p(array.data_ptr()), tensor_format, tensor_dtype, - multiplier[0], multiplier[1], multiplier[2], offset[0], offset[1], offset[2], (1 if reverse_channels else 0), self._output_memory_type, max_height, max_width) + multiplier[0], multiplier[1], multiplier[2], offset[0], offset[1], offset[2], (1 if reverse_channels else 0), self._output_memory_type, max_height, max_width) def get_one_hot_encoded_labels(self, array, device): if device == "cpu": diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index 468be6d99..835098b3d 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -87,7 +87,7 @@ py::object wrapper_image_name(RocalContext context, int array_len) { py::object wrapper_copy_to_tensor(RocalContext context, py::object p, RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, - bool reverse_channels, 
RocalOutputMemType output_mem_type, int max_height, int max_width) { + bool reverse_channels, RocalOutputMemType output_mem_type, uint max_height, uint max_width) { auto ptr = ctypes_void_ptr(p); // call pure C++ function int status = rocalToTensor(context, ptr, tensor_format, tensor_output_type, multiplier0, @@ -489,15 +489,15 @@ PYBIND11_MODULE(rocal_pybind, m) { float *mask_buffer = static_cast(mask_data->at(i)->buffer()); py::list poly_batch_list; for (unsigned j = prev_object_cnt; j < bbox_labels->at(i)->dims().at(0) + prev_object_cnt; j++) { - py::list single_image; + py::list polygons_buffer; for (int k = 0; k < mask_count_ptr[j]; k++) { - py::list polygons_buffer; + py::list coords_buffer; for (int l = 0; l < polygon_size_ptr[poly_cnt]; l++) - polygons_buffer.append(mask_buffer[l]); + coords_buffer.append(mask_buffer[l]); mask_buffer += polygon_size_ptr[poly_cnt++]; - single_image.append(polygons_buffer); + polygons_buffer.append(coords_buffer); } - poly_batch_list.append(single_image); + poly_batch_list.append(polygons_buffer); } prev_object_cnt += bbox_labels->at(i)->dims().at(0); complete_list.append(poly_batch_list); From 8071dfb6efaa597f0a6e482acbecad9e8c85231b Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Wed, 25 Oct 2023 16:53:57 +0000 Subject: [PATCH 06/33] Resolving review comments --- .../include/meta_data/coco_meta_data_reader.h | 2 + rocAL/include/meta_data/meta_data_reader.h | 6 +-- .../readers/image/coco_file_source_reader.h | 1 + rocAL/source/pipeline/master_graph.cpp | 4 +- .../readers/image/coco_file_source_reader.cpp | 49 +++++++------------ 5 files changed, 27 insertions(+), 35 deletions(-) diff --git a/rocAL/include/meta_data/coco_meta_data_reader.h b/rocAL/include/meta_data/coco_meta_data_reader.h index 5e5efc67b..bdfab4efb 100644 --- a/rocAL/include/meta_data/coco_meta_data_reader.h +++ b/rocAL/include/meta_data/coco_meta_data_reader.h @@ -39,6 +39,8 @@ class COCOMetaDataReader : public MetaDataReader { void print_map_contents(); 
bool set_timestamp_mode() override { return false; } const std::map>& get_map_content() override { return _map_content; } + void set_aspect_ratio_grouping(bool aspect_ratio_grouping) override { _aspect_ratio_grouping = aspect_ratio_grouping; } + bool aspect_ratio_grouping() const override { return _aspect_ratio_grouping; } COCOMetaDataReader(); private: diff --git a/rocAL/include/meta_data/meta_data_reader.h b/rocAL/include/meta_data/meta_data_reader.h index bdddca51b..849603cf6 100644 --- a/rocAL/include/meta_data/meta_data_reader.h +++ b/rocAL/include/meta_data/meta_data_reader.h @@ -82,7 +82,7 @@ struct MetaDataConfig { }; class MetaDataReader { - private: + protected: bool _aspect_ratio_grouping; public: @@ -98,6 +98,6 @@ class MetaDataReader { virtual bool exists(const std::string& image_name) = 0; virtual bool set_timestamp_mode() = 0; virtual ImgSize lookup_image_size(const std::string& image_name) { return {}; } - void set_aspect_ratio_grouping(bool aspect_ratio_grouping) { _aspect_ratio_grouping = aspect_ratio_grouping; } - bool aspect_ratio_grouping() const { return _aspect_ratio_grouping; } + virtual void set_aspect_ratio_grouping(bool aspect_ratio_grouping) { return; } + virtual bool aspect_ratio_grouping() const { return {}; } }; diff --git a/rocAL/include/readers/image/coco_file_source_reader.h b/rocAL/include/readers/image/coco_file_source_reader.h index 0e3a11bb8..ffa35caea 100644 --- a/rocAL/include/readers/image/coco_file_source_reader.h +++ b/rocAL/include/readers/image/coco_file_source_reader.h @@ -104,4 +104,5 @@ class COCOFileSourceReader : public Reader { void incremenet_file_id() { _file_id++; } void replicate_last_image_to_fill_last_shard(); void replicate_last_batch_to_pad_partial_shard(); + void shuffle_with_aspect_ratios(); }; diff --git a/rocAL/source/pipeline/master_graph.cpp b/rocAL/source/pipeline/master_graph.cpp index 72c763406..fb20553af 100644 --- a/rocAL/source/pipeline/master_graph.cpp +++ 
b/rocAL/source/pipeline/master_graph.cpp @@ -675,7 +675,7 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier __m256 padd0 = _mm256_set1_ps(offset0); __m256 padd1 = _mm256_set1_ps(offset1); __m256 padd2 = _mm256_set1_ps(offset2); - int alignedLength = (max_width & ~7); // multiple of 8 + uint alignedLength = (max_width & ~7); // multiple of 8 __m256 fR, fG, fB; for (uint row = 0; row < max_height; row++) { @@ -742,7 +742,7 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier __m256 padd0 = _mm256_set1_ps(offset0); __m256 padd1 = _mm256_set1_ps(offset1); __m256 padd2 = _mm256_set1_ps(offset2); - int alignedLength = (max_width & ~7); // multiple of 8 + uint alignedLength = (max_width & ~7); // multiple of 8 __m256 fR, fG, fB; __m128i tempR, tempG, tempB; diff --git a/rocAL/source/readers/image/coco_file_source_reader.cpp b/rocAL/source/readers/image/coco_file_source_reader.cpp index 22eb63b02..ec3f60501 100644 --- a/rocAL/source/readers/image/coco_file_source_reader.cpp +++ b/rocAL/source/readers/image/coco_file_source_reader.cpp @@ -109,23 +109,10 @@ Reader::Status COCOFileSourceReader::initialize(ReaderConfig desc) { // Copy the sorted file_names to _file_names vector to be used in sharding _file_names = _sorted_file_names; - // Calculate the mid element which divides the aspect ratios into two groups (<=1.0 and >1.0) - auto mid = std::upper_bound(_aspect_ratios.begin(), _aspect_ratios.end(), 1.0f) - _aspect_ratios.begin(); // shuffle dataset if set if (ret == Reader::Status::OK && _shuffle) { - // Shuffle within groups using the mid element as the limit - [start, mid) and [mid, last) - std::random_shuffle(_file_names.begin(), _file_names.begin() + mid); - std::random_shuffle(_file_names.begin() + mid, _file_names.end()); - std::vector shuffled_filenames; - int split_count = _file_names.size() / _batch_count; // Number of batches for this shard - std::vector indexes(split_count); - 
std::iota(indexes.begin(), indexes.end(), 0); - // Shuffle the index vector and use the index to fetch batch size elements for decoding - std::random_shuffle(indexes.begin(), indexes.end()); - for (auto const idx : indexes) - shuffled_filenames.insert(shuffled_filenames.end(), _file_names.begin() + idx * _batch_count, _file_names.begin() + idx * _batch_count + _batch_count); - _file_names = shuffled_filenames; + shuffle_with_aspect_ratios(); } } else { // shuffle dataset if set @@ -216,25 +203,27 @@ int COCOFileSourceReader::release() { return 0; } +void COCOFileSourceReader::shuffle_with_aspect_ratios() { + // Calculate the mid element which divides the aspect ratios into two groups (<=1.0 and >1.0) + auto mid = std::upper_bound(_aspect_ratios.begin(), _aspect_ratios.end(), 1.0f) - _aspect_ratios.begin(); + // Shuffle within groups using the mid element as the limit - [start, mid) and [mid, last) + std::random_shuffle(_file_names.begin(), _file_names.begin() + mid); + std::random_shuffle(_file_names.begin() + mid, _file_names.end()); + std::vector shuffled_filenames; + int split_count = _file_names.size() / _batch_count; // Number of batches for this shard + std::vector indexes(split_count); + std::iota(indexes.begin(), indexes.end(), 0); + // Shuffle the index vector and use the index to fetch batch size elements for decoding + std::random_shuffle(indexes.begin(), indexes.end()); + for (auto const idx : indexes) + shuffled_filenames.insert(shuffled_filenames.end(), _file_names.begin() + idx * _batch_count, _file_names.begin() + idx * _batch_count + _batch_count); + _file_names = shuffled_filenames; +} + void COCOFileSourceReader::reset() { if (_meta_data_reader && _meta_data_reader->aspect_ratio_grouping()) { _file_names = _sorted_file_names; - // Calculate the mid element which divides the aspect ratios into two groups (<=1.0 and >1.0) - auto mid = std::upper_bound(_aspect_ratios.begin(), _aspect_ratios.end(), 1.0f) - _aspect_ratios.begin(); - if (_shuffle) { - 
// Shuffle within groups using the mid element as the limit - [start, mid) and [mid, last) - std::random_shuffle(_file_names.begin(), _file_names.begin() + mid); - std::random_shuffle(_file_names.begin() + mid, _file_names.end()); - std::vector shuffled_filenames; - int split_count = _file_names.size() / _batch_count; // Number of batches for this shard - std::vector indexes(split_count); - std::iota(indexes.begin(), indexes.end(), 0); - // Shuffle the index vector and use the index to fetch batch size elements for decoding - std::random_shuffle(indexes.begin(), indexes.end()); - for (auto const idx : indexes) - shuffled_filenames.insert(shuffled_filenames.end(), _file_names.begin() + idx * _batch_count, _file_names.begin() + idx * _batch_count + _batch_count); - _file_names = shuffled_filenames; - } + if (_shuffle) shuffle_with_aspect_ratios(); } else if (_shuffle) { std::random_shuffle(_file_names.begin(), _file_names.end()); } From 5c5a4408ecdf9d322f55c319694155fea532699a Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Thu, 26 Oct 2023 18:11:50 +0000 Subject: [PATCH 07/33] Resolving review comments --- .../include/meta_data/coco_meta_data_reader.h | 4 +- rocAL/include/meta_data/meta_data_reader.h | 4 +- rocAL/rocAL_hip/rocal_hip_kernels.cpp | 110 +++++++++--------- rocAL/source/api/rocal_api_augmentation.cpp | 2 + .../meta_data/coco_meta_data_reader.cpp | 6 +- rocAL/source/pipeline/master_graph.cpp | 9 +- .../readers/image/coco_file_source_reader.cpp | 4 +- rocAL_pybind/rocal_pybind.cpp | 2 +- 8 files changed, 70 insertions(+), 71 deletions(-) diff --git a/rocAL/include/meta_data/coco_meta_data_reader.h b/rocAL/include/meta_data/coco_meta_data_reader.h index bdfab4efb..43b7d293b 100644 --- a/rocAL/include/meta_data/coco_meta_data_reader.h +++ b/rocAL/include/meta_data/coco_meta_data_reader.h @@ -40,7 +40,7 @@ class COCOMetaDataReader : public MetaDataReader { bool set_timestamp_mode() override { return false; } const std::map>& get_map_content() override { 
return _map_content; } void set_aspect_ratio_grouping(bool aspect_ratio_grouping) override { _aspect_ratio_grouping = aspect_ratio_grouping; } - bool aspect_ratio_grouping() const override { return _aspect_ratio_grouping; } + bool get_aspect_ratio_grouping() const override { return _aspect_ratio_grouping; } COCOMetaDataReader(); private: @@ -54,7 +54,7 @@ class COCOMetaDataReader : public MetaDataReader { std::map> _map_content; std::map>::iterator _itr; std::map _map_img_sizes; - std::map _map_img_names; + std::map _map_image_names_to_id; // Maps image names to their image IDs std::map::iterator itr; std::map _label_info; std::map::iterator _it_label; diff --git a/rocAL/include/meta_data/meta_data_reader.h b/rocAL/include/meta_data/meta_data_reader.h index 849603cf6..035fe5411 100644 --- a/rocAL/include/meta_data/meta_data_reader.h +++ b/rocAL/include/meta_data/meta_data_reader.h @@ -69,7 +69,7 @@ struct MetaDataConfig { std::map feature_key_map() const { return _feature_key_map; } std::string file_prefix() const { return _file_prefix; } bool class_remapping() const { return _avoid_class_remapping; } - bool aspect_ratio_grouping() const { return _aspect_ratio_grouping; } + bool get_aspect_ratio_grouping() const { return _aspect_ratio_grouping; } unsigned sequence_length() const { return _sequence_length; } unsigned frame_step() const { return _frame_step; } unsigned frame_stride() const { return _frame_stride; } @@ -99,5 +99,5 @@ class MetaDataReader { virtual bool set_timestamp_mode() = 0; virtual ImgSize lookup_image_size(const std::string& image_name) { return {}; } virtual void set_aspect_ratio_grouping(bool aspect_ratio_grouping) { return; } - virtual bool aspect_ratio_grouping() const { return {}; } + virtual bool get_aspect_ratio_grouping() const { return {}; } }; diff --git a/rocAL/rocAL_hip/rocal_hip_kernels.cpp b/rocAL/rocAL_hip/rocal_hip_kernels.cpp index 4637ff257..3475ec13a 100644 --- a/rocAL/rocAL_hip/rocal_hip_kernels.cpp +++ 
b/rocAL/rocAL_hip/rocal_hip_kernels.cpp @@ -32,7 +32,7 @@ Hip_CopyInt8ToNHWC_fp32( void *output_tensor, unsigned int dst_buf_offset, uint4 nchw, - uint2 outDims, + uint2 out_dims, float3 multiplier, float3 offset, unsigned int reverse_channels) { @@ -41,32 +41,32 @@ Hip_CopyInt8ToNHWC_fp32( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; - const int maxOutH = outDims.x; - const int maxOutW = outDims.y; + const int maxOutH = out_dims.x; + const int maxOutW = out_dims.y; const int img_offset = C * W * H; const int out_img_offset = C * maxOutW * maxOutH; if ((x >= maxOutW) || (y >= maxOutH)) return; for (unsigned int n = 0; n < nchw.x; n++) { - unsigned int srcIdx = (y * W + x) * C; // src is RGB - unsigned int dstIdx = (y * maxOutW + x) * C; + unsigned int src_idx = (y * W + x) * C; // src is RGB + unsigned int dst_idx = (y * maxOutW + x) * C; // copy float3 pixels to dst if (C == 3) { float3 dst; const uchar *inp_img = &inp_image_u8[n * img_offset]; float *out_tensor = (float *)((float *)output_tensor + dst_buf_offset + n * out_img_offset); if (reverse_channels) - dst = make_float3((float)inp_img[srcIdx + 2], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx]) * multiplier + offset; + dst = make_float3((float)inp_img[src_idx + 2], (float)inp_img[src_idx + 1], (float)inp_img[src_idx]) * multiplier + offset; else - dst = make_float3((float)inp_img[srcIdx], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx + 2]) * multiplier + offset; - out_tensor[dstIdx] = dst.x; - out_tensor[dstIdx + 1] = dst.y; - out_tensor[dstIdx + 2] = dst.z; + dst = make_float3((float)inp_img[src_idx], (float)inp_img[src_idx + 1], (float)inp_img[src_idx + 2]) * multiplier + offset; + out_tensor[dst_idx] = dst.x; + out_tensor[dst_idx + 1] = dst.y; + out_tensor[dst_idx + 2] = dst.z; } else { const uchar *inp_img = &inp_image_u8[n * img_offset + dst_buf_offset]; float *out_tensor = (float *)output_tensor + dst_buf_offset + n * out_img_offset; - out_tensor[dstIdx] = 
(float)inp_img[srcIdx] * multiplier.x + offset.x; + out_tensor[dst_idx] = (float)inp_img[src_idx] * multiplier.x + offset.x; } } } @@ -77,7 +77,7 @@ Hip_CopyInt8ToNHWC_fp16( void *output_tensor, unsigned int dst_buf_offset, uint4 nchw, - uint2 outDims, + uint2 out_dims, float3 multiplier, float3 offset, const unsigned int reverse_channels) { @@ -86,8 +86,8 @@ Hip_CopyInt8ToNHWC_fp16( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; - const int maxOutH = outDims.x; - const int maxOutW = outDims.y; + const int maxOutH = out_dims.x; + const int maxOutW = out_dims.y; const int img_offset = C * W * H; const int out_img_offset = C * maxOutW * maxOutH; @@ -95,24 +95,24 @@ Hip_CopyInt8ToNHWC_fp16( return; for (unsigned int n = 0; n < nchw.x; n++) { __half *out_tensor = (__half *)output_tensor + dst_buf_offset + n * out_img_offset; - unsigned int srcIdx = (y * W + x) * C; + unsigned int src_idx = (y * W + x) * C; // copy float3 pixels to dst if (C == 3) { - unsigned int dstIdx = y * maxOutW + x * 3; + unsigned int dst_idx = y * maxOutW + x * 3; const uchar *inp_img = &inp_image_u8[n * img_offset]; float3 dst; if (reverse_channels) - dst = make_float3((float)inp_img[srcIdx + 2], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx]) * multiplier + offset; + dst = make_float3((float)inp_img[src_idx + 2], (float)inp_img[src_idx + 1], (float)inp_img[src_idx]) * multiplier + offset; else - dst = make_float3((float)inp_img[srcIdx], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx + 2]) * multiplier + offset; - out_tensor[dstIdx] = __float2half(dst.x); - out_tensor[dstIdx + 1] = __float2half(dst.y); - out_tensor[dstIdx + 2] = __float2half(dst.z); + dst = make_float3((float)inp_img[src_idx], (float)inp_img[src_idx + 1], (float)inp_img[src_idx + 2]) * multiplier + offset; + out_tensor[dst_idx] = __float2half(dst.x); + out_tensor[dst_idx + 1] = __float2half(dst.y); + out_tensor[dst_idx + 2] = __float2half(dst.z); } else { - unsigned int dstIdx = y * maxOutW + x; + 
unsigned int dst_idx = y * maxOutW + x; const uchar *inp_img = &inp_image_u8[n * img_offset]; float *out_tensor = (float *)output_tensor + n * out_img_offset; - out_tensor[dstIdx] = __float2half((float)inp_img[srcIdx] * multiplier.x + offset.x); + out_tensor[dst_idx] = __float2half((float)inp_img[src_idx] * multiplier.x + offset.x); } } } @@ -123,7 +123,7 @@ Hip_CopyInt8ToNCHW_fp32( void *output_tensor, unsigned int dst_buf_offset, uint4 nchw, - uint2 outDims, + uint2 out_dims, float3 multiplier, float3 offset, unsigned int reverse_channels) { @@ -132,8 +132,8 @@ Hip_CopyInt8ToNCHW_fp32( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; - const int maxOutH = outDims.x; - const int maxOutW = outDims.y; + const int maxOutH = out_dims.x; + const int maxOutW = out_dims.y; const int img_offset = C * W * H; const int out_img_offset = C * maxOutW * maxOutH; unsigned int cstride = maxOutW * maxOutH; @@ -141,22 +141,22 @@ Hip_CopyInt8ToNCHW_fp32( if ((x >= maxOutW) || (y >= maxOutH)) return; for (unsigned int n = 0; n < nchw.x; n++) { - unsigned int srcIdx = (y * W + x) * C; - unsigned int dstIdx = y * maxOutW + x; + unsigned int src_idx = (y * W + x) * C; + unsigned int dst_idx = y * maxOutW + x; // copy float3 pixels to dst const uchar *inp_img = &inp_image_u8[n * img_offset]; float *out_tensor = (float *)output_tensor + n * out_img_offset + dst_buf_offset; if (C == 3) { float3 dst; if (reverse_channels) - dst = make_float3((float)inp_img[srcIdx + 2], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx]) * multiplier + offset; + dst = make_float3((float)inp_img[src_idx + 2], (float)inp_img[src_idx + 1], (float)inp_img[src_idx]) * multiplier + offset; else - dst = make_float3((float)inp_img[srcIdx], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx + 2]) * multiplier + offset; - out_tensor[dstIdx] = dst.x; - out_tensor[dstIdx + cstride] = dst.y; - out_tensor[dstIdx + cstride * 2] = dst.z; + dst = make_float3((float)inp_img[src_idx], (float)inp_img[src_idx + 
1], (float)inp_img[src_idx + 2]) * multiplier + offset; + out_tensor[dst_idx] = dst.x; + out_tensor[dst_idx + cstride] = dst.y; + out_tensor[dst_idx + cstride * 2] = dst.z; } else { - out_tensor[dstIdx] = (float)inp_img[srcIdx] * multiplier.x + offset.x; + out_tensor[dst_idx] = (float)inp_img[src_idx] * multiplier.x + offset.x; } } } @@ -167,7 +167,7 @@ Hip_CopyInt8ToNCHW_fp16( void *output_tensor, unsigned int dst_buf_offset, uint4 nchw, - uint2 outDims, + uint2 out_dims, float3 multiplier, float3 offset, const unsigned int reverse_channels) { @@ -176,8 +176,8 @@ Hip_CopyInt8ToNCHW_fp16( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; - const int maxOutH = outDims.x; - const int maxOutW = outDims.y; + const int maxOutH = out_dims.x; + const int maxOutW = out_dims.y; const int img_offset = C * W * H; const int out_img_offset = C * maxOutW * maxOutH; unsigned int cstride = maxOutW * maxOutH; @@ -187,20 +187,20 @@ Hip_CopyInt8ToNCHW_fp16( for (unsigned int n = 0; n < nchw.x; n++) { __half *out_tensor = (__half *)output_tensor + n * out_img_offset + dst_buf_offset; const uchar *inp_img = &inp_image_u8[n * img_offset]; - unsigned int srcIdx = (y * W + x) * C; + unsigned int src_idx = (y * W + x) * C; // copy float3 pixels to dst - unsigned int dstIdx = y * maxOutW + x; + unsigned int dst_idx = y * maxOutW + x; if (C == 3) { float3 dst; if (reverse_channels) - dst = make_float3((float)inp_img[srcIdx + 2], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx]) * multiplier + offset; + dst = make_float3((float)inp_img[src_idx + 2], (float)inp_img[src_idx + 1], (float)inp_img[src_idx]) * multiplier + offset; else - dst = make_float3((float)inp_img[srcIdx], (float)inp_img[srcIdx + 1], (float)inp_img[srcIdx + 2]) * multiplier + offset; - out_tensor[dstIdx] = __float2half(dst.x); - out_tensor[dstIdx + cstride] = __float2half(dst.y); - out_tensor[dstIdx + cstride * 2] = __float2half(dst.z); + dst = make_float3((float)inp_img[src_idx], (float)inp_img[src_idx + 
1], (float)inp_img[src_idx + 2]) * multiplier + offset; + out_tensor[dst_idx] = __float2half(dst.x); + out_tensor[dst_idx + cstride] = __float2half(dst.y); + out_tensor[dst_idx + cstride * 2] = __float2half(dst.z); } else { - out_tensor[dstIdx] = __float2half((float)inp_img[srcIdx] * multiplier.x + offset.x); + out_tensor[dst_idx] = __float2half((float)inp_img[src_idx] * multiplier.x + offset.x); } } } @@ -225,18 +225,18 @@ int HipExecCopyInt8ToNHWC( const unsigned max_output_height, const unsigned max_output_width) { int localThreads_x = 16, localThreads_y = 16; - uint2 outDims; + uint2 out_dims; if ((max_output_height == 0) || (max_output_width == 0)) - outDims = make_uint2(h, w); + out_dims = make_uint2(h, w); else - outDims = make_uint2(max_output_height, max_output_width); + out_dims = make_uint2(max_output_height, max_output_width); int globalThreads_x = w, globalThreads_y = h; if (!fp16) { hipLaunchKernelGGL(Hip_CopyInt8ToNHWC_fp32, dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), dim3(localThreads_x, localThreads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, - make_uint4(n, c, h, w), outDims, + make_uint4(n, c, h, w), out_dims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), reverse_channels); } else { @@ -244,7 +244,7 @@ int HipExecCopyInt8ToNHWC( dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), dim3(localThreads_x, localThreads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, - make_uint4(n, c, h, w), outDims, + make_uint4(n, c, h, w), out_dims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), reverse_channels); } @@ -271,18 +271,18 @@ int HipExecCopyInt8ToNCHW( const unsigned max_output_height, const unsigned max_output_width) { int localThreads_x = 16, localThreads_y = 16; - uint2 outDims; + uint2 out_dims; if 
((max_output_height == 0) || (max_output_width == 0)) - outDims = make_uint2(h, w); + out_dims = make_uint2(h, w); else - outDims = make_uint2(max_output_height, max_output_width); + out_dims = make_uint2(max_output_height, max_output_width); int globalThreads_x = w, globalThreads_y = h; if (!fp16) { hipLaunchKernelGGL(Hip_CopyInt8ToNCHW_fp32, dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), dim3(localThreads_x, localThreads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, - make_uint4(n, c, h, w), outDims, + make_uint4(n, c, h, w), out_dims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), reverse_channels); } else { @@ -290,7 +290,7 @@ int HipExecCopyInt8ToNCHW( dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), dim3(localThreads_x, localThreads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, - make_uint4(n, c, h, w), outDims, + make_uint4(n, c, h, w), out_dims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), reverse_channels); } diff --git a/rocAL/source/api/rocal_api_augmentation.cpp b/rocAL/source/api/rocal_api_augmentation.cpp index 4137c50b6..0988b58ed 100644 --- a/rocAL/source/api/rocal_api_augmentation.cpp +++ b/rocAL/source/api/rocal_api_augmentation.cpp @@ -554,8 +554,10 @@ RocalTensor ROCAL_API_CALL try { if ((dest_width | dest_height | resize_longer | resize_shorter) == 0) THROW("Atleast one size 'dest_width' or 'dest_height' or 'resize_shorter' or 'resize_longer' must be specified") + // Specifying dest width and height along with Resize_shorter and resize_longer can be used together in case of MIN_MAX_SCALING_MODE - for other scaling modes, this throws an error if ((dest_width | dest_height) && (resize_longer | resize_shorter) && (scaling_mode != RocalResizeScalingMode::ROCAL_SCALING_MODE_MIN_MAX)) THROW("Only one 
method of specifying size can be used \ndest_width and/or dest_height\nresize_shorter\nresize_longer") + // Resize_shorter and resize_longer can be used together in case of MIN_MAX_SCALING_MODE - for other scaling modes, this throws an error if (resize_longer && resize_shorter && scaling_mode != RocalResizeScalingMode::ROCAL_SCALING_MODE_MIN_MAX) THROW("'resize_longer' and 'resize_shorter' can only be passed together for min max scaling mode") diff --git a/rocAL/source/meta_data/coco_meta_data_reader.cpp b/rocAL/source/meta_data/coco_meta_data_reader.cpp index d0ddea904..8a23120ad 100644 --- a/rocAL/source/meta_data/coco_meta_data_reader.cpp +++ b/rocAL/source/meta_data/coco_meta_data_reader.cpp @@ -34,7 +34,7 @@ using namespace std; void COCOMetaDataReader::init(const MetaDataConfig &cfg, pMetaDataBatch meta_data_batch) { _path = cfg.path(); _avoid_class_remapping = cfg.class_remapping(); - this->set_aspect_ratio_grouping(cfg.aspect_ratio_grouping()); + this->set_aspect_ratio_grouping(cfg.get_aspect_ratio_grouping()); _output = meta_data_batch; _output->set_metadata_type(cfg.type()); } @@ -194,7 +194,7 @@ void COCOMetaDataReader::read_all(const std::string &path) { parser.SkipValue(); } } - _map_img_names.insert(pair(image_id, image_name)); + _map_image_names_to_id.insert(pair(image_id, image_name)); _map_img_sizes.insert(pair(image_name, img_size)); img_size = {}; } @@ -270,7 +270,7 @@ void COCOMetaDataReader::read_all(const std::string &path) { } } - auto itr = _map_img_names.find(id); + auto itr = _map_image_names_to_id.find(id); auto it = _map_img_sizes.find(itr->second); ImgSize image_size = it->second; // Convert to "ltrb" format if ((_output->get_metadata_type() == MetaDataType::PolygonMask) && iscrowd == 0) { diff --git a/rocAL/source/pipeline/master_graph.cpp b/rocAL/source/pipeline/master_graph.cpp index fb20553af..6e3620247 100644 --- a/rocAL/source/pipeline/master_graph.cpp +++ b/rocAL/source/pipeline/master_graph.cpp @@ -687,12 +687,9 @@ 
MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier fB = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_R)); fG = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_G)); fR = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(pix0, mask_B)); - fB = _mm256_mul_ps(fB, pmul0); - fG = _mm256_mul_ps(fG, pmul1); - fR = _mm256_mul_ps(fR, pmul2); - fB = _mm256_add_ps(fB, padd0); - fG = _mm256_add_ps(fG, padd1); - fR = _mm256_add_ps(fR, padd2); + fB = _mm256_fmadd_ps(fB, pmul0, padd0); + fG = _mm256_fmadd_ps(fG, pmul1, padd1); + fR = _mm256_fmadd_ps(fR, pmul2, padd2); _mm256_storeu_ps(B_buf, fB); _mm256_storeu_ps(G_buf, fG); _mm256_storeu_ps(R_buf, fR); diff --git a/rocAL/source/readers/image/coco_file_source_reader.cpp b/rocAL/source/readers/image/coco_file_source_reader.cpp index ec3f60501..f1e656f6a 100644 --- a/rocAL/source/readers/image/coco_file_source_reader.cpp +++ b/rocAL/source/readers/image/coco_file_source_reader.cpp @@ -86,7 +86,7 @@ Reader::Status COCOFileSourceReader::initialize(ReaderConfig desc) { } } - if (_meta_data_reader && _meta_data_reader->aspect_ratio_grouping()) { + if (_meta_data_reader && _meta_data_reader->get_aspect_ratio_grouping()) { // calculate the aspect ratio for each file and create a pair of std::vector> file_aspect_ratio_pair(_file_names.size()); for (size_t i = 0; i < _file_names.size(); i++) { @@ -221,7 +221,7 @@ void COCOFileSourceReader::shuffle_with_aspect_ratios() { } void COCOFileSourceReader::reset() { - if (_meta_data_reader && _meta_data_reader->aspect_ratio_grouping()) { + if (_meta_data_reader && _meta_data_reader->get_aspect_ratio_grouping()) { _file_names = _sorted_file_names; if (_shuffle) shuffle_with_aspect_ratios(); } else if (_shuffle) { diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index 835098b3d..b00eb1f80 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -485,7 +485,7 @@ PYBIND11_MODULE(rocal_pybind, m) { int prev_object_cnt = 0; auto 
mask_count_buf = mask_count.request(); int *mask_count_ptr = static_cast(mask_count_buf.ptr); - for (int i = 0; i < bbox_labels->size(); i++) { // nbatchSize + for (int i = 0; i < bbox_labels->size(); i++) { // For each image in a batch, parse through the mask metadata buffers and convert them to polygons format float *mask_buffer = static_cast(mask_data->at(i)->buffer()); py::list poly_batch_list; for (unsigned j = prev_object_cnt; j < bbox_labels->at(i)->dims().at(0) + prev_object_cnt; j++) { From 5c4a23deac00bf1b472410b438bf32a72fcc01d5 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Thu, 26 Oct 2023 18:19:11 +0000 Subject: [PATCH 08/33] Formatting changes --- rocAL/rocAL_hip/rocal_hip_kernels.cpp | 40 +++++++++++++-------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/rocAL/rocAL_hip/rocal_hip_kernels.cpp b/rocAL/rocAL_hip/rocal_hip_kernels.cpp index 3475ec13a..f4f1f076d 100644 --- a/rocAL/rocAL_hip/rocal_hip_kernels.cpp +++ b/rocAL/rocAL_hip/rocal_hip_kernels.cpp @@ -43,8 +43,8 @@ Hip_CopyInt8ToNHWC_fp32( const int C = nchw.y; const int maxOutH = out_dims.x; const int maxOutW = out_dims.y; - const int img_offset = C * W * H; - const int out_img_offset = C * maxOutW * maxOutH; + const int imgOffset = C * W * H; + const int outImgOffset = C * maxOutW * maxOutH; if ((x >= maxOutW) || (y >= maxOutH)) return; @@ -54,8 +54,8 @@ Hip_CopyInt8ToNHWC_fp32( // copy float3 pixels to dst if (C == 3) { float3 dst; - const uchar *inp_img = &inp_image_u8[n * img_offset]; - float *out_tensor = (float *)((float *)output_tensor + dst_buf_offset + n * out_img_offset); + const uchar *inp_img = &inp_image_u8[n * imgOffset]; + float *out_tensor = (float *)((float *)output_tensor + dst_buf_offset + n * outImgOffset); if (reverse_channels) dst = make_float3((float)inp_img[src_idx + 2], (float)inp_img[src_idx + 1], (float)inp_img[src_idx]) * multiplier + offset; else @@ -64,8 +64,8 @@ Hip_CopyInt8ToNHWC_fp32( out_tensor[dst_idx + 1] = dst.y; out_tensor[dst_idx 
+ 2] = dst.z; } else { - const uchar *inp_img = &inp_image_u8[n * img_offset + dst_buf_offset]; - float *out_tensor = (float *)output_tensor + dst_buf_offset + n * out_img_offset; + const uchar *inp_img = &inp_image_u8[n * imgOffset + dst_buf_offset]; + float *out_tensor = (float *)output_tensor + dst_buf_offset + n * outImgOffset; out_tensor[dst_idx] = (float)inp_img[src_idx] * multiplier.x + offset.x; } } @@ -88,18 +88,18 @@ Hip_CopyInt8ToNHWC_fp16( const int C = nchw.y; const int maxOutH = out_dims.x; const int maxOutW = out_dims.y; - const int img_offset = C * W * H; - const int out_img_offset = C * maxOutW * maxOutH; + const int imgOffset = C * W * H; + const int outImgOffset = C * maxOutW * maxOutH; if ((x >= maxOutW) || (y >= maxOutH)) return; for (unsigned int n = 0; n < nchw.x; n++) { - __half *out_tensor = (__half *)output_tensor + dst_buf_offset + n * out_img_offset; + __half *out_tensor = (__half *)output_tensor + dst_buf_offset + n * outImgOffset; unsigned int src_idx = (y * W + x) * C; // copy float3 pixels to dst if (C == 3) { unsigned int dst_idx = y * maxOutW + x * 3; - const uchar *inp_img = &inp_image_u8[n * img_offset]; + const uchar *inp_img = &inp_image_u8[n * imgOffset]; float3 dst; if (reverse_channels) dst = make_float3((float)inp_img[src_idx + 2], (float)inp_img[src_idx + 1], (float)inp_img[src_idx]) * multiplier + offset; @@ -110,8 +110,8 @@ Hip_CopyInt8ToNHWC_fp16( out_tensor[dst_idx + 2] = __float2half(dst.z); } else { unsigned int dst_idx = y * maxOutW + x; - const uchar *inp_img = &inp_image_u8[n * img_offset]; - float *out_tensor = (float *)output_tensor + n * out_img_offset; + const uchar *inp_img = &inp_image_u8[n * imgOffset]; + float *out_tensor = (float *)output_tensor + n * outImgOffset; out_tensor[dst_idx] = __float2half((float)inp_img[src_idx] * multiplier.x + offset.x); } } @@ -134,8 +134,8 @@ Hip_CopyInt8ToNCHW_fp32( const int C = nchw.y; const int maxOutH = out_dims.x; const int maxOutW = out_dims.y; - const int img_offset 
= C * W * H; - const int out_img_offset = C * maxOutW * maxOutH; + const int imgOffset = C * W * H; + const int outImgOffset = C * maxOutW * maxOutH; unsigned int cstride = maxOutW * maxOutH; if ((x >= maxOutW) || (y >= maxOutH)) @@ -144,8 +144,8 @@ Hip_CopyInt8ToNCHW_fp32( unsigned int src_idx = (y * W + x) * C; unsigned int dst_idx = y * maxOutW + x; // copy float3 pixels to dst - const uchar *inp_img = &inp_image_u8[n * img_offset]; - float *out_tensor = (float *)output_tensor + n * out_img_offset + dst_buf_offset; + const uchar *inp_img = &inp_image_u8[n * imgOffset]; + float *out_tensor = (float *)output_tensor + n * outImgOffset + dst_buf_offset; if (C == 3) { float3 dst; if (reverse_channels) @@ -178,15 +178,15 @@ Hip_CopyInt8ToNCHW_fp16( const int C = nchw.y; const int maxOutH = out_dims.x; const int maxOutW = out_dims.y; - const int img_offset = C * W * H; - const int out_img_offset = C * maxOutW * maxOutH; + const int imgOffset = C * W * H; + const int outImgOffset = C * maxOutW * maxOutH; unsigned int cstride = maxOutW * maxOutH; if ((x >= maxOutW) || (y >= maxOutH)) return; for (unsigned int n = 0; n < nchw.x; n++) { - __half *out_tensor = (__half *)output_tensor + n * out_img_offset + dst_buf_offset; - const uchar *inp_img = &inp_image_u8[n * img_offset]; + __half *out_tensor = (__half *)output_tensor + n * outImgOffset + dst_buf_offset; + const uchar *inp_img = &inp_image_u8[n * imgOffset]; unsigned int src_idx = (y * W + x) * C; // copy float3 pixels to dst unsigned int dst_idx = y * maxOutW + x; From bffff3f23422624097767180aca16b024cef58e1 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Tue, 31 Oct 2023 11:26:18 +0000 Subject: [PATCH 09/33] Resolving review comments --- rocAL/include/api/rocal_api_data_transfer.h | 2 +- rocAL/include/pipeline/master_graph.h | 2 +- rocAL/rocAL_hip/rocal_hip_kernels.cpp | 102 +++++++++---------- rocAL/rocAL_hip/rocal_hip_kernels.h | 8 +- rocAL/source/api/rocal_api_data_transfer.cpp | 4 +- 
rocAL/source/pipeline/master_graph.cpp | 32 +++--- rocAL_pybind/amd/rocal/pipeline.py | 4 +- rocAL_pybind/rocal_pybind.cpp | 4 +- 8 files changed, 79 insertions(+), 79 deletions(-) diff --git a/rocAL/include/api/rocal_api_data_transfer.h b/rocAL/include/api/rocal_api_data_transfer.h index f94819273..621c714aa 100644 --- a/rocAL/include/api/rocal_api_data_transfer.h +++ b/rocAL/include/api/rocal_api_data_transfer.h @@ -63,7 +63,7 @@ extern "C" RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext rocal_context, RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, - bool reverse_channels, RocalOutputMemType output_mem_type, int max_height = 0, int max_width = 0); + bool reverse_channels, RocalOutputMemType output_mem_type, int max_roi_height = 0, int max_roi_width = 0); /*! * \brief Sets the output images in the RocalContext diff --git a/rocAL/include/pipeline/master_graph.h b/rocAL/include/pipeline/master_graph.h index 4ed84bb6e..af2f12b84 100644 --- a/rocAL/include/pipeline/master_graph.h +++ b/rocAL/include/pipeline/master_graph.h @@ -82,7 +82,7 @@ class MasterGraph { Status reset(); size_t remaining_count(); MasterGraph::Status to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, float multiplier2, - float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, uint max_height = 0, uint max_width = 0); + float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, uint max_roi_height = 0, uint max_roi_width = 0); Status copy_output(unsigned char *out_ptr, size_t out_size_in_bytes); Status copy_out_tensor_planar(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float 
offset2, bool reverse_channels, RocalTensorDataType output_data_type); diff --git a/rocAL/rocAL_hip/rocal_hip_kernels.cpp b/rocAL/rocAL_hip/rocal_hip_kernels.cpp index f4f1f076d..03ac5a88f 100644 --- a/rocAL/rocAL_hip/rocal_hip_kernels.cpp +++ b/rocAL/rocAL_hip/rocal_hip_kernels.cpp @@ -41,21 +41,21 @@ Hip_CopyInt8ToNHWC_fp32( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; - const int maxOutH = out_dims.x; - const int maxOutW = out_dims.y; - const int imgOffset = C * W * H; - const int outImgOffset = C * maxOutW * maxOutH; + const int max_roi_height = out_dims.x; + const int max_roi_width = out_dims.y; + const int img_offset = C * W * H; + const int out_img_offset = C * max_roi_width * max_roi_height; - if ((x >= maxOutW) || (y >= maxOutH)) + if ((x >= max_roi_width) || (y >= max_roi_height)) return; for (unsigned int n = 0; n < nchw.x; n++) { unsigned int src_idx = (y * W + x) * C; // src is RGB - unsigned int dst_idx = (y * maxOutW + x) * C; + unsigned int dst_idx = (y * max_roi_width + x) * C; // copy float3 pixels to dst if (C == 3) { float3 dst; - const uchar *inp_img = &inp_image_u8[n * imgOffset]; - float *out_tensor = (float *)((float *)output_tensor + dst_buf_offset + n * outImgOffset); + const uchar *inp_img = &inp_image_u8[n * img_offset]; + float *out_tensor = (float *)((float *)output_tensor + dst_buf_offset + n * out_img_offset); if (reverse_channels) dst = make_float3((float)inp_img[src_idx + 2], (float)inp_img[src_idx + 1], (float)inp_img[src_idx]) * multiplier + offset; else @@ -64,8 +64,8 @@ Hip_CopyInt8ToNHWC_fp32( out_tensor[dst_idx + 1] = dst.y; out_tensor[dst_idx + 2] = dst.z; } else { - const uchar *inp_img = &inp_image_u8[n * imgOffset + dst_buf_offset]; - float *out_tensor = (float *)output_tensor + dst_buf_offset + n * outImgOffset; + const uchar *inp_img = &inp_image_u8[n * img_offset + dst_buf_offset]; + float *out_tensor = (float *)output_tensor + dst_buf_offset + n * out_img_offset; out_tensor[dst_idx] = 
(float)inp_img[src_idx] * multiplier.x + offset.x; } } @@ -86,20 +86,20 @@ Hip_CopyInt8ToNHWC_fp16( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; - const int maxOutH = out_dims.x; - const int maxOutW = out_dims.y; - const int imgOffset = C * W * H; - const int outImgOffset = C * maxOutW * maxOutH; + const int max_roi_height = out_dims.x; + const int max_roi_width = out_dims.y; + const int img_offset = C * W * H; + const int out_img_offset = C * max_roi_width * max_roi_height; - if ((x >= maxOutW) || (y >= maxOutH)) + if ((x >= max_roi_width) || (y >= max_roi_height)) return; for (unsigned int n = 0; n < nchw.x; n++) { - __half *out_tensor = (__half *)output_tensor + dst_buf_offset + n * outImgOffset; + __half *out_tensor = (__half *)output_tensor + dst_buf_offset + n * out_img_offset; unsigned int src_idx = (y * W + x) * C; // copy float3 pixels to dst if (C == 3) { - unsigned int dst_idx = y * maxOutW + x * 3; - const uchar *inp_img = &inp_image_u8[n * imgOffset]; + unsigned int dst_idx = y * max_roi_width + x * 3; + const uchar *inp_img = &inp_image_u8[n * img_offset]; float3 dst; if (reverse_channels) dst = make_float3((float)inp_img[src_idx + 2], (float)inp_img[src_idx + 1], (float)inp_img[src_idx]) * multiplier + offset; @@ -109,9 +109,9 @@ Hip_CopyInt8ToNHWC_fp16( out_tensor[dst_idx + 1] = __float2half(dst.y); out_tensor[dst_idx + 2] = __float2half(dst.z); } else { - unsigned int dst_idx = y * maxOutW + x; - const uchar *inp_img = &inp_image_u8[n * imgOffset]; - float *out_tensor = (float *)output_tensor + n * outImgOffset; + unsigned int dst_idx = y * max_roi_width + x; + const uchar *inp_img = &inp_image_u8[n * img_offset]; + float *out_tensor = (float *)output_tensor + n * out_img_offset; out_tensor[dst_idx] = __float2half((float)inp_img[src_idx] * multiplier.x + offset.x); } } @@ -132,20 +132,20 @@ Hip_CopyInt8ToNCHW_fp32( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; - const int maxOutH = out_dims.x; - const int 
maxOutW = out_dims.y; - const int imgOffset = C * W * H; - const int outImgOffset = C * maxOutW * maxOutH; - unsigned int cstride = maxOutW * maxOutH; + const int max_roi_height = out_dims.x; + const int max_roi_width = out_dims.y; + const int img_offset = C * W * H; + const int out_img_offset = C * max_roi_width * max_roi_height; + unsigned int cstride = max_roi_width * max_roi_height; - if ((x >= maxOutW) || (y >= maxOutH)) + if ((x >= max_roi_width) || (y >= max_roi_height)) return; for (unsigned int n = 0; n < nchw.x; n++) { unsigned int src_idx = (y * W + x) * C; - unsigned int dst_idx = y * maxOutW + x; + unsigned int dst_idx = y * max_roi_width + x; // copy float3 pixels to dst - const uchar *inp_img = &inp_image_u8[n * imgOffset]; - float *out_tensor = (float *)output_tensor + n * outImgOffset + dst_buf_offset; + const uchar *inp_img = &inp_image_u8[n * img_offset]; + float *out_tensor = (float *)output_tensor + n * out_img_offset + dst_buf_offset; if (C == 3) { float3 dst; if (reverse_channels) @@ -176,20 +176,20 @@ Hip_CopyInt8ToNCHW_fp16( const int W = nchw.w; const int H = nchw.z; const int C = nchw.y; - const int maxOutH = out_dims.x; - const int maxOutW = out_dims.y; - const int imgOffset = C * W * H; - const int outImgOffset = C * maxOutW * maxOutH; - unsigned int cstride = maxOutW * maxOutH; + const int max_roi_height = out_dims.x; + const int max_roi_width = out_dims.y; + const int img_offset = C * W * H; + const int out_img_offset = C * max_roi_width * max_roi_height; + unsigned int cstride = max_roi_width * max_roi_height; - if ((x >= maxOutW) || (y >= maxOutH)) + if ((x >= max_roi_width) || (y >= max_roi_height)) return; for (unsigned int n = 0; n < nchw.x; n++) { - __half *out_tensor = (__half *)output_tensor + n * outImgOffset + dst_buf_offset; - const uchar *inp_img = &inp_image_u8[n * imgOffset]; + __half *out_tensor = (__half *)output_tensor + n * out_img_offset + dst_buf_offset; + const uchar *inp_img = &inp_image_u8[n * img_offset]; 
unsigned int src_idx = (y * W + x) * C; // copy float3 pixels to dst - unsigned int dst_idx = y * maxOutW + x; + unsigned int dst_idx = y * max_roi_width + x; if (C == 3) { float3 dst; if (reverse_channels) @@ -224,25 +224,25 @@ int HipExecCopyInt8ToNHWC( unsigned int fp16, const unsigned max_output_height, const unsigned max_output_width) { - int localThreads_x = 16, localThreads_y = 16; + int local_threads_x = 16, local_threads_y = 16; uint2 out_dims; if ((max_output_height == 0) || (max_output_width == 0)) out_dims = make_uint2(h, w); else out_dims = make_uint2(max_output_height, max_output_width); - int globalThreads_x = w, globalThreads_y = h; + int global_threads_x = w, global_threads_y = h; if (!fp16) { hipLaunchKernelGGL(Hip_CopyInt8ToNHWC_fp32, - dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), - dim3(localThreads_x, localThreads_y), + dim3(ceil((float)global_threads_x / local_threads_x), ceil((float)global_threads_y / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, make_uint4(n, c, h, w), out_dims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), reverse_channels); } else { hipLaunchKernelGGL(Hip_CopyInt8ToNHWC_fp16, - dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), - dim3(localThreads_x, localThreads_y), + dim3(ceil((float)global_threads_x / local_threads_x), ceil((float)global_threads_y / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, make_uint4(n, c, h, w), out_dims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), @@ -270,25 +270,25 @@ int HipExecCopyInt8ToNCHW( unsigned int fp16, const unsigned max_output_height, const unsigned max_output_width) { - int localThreads_x = 16, localThreads_y = 16; + 
int local_threads_x = 16, local_threads_y = 16; uint2 out_dims; if ((max_output_height == 0) || (max_output_width == 0)) out_dims = make_uint2(h, w); else out_dims = make_uint2(max_output_height, max_output_width); - int globalThreads_x = w, globalThreads_y = h; + int global_threads_x = w, global_threads_y = h; if (!fp16) { hipLaunchKernelGGL(Hip_CopyInt8ToNCHW_fp32, - dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), - dim3(localThreads_x, localThreads_y), + dim3(ceil((float)global_threads_x / local_threads_x), ceil((float)global_threads_y / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, make_uint4(n, c, h, w), out_dims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), reverse_channels); } else { hipLaunchKernelGGL(Hip_CopyInt8ToNCHW_fp16, - dim3(ceil((float)globalThreads_x / localThreads_x), ceil((float)globalThreads_y / localThreads_y)), - dim3(localThreads_x, localThreads_y), + dim3(ceil((float)global_threads_x / local_threads_x), ceil((float)global_threads_y / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream, (const uchar *)inp_image_u8, output_tensor, dst_buf_offset, make_uint4(n, c, h, w), out_dims, make_float3(multiplier0, multiplier1, multiplier2), make_float3(offset0, offset1, offset2), diff --git a/rocAL/rocAL_hip/rocal_hip_kernels.h b/rocAL/rocAL_hip/rocal_hip_kernels.h index 0db801f59..a089c904e 100644 --- a/rocAL/rocAL_hip/rocal_hip_kernels.h +++ b/rocAL/rocAL_hip/rocal_hip_kernels.h @@ -39,8 +39,8 @@ int HipExecCopyInt8ToNHWC( float offset2, unsigned int reverse_channels, unsigned int fp16, - const unsigned max_output_height = 0, - const unsigned max_output_width = 0); + const unsigned max_roi_height = 0, + const unsigned max_roi_width = 0); int HipExecCopyInt8ToNCHW( hipStream_t stream, @@ -59,5 +59,5 @@ int HipExecCopyInt8ToNCHW( float offset2, 
unsigned int reverse_channels, unsigned int fp16, - const unsigned max_output_height = 0, - const unsigned max_output_width = 0); + const unsigned max_roi_height = 0, + const unsigned max_roi_width = 0); diff --git a/rocAL/source/api/rocal_api_data_transfer.cpp b/rocAL/source/api/rocal_api_data_transfer.cpp index a3e3088cf..4202de1a3 100644 --- a/rocAL/source/api/rocal_api_data_transfer.cpp +++ b/rocAL/source/api/rocal_api_data_transfer.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext p_context, void* out_ptr, RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, - bool reverse_channels, RocalOutputMemType output_mem_type, int max_height, int max_width) { + bool reverse_channels, RocalOutputMemType output_mem_type, int max_roi_height, int max_roi_width) { auto context = static_cast(p_context); try { if (tensor_format != ROCAL_NHWC && tensor_format != ROCAL_NCHW) @@ -42,7 +42,7 @@ rocalToTensor(RocalContext p_context, void* out_ptr, RocalTensorLayout tensor_fo auto tensor_layout = (tensor_format == ROCAL_NHWC) ? RocalTensorlayout::NHWC : RocalTensorlayout::NCHW; auto tensor_output_data_type = (tensor_output_type == ROCAL_FP32) ? 
RocalTensorDataType::FP32 : RocalTensorDataType::FP16; context->master_graph->to_tensor(out_ptr, tensor_layout, multiplier0, multiplier1, multiplier2, - offset0, offset1, offset2, reverse_channels, tensor_output_data_type, output_mem_type, max_height, max_width); + offset0, offset1, offset2, reverse_channels, tensor_output_data_type, output_mem_type, max_roi_height, max_roi_width); } catch (const std::exception& e) { context->capture_error(e.what()); ERR(e.what()) diff --git a/rocAL/source/pipeline/master_graph.cpp b/rocAL/source/pipeline/master_graph.cpp index 6e3620247..b63f568d6 100644 --- a/rocAL/source/pipeline/master_graph.cpp +++ b/rocAL/source/pipeline/master_graph.cpp @@ -452,7 +452,7 @@ MasterGraph::timing() { MasterGraph::Status MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, - float multiplier2, float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, uint max_height, uint max_width) { + float multiplier2, float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, uint max_roi_height, uint max_roi_width) { if (no_more_processed_data()) return MasterGraph::Status::NO_MORE_DATA; @@ -474,9 +474,9 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier const size_t h = dims[1]; const size_t w = dims[2]; const size_t single_output_tensor_size = output_tensor_info.data_size(); - if ((max_height == 0) || (max_width == 0)) { - max_height = h; - max_width = w; + if ((max_roi_height == 0) || (max_roi_width == 0)) { + max_roi_height = h; + max_roi_width = w; } #if ENABLE_OPENCL @@ -560,11 +560,11 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier auto img_buffer = out_tensor; if (format == RocalTensorlayout::NHWC) { HipExecCopyInt8ToNHWC(_device.resources()->hip_stream, (const void 
*)img_buffer, out_ptr, dest_buf_offset, n, c, h, w, - multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_height, max_width); + multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_roi_height, max_roi_width); } else { HipExecCopyInt8ToNCHW(_device.resources()->hip_stream, (const void *)img_buffer, out_ptr, dest_buf_offset, n, c, h, w, - multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_height, max_width); + multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_roi_height, max_roi_width); } dest_buf_offset += single_output_tensor_size; } @@ -597,11 +597,11 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier if (format == RocalTensorlayout::NHWC) { HipExecCopyInt8ToNHWC(_device.resources()->hip_stream, (const void *)_output_tensor_buffer, out_ptr, dest_buf_offset, n, c, h, w, - multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_height, max_width); + multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_roi_height, max_roi_width); } else { HipExecCopyInt8ToNCHW(_device.resources()->hip_stream, (const void *)_output_tensor_buffer, out_ptr, dest_buf_offset, n, c, h, w, - multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_height, max_width); + multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, reverse_channels, fp16, max_roi_height, max_roi_width); } dest_buf_offset += single_output_tensor_size; } @@ -618,8 +618,8 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier auto num_threads = _cpu_num_threads * 2; for (auto &&out_tensor : output_buffers) { unsigned int single_tensor_size = w * c * h; - unsigned int channel_size = max_width * max_height; - unsigned int output_single_tensor_size = max_height * 
max_width * c; + unsigned int channel_size = max_roi_width * max_roi_height; + unsigned int output_single_tensor_size = max_roi_height * max_roi_width * c; unsigned int input_width_stride = w * c; #pragma omp parallel for num_threads(num_threads) for (unsigned int batch_count = 0; batch_count < n; batch_count++) { @@ -675,10 +675,10 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier __m256 padd0 = _mm256_set1_ps(offset0); __m256 padd1 = _mm256_set1_ps(offset1); __m256 padd2 = _mm256_set1_ps(offset2); - uint alignedLength = (max_width & ~7); // multiple of 8 + uint alignedLength = (max_roi_width & ~7); // multiple of 8 __m256 fR, fG, fB; - for (uint row = 0; row < max_height; row++) { + for (uint row = 0; row < max_roi_height; row++) { unsigned char *in_buffer_row = reinterpret_cast(in_buffer) + (row * input_width_stride); uint col = 0; for (; col < alignedLength; col += 8) { @@ -698,7 +698,7 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier R_buf += 8; in_buffer_row += 24; } - for (; col < max_width; col++, in_buffer_row += 3) { + for (; col < max_roi_width; col++, in_buffer_row += 3) { *B_buf++ = (in_buffer_row[0] * multiplier0) + offset0; *G_buf++ = (in_buffer_row[1] * multiplier1) + offset1; *R_buf++ = (in_buffer_row[2] * multiplier2) + offset1; @@ -739,11 +739,11 @@ MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier __m256 padd0 = _mm256_set1_ps(offset0); __m256 padd1 = _mm256_set1_ps(offset1); __m256 padd2 = _mm256_set1_ps(offset2); - uint alignedLength = (max_width & ~7); // multiple of 8 + uint alignedLength = (max_roi_width & ~7); // multiple of 8 __m256 fR, fG, fB; __m128i tempR, tempG, tempB; - for (uint row = 0; row < max_height; row++) { + for (uint row = 0; row < max_roi_height; row++) { unsigned char *in_buffer_row = reinterpret_cast(in_buffer) + (row * input_width_stride); uint col = 0; for (; col < alignedLength; col += 8) { @@ -766,7 +766,7 @@ 
MasterGraph::to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier R_buf_16 += 8; in_buffer_row += 24; } - for (; col < max_width; col++, in_buffer_row += 3) { + for (; col < max_roi_width; col++, in_buffer_row += 3) { *B_buf_16++ = (half)(in_buffer_row[0] * multiplier0) + offset0; *G_buf_16++ = (half)(in_buffer_row[1] * multiplier1) + offset1; *R_buf_16++ = (half)(in_buffer_row[2] * multiplier2) + offset2; diff --git a/rocAL_pybind/amd/rocal/pipeline.py b/rocAL_pybind/amd/rocal/pipeline.py index b0ee8f440..fc454f97d 100644 --- a/rocAL_pybind/amd/rocal/pipeline.py +++ b/rocAL_pybind/amd/rocal/pipeline.py @@ -146,9 +146,9 @@ def define_graph(self): def get_handle(self): return self._handle - def copyToExternalTensor(self, array, multiplier, offset, reverse_channels, tensor_format, tensor_dtype, max_height=0, max_width=0): + def copyToExternalTensor(self, array, multiplier, offset, reverse_channels, tensor_format, tensor_dtype, max_roi_height=0, max_roi_width=0): b.rocalToTensor(self._handle, ctypes.c_void_p(array.data_ptr()), tensor_format, tensor_dtype, - multiplier[0], multiplier[1], multiplier[2], offset[0], offset[1], offset[2], (1 if reverse_channels else 0), self._output_memory_type, max_height, max_width) + multiplier[0], multiplier[1], multiplier[2], offset[0], offset[1], offset[2], (1 if reverse_channels else 0), self._output_memory_type, max_roi_height, max_roi_width) def get_one_hot_encoded_labels(self, array, device): if device == "cpu": diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index b00eb1f80..2c446f274 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -87,12 +87,12 @@ py::object wrapper_image_name(RocalContext context, int array_len) { py::object wrapper_copy_to_tensor(RocalContext context, py::object p, RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float 
offset2, - bool reverse_channels, RocalOutputMemType output_mem_type, uint max_height, uint max_width) { + bool reverse_channels, RocalOutputMemType output_mem_type, uint max_roi_height, uint max_roi_width) { auto ptr = ctypes_void_ptr(p); // call pure C++ function int status = rocalToTensor(context, ptr, tensor_format, tensor_output_type, multiplier0, multiplier1, multiplier2, offset0, offset1, offset2, - reverse_channels, output_mem_type, max_height, max_width); + reverse_channels, output_mem_type, max_roi_height, max_roi_width); return py::cast(Py_None); } From 166d9ab1343eb1012efbc009f0a453f935b1189d Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Tue, 31 Oct 2023 17:03:23 +0000 Subject: [PATCH 10/33] Adding min_max scaling mode comment --- rocAL/source/api/rocal_api_augmentation.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocAL/source/api/rocal_api_augmentation.cpp b/rocAL/source/api/rocal_api_augmentation.cpp index 0988b58ed..33fb5b57a 100644 --- a/rocAL/source/api/rocal_api_augmentation.cpp +++ b/rocAL/source/api/rocal_api_augmentation.cpp @@ -554,10 +554,10 @@ RocalTensor ROCAL_API_CALL try { if ((dest_width | dest_height | resize_longer | resize_shorter) == 0) THROW("Atleast one size 'dest_width' or 'dest_height' or 'resize_shorter' or 'resize_longer' must be specified") - // Specifying dest width and height along with Resize_shorter and resize_longer can be used together in case of MIN_MAX_SCALING_MODE - for other scaling modes, this throws an error + // MaskRCNN training uses a new resize scaling mode - MIN_MAX_SCALING_MODE where min_size and max_size is passed and the final output size is calculated from the image size + // Only in the case of MIN_MAX_SCALING_MODE, both resize_shorter and resize_longer values can be passed together if ((dest_width | dest_height) && (resize_longer | resize_shorter) && (scaling_mode != RocalResizeScalingMode::ROCAL_SCALING_MODE_MIN_MAX)) THROW("Only one method of specifying size can be 
used \ndest_width and/or dest_height\nresize_shorter\nresize_longer") - // Resize_shorter and resize_longer can be used together in case of MIN_MAX_SCALING_MODE - for other scaling modes, this throws an error if (resize_longer && resize_shorter && scaling_mode != RocalResizeScalingMode::ROCAL_SCALING_MODE_MIN_MAX) THROW("'resize_longer' and 'resize_shorter' can only be passed together for min max scaling mode") From e49fece590a27dfc54a4f0e488f0e3d0d5dd5494 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Wed, 1 Nov 2023 06:39:02 +0000 Subject: [PATCH 11/33] Removing unused vector in coco reader --- rocAL/include/readers/image/coco_file_source_reader.h | 1 - 1 file changed, 1 deletion(-) diff --git a/rocAL/include/readers/image/coco_file_source_reader.h b/rocAL/include/readers/image/coco_file_source_reader.h index ffa35caea..fd14c5061 100644 --- a/rocAL/include/readers/image/coco_file_source_reader.h +++ b/rocAL/include/readers/image/coco_file_source_reader.h @@ -78,7 +78,6 @@ class COCOFileSourceReader : public Reader { struct dirent *_entity; std::vector _file_names, _sorted_file_names; std::vector _aspect_ratios; - std::vector _files; unsigned _curr_file_idx; FILE *_current_fPtr; std::ifstream _current_ifs; From e6e24bd85ec5d1f6d0990bf2b63b53f939416b84 Mon Sep 17 00:00:00 2001 From: fgladwin Date: Thu, 2 Nov 2023 14:03:37 -0400 Subject: [PATCH 12/33] Improve code readability Add appropriate comments --- rocAL/include/api/rocal_api_meta_data.h | 6 +++--- rocAL/include/pipeline/master_graph.h | 14 +++++++------- rocAL/source/meta_data/bounding_box_graph.cpp | 2 +- rocAL/source/pipeline/master_graph.cpp | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/rocAL/include/api/rocal_api_meta_data.h b/rocAL/include/api/rocal_api_meta_data.h index d339944c7..5d043323e 100644 --- a/rocAL/include/api/rocal_api_meta_data.h +++ b/rocAL/include/api/rocal_api_meta_data.h @@ -309,11 +309,11 @@ extern "C" void ROCAL_API_CALL 
rocalGetJointsDataPtr(RocalContext p_context, Roc extern "C" void ROCAL_API_CALL rocalBoxIouMatcher(RocalContext p_context, std::vector& anchors, float criteria, float high_threshold, float low_threshold, bool allow_low_quality_matches = true); -/*! \brief API to return the matched idices for the bounding box and anchors +/*! \brief API to return the matched indices for the bounding box and anchors * \ingroup group_rocal_meta_data - * \param [in] rocal_context rocAL context + * \param [in] p_context rocAL context * \return RocalTensorList of matched indices */ -extern "C" RocalTensorList ROCAL_API_CALL rocalGetMatchedIndices(RocalContext rocal_context); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetMatchedIndices(RocalContext p_context); #endif // MIVISIONX_ROCAL_API_META_DATA_H diff --git a/rocAL/include/pipeline/master_graph.h b/rocAL/include/pipeline/master_graph.h index 5b03e0018..5744609a8 100644 --- a/rocAL/include/pipeline/master_graph.h +++ b/rocAL/include/pipeline/master_graph.h @@ -46,11 +46,11 @@ THE SOFTWARE. #include "randombboxcrop_meta_data_reader.h" #include "rocal_api_types.h" #define MAX_STRING_LENGTH 100 -#define MAX_OBJECTS 50 // Setting an arbitrary value 50.(Max number of objects/image in COCO dataset is 93) +#define MAX_OBJECTS 50 // Setting an arbitrary value 50.(Max number of objects/image in COCO dataset is 93) #define BBOX_COUNT 4 -#define MAX_NUM_ANCHORS 8732 // Num of bbox achors used in SSD training +#define MAX_SSD_ANCHORS 8732 // Num of bbox achors used in SSD training #define MAX_MASK_BUFFER 10000 -#define MAX_ANCHORS 120087 // Num of bbox achors used in Retinanet training +#define MAX_RETINANET_ANCHORS 120087 // Num of bbox achors used in Retinanet training #if ENABLE_SIMD #if _WIN32 @@ -209,10 +209,10 @@ class MasterGraph { std::vector _means, _stds; //_means: [x y w h] mean values for normalization _stds: [x y w h] standard deviations for offset normalization. 
bool _augmentation_metanode = false; // box IoU matcher variables - bool _is_box_iou_matcher = false; // bool variable to set the box iou matcher - float _high_threshold = 0.5f; // Max IoU threshold - float _low_threshold = 0.4f; // Min IoU threshold - bool _allow_low_quality_matches = true; // Set to true to include low quality matches in matched idx generation + bool _is_box_iou_matcher = false; // bool variable to set the box iou matcher + float _high_threshold = 0.5f; // Max IoU threshold + float _low_threshold = 0.4f; // Min IoU threshold + bool _allow_low_quality_matches = true; // Set to true to include low quality matches in matched idx generation #if ENABLE_HIP BoxEncoderGpu *_box_encoder_gpu = nullptr; #endif diff --git a/rocAL/source/meta_data/bounding_box_graph.cpp b/rocAL/source/meta_data/bounding_box_graph.cpp index 9e7d19d72..bc15ab0ac 100644 --- a/rocAL/source/meta_data/bounding_box_graph.cpp +++ b/rocAL/source/meta_data/bounding_box_graph.cpp @@ -262,7 +262,7 @@ void BoundingBoxGraph::update_box_iou_matcher(std::vector *anchors, int * if (allow_low_quality_matches) { for (unsigned int anchor_idx = 0; anchor_idx < anchors_size; anchor_idx++) { // if the element is found - if (fabs(bbox_iou[anchor_idx] - best_bbox_iou) < 1e-6) + if (fabs(bbox_iou[anchor_idx] - best_bbox_iou) < 1e-6) // Compare the IOU values and check if they are equal with a tolerance of 1e-6 low_quality_preds[anchor_idx] = anchor_idx; } } diff --git a/rocAL/source/pipeline/master_graph.cpp b/rocAL/source/pipeline/master_graph.cpp index 3dd7ab042..d982d188f 100644 --- a/rocAL/source/pipeline/master_graph.cpp +++ b/rocAL/source/pipeline/master_graph.cpp @@ -1022,7 +1022,7 @@ std::vector MasterGraph::create_coco_meta_data_reader(const c _meta_data_reader->read_all(source_path); if (!ltrb_bbox) _augmented_meta_data->set_xywh_bbox(); std::vector dims; - size_t max_objects = static_cast(is_box_encoder ? MAX_NUM_ANCHORS : MAX_OBJECTS); + size_t max_objects = static_cast(is_box_encoder ? 
MAX_SSD_ANCHORS : MAX_OBJECTS); dims = {max_objects}; auto default_labels_info = TensorInfo(std::move(dims), _mem_type, RocalTensorDataType::INT32); // Create default labels Info default_labels_info.set_metadata(); @@ -1043,7 +1043,7 @@ std::vector MasterGraph::create_coco_meta_data_reader(const c } if (is_box_iou_matcher) { _is_box_iou_matcher = true; - dims = {MAX_ANCHORS}; + dims = {MAX_RETINANET_ANCHORS}; default_matches_info = TensorInfo(std::move(dims), _mem_type, RocalTensorDataType::INT32); // Create default matches info default_matches_info.set_metadata(); _meta_data_buffer_size.emplace_back(_user_batch_size * default_matches_info.data_size()); From e7e719d5d7fa221cde33ce8d35951ffbeb90dbd9 Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Thu, 9 Nov 2023 10:42:31 +0000 Subject: [PATCH 13/33] Add the test cases for numpy reader --- .../rocAL_unittests/rocAL_unittests.cpp | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp b/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp index e48fe1d78..066aa4199 100644 --- a/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp +++ b/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp @@ -319,6 +319,12 @@ int test(int test_case, int reader_type, const char *path, const char *outName, rocalCreateMXNetReader(handle, path, true); decoded_output = rocalMXNetRecordSource(handle, path, color_format, num_threads, false, false, false, ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED, decode_max_width, decode_max_height); } break; + case 12: // Numpy reader + { + std::cout << ">>>>>>> Running Numpy reader" << std::endl; + pipeline_type = 4; + decoded_output = rocalNumpyFileSource(handle, path, num_threads, false, false, false, ROCAL_USE_MAX_SIZE); + } break; default: { std::cout << ">>>>>>> Running IMAGE READER" << std::endl; pipeline_type = 1; @@ -766,6 +772,53 @@ int test(int test_case, int reader_type, const char *path, const char *outName, } } } break; 
+ case 4: { // numpy reader pipeline + RocalTensorList output_tensor_list; + output_tensor_list = rocalGetOutputTensors(handle); + for (int idx = 0; idx < output_tensor_list->size(); idx++) { + unsigned char *out_buffer; + if (output_tensor_list->at(idx)->data_type() == RocalTensorOutputType::ROCAL_FP32) { + float *out_f_buffer; + std::cout << "Creating float buffer of "; + for (auto x : output_tensor_list->at(idx)->shape()) + std::cout << x << " x "; + std::cout << "shape\n"; + if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_GPU) { + out_f_buffer = (float *)malloc(output_tensor_list->at(idx)->data_size()); + output_tensor_list->at(idx)->copy_data(out_f_buffer); + } else if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_CPU) + out_f_buffer = (float *)output_tensor_list->at(idx)->buffer(); + + out_buffer = (unsigned char *)malloc(output_tensor_list->at(idx)->data_size() / 4); + // convert_float_to_uchar_buffer(out_f_buffer, out_buffer, output_tensor_list->at(idx)->data_size() / 4); + } else if (output_tensor_list->at(idx)->data_type() == RocalTensorOutputType::ROCAL_FP16) { + half *out_f16_buffer; + std::cout << "Creating float16 buffer of "; + for (auto x : output_tensor_list->at(idx)->shape()) + std::cout << x << " x "; + std::cout << "shape\n"; + if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_GPU) { + out_f16_buffer = (half *)malloc(output_tensor_list->at(idx)->data_size()); + output_tensor_list->at(idx)->copy_data(out_f16_buffer); + } else if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_CPU) + out_f16_buffer = (half *)output_tensor_list->at(idx)->buffer(); + + out_buffer = (unsigned char *)malloc(output_tensor_list->at(idx)->data_size() / 2); + // convert_float_to_uchar_buffer(out_f16_buffer, out_buffer, output_tensor_list->at(idx)->data_size() / 2); + } else { + std::cout << "Creating uchar buffer of "; + for (auto x : output_tensor_list->at(idx)->shape()) + std::cout 
<< x << " x "; + std::cout << "shape\n"; + if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_GPU) { + out_buffer = (unsigned char *)malloc(output_tensor_list->at(idx)->data_size()); + output_tensor_list->at(idx)->copy_data(out_buffer); + } else if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_CPU) + out_buffer = (unsigned char *)(output_tensor_list->at(idx)->buffer()); + } + } + std::cout << "Copied numpy data to buffers\n"; + } break; default: { std::cout << "Not a valid pipeline type ! Exiting!\n"; return -1; From 935c769b3d6e94e0b6de0f8d746450683d0d42d7 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Mon, 27 Nov 2023 13:31:38 +0000 Subject: [PATCH 14/33] Fixing bug with numpy reader shuffle --- rocAL/source/readers/image/numpy_data_reader.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/rocAL/source/readers/image/numpy_data_reader.cpp b/rocAL/source/readers/image/numpy_data_reader.cpp index 1f1cc7b6b..7f57beaee 100644 --- a/rocAL/source/readers/image/numpy_data_reader.cpp +++ b/rocAL/source/readers/image/numpy_data_reader.cpp @@ -25,6 +25,7 @@ THE SOFTWARE. 
#include #include +#include #include #include @@ -375,7 +376,20 @@ int NumpyDataReader::release() { void NumpyDataReader::reset() { _shuffle_time.start(); - if (_shuffle) std::random_shuffle(_file_names.begin(), _file_names.end()); + if (_shuffle) { + std::vector shuffled_filenames; + std::vector shuffled_headers; + std::vector indexes(_file_names.size()); + std::iota(indexes.begin(), indexes.end(), 0); + // Shuffle the index vector and use the index to fetch batch size elements for decoding + std::random_shuffle(indexes.begin(), indexes.end()); + for (auto const idx : indexes) { + shuffled_filenames.push_back(_file_names[idx]); + shuffled_headers.push_back(_file_headers[idx]); + } + _file_names = shuffled_filenames; + _file_headers = shuffled_headers; + } _shuffle_time.end(); _read_counter = 0; _curr_file_idx = 0; From bcb050fc7f525bc2c5911371526b3ac246e4fdcf Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Mon, 27 Nov 2023 13:42:20 +0000 Subject: [PATCH 15/33] Resizing file headers after last batch padding --- rocAL/source/readers/image/numpy_data_reader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocAL/source/readers/image/numpy_data_reader.cpp b/rocAL/source/readers/image/numpy_data_reader.cpp index 7f57beaee..a8917d997 100644 --- a/rocAL/source/readers/image/numpy_data_reader.cpp +++ b/rocAL/source/readers/image/numpy_data_reader.cpp @@ -71,6 +71,7 @@ Reader::Status NumpyDataReader::initialize(ReaderConfig desc) { replicate_last_batch_to_pad_partial_shard(); } } + _file_headers.resize(_file_names.size()); // shuffle dataset if set _shuffle_time.start(); if (ret == Reader::Status::OK && _shuffle) @@ -434,7 +435,6 @@ Reader::Status NumpyDataReader::subfolder_reading() { replicate_last_image_to_fill_last_shard(); LOG("NumpyDataReader ShardID [" + TOSTR(_shard_id) + "] Replicated " + _folder_path + _last_file_name + " " + TOSTR((_batch_count - _in_batch_read_count)) + " times to fill the last batch") } - 
_file_headers.resize(_file_names.size()); if (!_file_names.empty()) LOG("NumpyDataReader ShardID [" + TOSTR(_shard_id) + "] Total of " + TOSTR(_file_names.size()) + " images loaded from " + _full_path) return ret; From 15d46469607eae23bfe458b809b017baf40b9321 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Fri, 8 Dec 2023 17:54:35 +0000 Subject: [PATCH 16/33] Adding changes for - normalize and transpose kernel support to rocAL - generic ROI changes --- rocAL/include/api/rocal_api_augmentation.h | 38 ++++++++ .../augmentations/augmentations_nodes.h | 2 + .../effects_augmentations/node_normalize.h | 46 ++++++++++ .../geometry_augmentations/node_transpose.h | 40 +++++++++ rocAL/include/pipeline/tensor.h | 13 ++- rocAL/source/api/rocal_api_augmentation.cpp | 60 +++++++++++++ .../effects_augmentations/node_normalize.cpp | 90 +++++++++++++++++++ .../geometry_augmentations/node_transpose.cpp | 51 +++++++++++ rocAL/source/pipeline/tensor.cpp | 18 ++-- 9 files changed, 344 insertions(+), 14 deletions(-) create mode 100644 rocAL/include/augmentations/effects_augmentations/node_normalize.h create mode 100644 rocAL/include/augmentations/geometry_augmentations/node_transpose.h create mode 100644 rocAL/source/augmentations/effects_augmentations/node_normalize.cpp create mode 100644 rocAL/source/augmentations/geometry_augmentations/node_transpose.cpp diff --git a/rocAL/include/api/rocal_api_augmentation.h b/rocAL/include/api/rocal_api_augmentation.h index d236073fa..b953211ec 100644 --- a/rocAL/include/api/rocal_api_augmentation.h +++ b/rocAL/include/api/rocal_api_augmentation.h @@ -329,6 +329,20 @@ extern "C" RocalTensor ROCAL_API_CALL rocalFlipFixed(RocalContext context, Rocal RocalTensorLayout output_layout = ROCAL_NONE, RocalTensorOutputType output_datatype = ROCAL_UINT8); +/*! \brief Transposes the tensors by reordering the dimensions based on the perm parameter. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] perm Permutation of the dimensions of the input + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalTranspose(RocalContext context, RocalTensor input, std::vector perm, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + /*! \brief Applies blur effect to images. * \ingroup group_rocal_augmentations * \param [in] context Rocal context @@ -997,6 +1011,30 @@ extern "C" RocalTensor ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext cont RocalTensorLayout output_layout = ROCAL_NONE, RocalTensorOutputType output_datatype = ROCAL_UINT8); +/*! \brief Performs normalization on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] axes axes list for tensor normalization + * \param [in] mean mean value (specified for each channel) for tensor normalization + * \param [in] std_dev standard deviation value (specified for each channel) for tensor normalization + * \param [in] scale scale value (specified for each channel) for tensor normalization + * \param [in] shift shift value (specified for each channel) for tensor normalization + * \param [in] is_output is the output tensor part of the graph output + * \param [in] mirror controls horizontal flip of the tensor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalNormalize(RocalContext context, RocalTensor input, + std::vector &axes, + std::vector &mean, + std::vector &std_dev, + bool is_output, + float scale = 1.0, float shift = 0.0, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + /*! \brief Crops images. * \ingroup group_rocal_augmentations * \param [in] context Rocal context diff --git a/rocAL/include/augmentations/augmentations_nodes.h b/rocAL/include/augmentations/augmentations_nodes.h index ef6beff32..e9344b4d4 100644 --- a/rocAL/include/augmentations/augmentations_nodes.h +++ b/rocAL/include/augmentations/augmentations_nodes.h @@ -57,3 +57,5 @@ THE SOFTWARE. 
#include "node_sequence_rearrange.h" #include "node_gaussian_noise.h" #include "node_slice.h" +#include "node_transpose.h" +#include "node_normalize.h" diff --git a/rocAL/include/augmentations/effects_augmentations/node_normalize.h b/rocAL/include/augmentations/effects_augmentations/node_normalize.h new file mode 100644 index 000000000..6ad49d08f --- /dev/null +++ b/rocAL/include/augmentations/effects_augmentations/node_normalize.h @@ -0,0 +1,46 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "node.h" +#include "parameter_vx.h" + +class NormalizeNode : public Node { + public: + NormalizeNode(const std::vector &inputs, + const std::vector &outputs); + NormalizeNode() = delete; + void init(std::vector &axes, std::vector &mean, std::vector &std_dev, float scale, float shift); + + protected: + void create_node() override; + void update_node() override {}; + + private: + int _axis_mask = 0; + uint _compute_mean, _compute_stddev; + vx_array _mean_vx_array, _stddev_vx_array; + std::vector _axes; + std::vector _mean, _std_dev; + float _scale, _shift; + std::vector> _normalize_roi; +}; \ No newline at end of file diff --git a/rocAL/include/augmentations/geometry_augmentations/node_transpose.h b/rocAL/include/augmentations/geometry_augmentations/node_transpose.h new file mode 100644 index 000000000..d8b6e94c1 --- /dev/null +++ b/rocAL/include/augmentations/geometry_augmentations/node_transpose.h @@ -0,0 +1,40 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "node.h" +#include "parameter_vx.h" + +class TransposeNode : public Node { + public: + TransposeNode(const std::vector &inputs, const std::vector &outputs); + TransposeNode() = delete; + void init(std::vector perm); + + protected: + void create_node() override; + void update_node() override {}; + + private: + std::vector _perm; + vx_array _perm_array; +}; diff --git a/rocAL/include/pipeline/tensor.h b/rocAL/include/pipeline/tensor.h index 26689e03f..71bcc1d98 100644 --- a/rocAL/include/pipeline/tensor.h +++ b/rocAL/include/pipeline/tensor.h @@ -205,10 +205,14 @@ class TensorInfo { get_modified_dims_from_layout(_layout, layout, new_dims); _dims = new_dims; modify_strides(); + _max_shape.assign(_dims.begin() + 1, _dims.end()); } _layout = layout; - if (_layout == RocalTensorlayout::NONE) - set_max_shape(); + if (_layout == RocalTensorlayout::NHWC || _layout == RocalTensorlayout::NDHWC) { + _channels = _dims.back(); + } else if (_layout == RocalTensorlayout::NCHW || _layout == RocalTensorlayout::NCDHW) { + _channels = _dims.at(1); + } } void set_dims(std::vector& new_dims) { if (_num_of_dims == new_dims.size()) { @@ -249,13 +253,14 @@ class TensorInfo { } void modify_dims(RocalTensorlayout layout, std::vector new_dims) { switch (_layout) { - case RocalTensorlayout::NDHWC: { + case RocalTensorlayout::NHWC: + case RocalTensorlayout::NCHW: { _max_shape[0] = _dims[1] = new_dims[0]; _max_shape[1] = _dims[2] = new_dims[1]; _max_shape[2] = _dims[3] = new_dims[2]; - _max_shape[3] = _dims[4] = new_dims[3]; break; } + case RocalTensorlayout::NDHWC: case RocalTensorlayout::NCDHW: { _max_shape[0] = _dims[1] = new_dims[0]; _max_shape[1] = _dims[2] = new_dims[1]; diff --git 
a/rocAL/source/api/rocal_api_augmentation.cpp b/rocAL/source/api/rocal_api_augmentation.cpp index c740eadc5..efd233c93 100644 --- a/rocAL/source/api/rocal_api_augmentation.cpp +++ b/rocAL/source/api/rocal_api_augmentation.cpp @@ -1262,6 +1262,37 @@ rocalSlice( return output; } +RocalTensor ROCAL_API_CALL +rocalTranspose( + RocalContext p_context, + RocalTensor p_input, + std::vector perm, + bool is_output, + RocalTensorLayout output_layout, + RocalTensorOutputType output_datatype) { + Tensor* output = nullptr; + if ((p_context == nullptr) || (p_input == nullptr)) { + ERR("Invalid ROCAL context or invalid input image") + return output; + } + auto context = static_cast(p_context); + auto input = static_cast(p_input); + try { + RocalTensorlayout op_tensor_layout = static_cast(output_layout); + RocalTensorDataType op_tensor_datatype = static_cast(output_datatype); + TensorInfo output_info = input->info(); + output_info.set_tensor_layout(op_tensor_layout); + output_info.set_data_type(op_tensor_datatype); + output = context->master_graph->create_tensor(output_info, is_output); + std::shared_ptr transpose_node = context->master_graph->add_node({input}, {output}); + transpose_node->init(perm); + } catch (const std::exception& e) { + context->capture_error(e.what()); + ERR(e.what()) + } + return output; +} + RocalTensor ROCAL_API_CALL rocalFlip( RocalContext p_context, @@ -1887,6 +1918,35 @@ rocalColorTwistFixed( return output; } +RocalTensor ROCAL_API_CALL +rocalNormalize(RocalContext p_context, RocalTensor p_input, std::vector &axes, + std::vector& mean, std::vector& std_dev, bool is_output, + float scale, float shift, + RocalTensorLayout output_layout, + RocalTensorOutputType output_datatype) { + Tensor* output = nullptr; + if ((p_context == nullptr) || (p_input == nullptr)) { + ERR("Invalid ROCAL context or invalid input tensor") + return output; + } + auto context = static_cast(p_context); + auto input = static_cast(p_input); + try { + RocalTensorlayout 
op_tensor_layout = static_cast(output_layout); + RocalTensorDataType op_tensor_datatype = static_cast(output_datatype); + TensorInfo output_info = input->info(); + output_info.set_tensor_layout(op_tensor_layout); + output_info.set_data_type(op_tensor_datatype); + output = context->master_graph->create_tensor(output_info, is_output); + std::shared_ptr normalize_node = context->master_graph->add_node({input}, {output}); + normalize_node->init(axes, mean, std_dev, scale, shift); + } catch (const std::exception& e) { + context->capture_error(e.what()); + ERR(e.what()) + } + return output; +} + RocalTensor ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext p_context, RocalTensor p_input, unsigned crop_height, unsigned crop_width, float start_x, float start_y, std::vector& mean, diff --git a/rocAL/source/augmentations/effects_augmentations/node_normalize.cpp b/rocAL/source/augmentations/effects_augmentations/node_normalize.cpp new file mode 100644 index 000000000..16bb59798 --- /dev/null +++ b/rocAL/source/augmentations/effects_augmentations/node_normalize.cpp @@ -0,0 +1,90 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "node_normalize.h" + +#include +#include + +#include "exception.h" + +NormalizeNode::NormalizeNode(const std::vector &inputs, const std::vector &outputs) : Node(inputs, outputs) {} + +void NormalizeNode::create_node() { + if (_node) + return; + + _compute_mean = _mean.size() ? 0 : 1; + _compute_stddev = _std_dev.size() ? 0 : 1; + + uint mean_stddev_array_size = _mean.size(); + std::vector mean_vec, stddev_vec; + mean_vec.resize(_batch_size * mean_stddev_array_size, _mean[0]); + stddev_vec.resize(_batch_size * mean_stddev_array_size, _std_dev[0]); + + if (!_compute_mean && !_compute_stddev) + for (uint i = 0; i < _batch_size; i++) { + for (uint j = 0; j < mean_stddev_array_size; j++) { + mean_vec[i * mean_stddev_array_size + j] = _mean[j]; + stddev_vec[i * mean_stddev_array_size + j] = _std_dev[j]; + } + } + vx_status status = VX_SUCCESS; + if (!_compute_mean) { + _mean_vx_array = vxCreateArray(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, mean_vec.size()); + status |= vxAddArrayItems(_mean_vx_array, mean_vec.size(), mean_vec.data(), sizeof(vx_float32)); + if (status != 0) + THROW(" vxAddArrayItems failed in the normalize node (vxExtRppNormalize) node: " + TOSTR(status) + " " + TOSTR(status)) + } + + if (!_compute_stddev) { + _stddev_vx_array = vxCreateArray(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, stddev_vec.size()); + status |= vxAddArrayItems(_stddev_vx_array, stddev_vec.size(), stddev_vec.data(), sizeof(vx_float32)); + if (status != 0) + THROW(" vxAddArrayItems failed in the normalize node (vxExtRppNormalize) node: " + TOSTR(status) + " " + TOSTR(status)) + } + vx_scalar axis_mask = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, 
&_axis_mask); + vx_scalar scale = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, &_scale); + vx_scalar shift = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, &_shift); + vx_scalar compute_mean = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_UINT32, &_compute_mean); + vx_scalar compute_stddev = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_UINT32, &_compute_stddev); + int input_layout = static_cast(_inputs[0]->info().layout()); + int output_layout = static_cast(_outputs[0]->info().layout()); + int roi_type = static_cast(_inputs[0]->info().roi_type()); + vx_scalar input_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &input_layout); + vx_scalar output_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &output_layout); + vx_scalar roi_type_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &roi_type); + + _node = vxExtRppNormalize(_graph->get(), _inputs[0]->handle(), _inputs[0]->get_roi_tensor(), _outputs[0]->handle(), axis_mask, + _mean_vx_array, _stddev_vx_array, compute_mean, compute_stddev, scale, shift, input_layout_vx, output_layout_vx, roi_type_vx); + if ((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) + THROW("Error adding the crop mirror normalize (vxExtRppNormalize) failed: " + TOSTR(status)) +} + +void NormalizeNode::init(std::vector &axes, std::vector &mean, std::vector &std_dev, float scale, float shift) { + _mean = mean; + _std_dev = std_dev; + _scale = scale; + _shift = shift; + for (unsigned d = 0; d < axes.size(); d++) + _axis_mask |= (1 << axes[d]); +} \ No newline at end of file diff --git a/rocAL/source/augmentations/geometry_augmentations/node_transpose.cpp b/rocAL/source/augmentations/geometry_augmentations/node_transpose.cpp new file mode 100644 index 000000000..9e4376e4b --- /dev/null +++ 
b/rocAL/source/augmentations/geometry_augmentations/node_transpose.cpp @@ -0,0 +1,51 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include "node_transpose.h" +#include "exception.h" + +TransposeNode::TransposeNode(const std::vector &inputs, const std::vector &outputs) : Node(inputs, outputs) {} + +void TransposeNode::create_node() { + if (_node) + return; + + int input_layout = static_cast(_inputs[0]->info().layout()); + int output_layout = static_cast(_outputs[0]->info().layout()); + int roi_type = static_cast(_inputs[0]->info().roi_type()); + vx_scalar input_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &input_layout); + vx_scalar output_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &output_layout); + vx_scalar roi_type_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &roi_type); + _perm_array = vxCreateArray(vxGetContext((vx_reference)_graph->get()), VX_TYPE_UINT32, _perm.size()); + vx_status status = VX_SUCCESS; + status |= vxAddArrayItems(_perm_array, _perm.size(), _perm.data(), sizeof(vx_uint32)); + + _node = vxExtRppTranspose(_graph->get(), _inputs[0]->handle(), _inputs[0]->get_roi_tensor(), _outputs[0]->handle(), + _perm_array, input_layout_vx, output_layout_vx, roi_type_vx); + if ((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) + THROW("Adding the transpose (vxExtRppTranspose) node failed: " + TOSTR(status)) +} + +void TransposeNode::init(std::vector perm) { + _perm = perm; +} \ No newline at end of file diff --git a/rocAL/source/pipeline/tensor.cpp b/rocAL/source/pipeline/tensor.cpp index 5f0a53a42..f0ac4eaae 100644 --- a/rocAL/source/pipeline/tensor.cpp +++ b/rocAL/source/pipeline/tensor.cpp @@ -116,17 +116,17 @@ void TensorInfo::reset_tensor_roi_buffers() { auto roi_size = (_layout == RocalTensorlayout::NFCHW || _layout == RocalTensorlayout::NFHWC) ? 
_dims[0] * _dims[1] : _batch_size; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions allocate_host_or_pinned_mem((void **)&roi_buf, roi_size * roi_no_of_dims * 2 * sizeof(unsigned), _mem_type); _roi.set_ptr(roi_buf, _mem_type, roi_size, roi_no_of_dims); - if (_layout == RocalTensorlayout::NCDHW || _layout == RocalTensorlayout::NDHWC) { - for (unsigned i = 0; i < _batch_size; i++) { - unsigned *tensor_shape = _roi[i].end; - tensor_shape[i] = _max_shape[i]; - } - } else if (_is_image) { + if (_is_image) { Roi2DCords *roi = _roi.get_2D_roi(); for (unsigned i = 0; i < _batch_size; i++) { roi[i].xywh.w = _max_shape.at(0); roi[i].xywh.h = _max_shape.at(1); } + } else { + for (unsigned i = 0; i < _batch_size; i++) { + unsigned *tensor_shape = _roi[i].end; + tensor_shape[i] = _max_shape[i]; + } } } @@ -221,10 +221,8 @@ void Tensor::update_tensor_roi(const std::vector> &shape) THROW("The number of dims to be updated and the num of dims of tensor info does not match") unsigned *tensor_shape = _info.roi()[i].end; - if (_info.layout() == RocalTensorlayout::NCDHW || _info.layout() == RocalTensorlayout::NDHWC) { - for (unsigned j = 0; j < max_shape.size(); j++) { - tensor_shape[j] = shape[i][j] > max_shape[j] ? max_shape[j] : shape[i][j]; - } + for (unsigned j = 0; j < max_shape.size(); j++) { + tensor_shape[j] = shape[i][j] > max_shape[j] ? 
max_shape[j] : shape[i][j]; } } } From 4380c29b86361cf179c603722e097bf42e9e1d0e Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Fri, 8 Dec 2023 18:51:40 +0000 Subject: [PATCH 17/33] Adding pybind changes for deepcam integration --- rocAL_pybind/amd/rocal/fn.py | 13 ++++ .../examples/rocAL_api_numpy_reader.py | 69 +++++++------------ rocAL_pybind/rocal_pybind.cpp | 4 ++ 3 files changed, 42 insertions(+), 44 deletions(-) diff --git a/rocAL_pybind/amd/rocal/fn.py b/rocAL_pybind/amd/rocal/fn.py index 371218313..703769a9a 100644 --- a/rocAL_pybind/amd/rocal/fn.py +++ b/rocAL_pybind/amd/rocal/fn.py @@ -1148,3 +1148,16 @@ def random_object_bbox(*inputs, format='anchor_shape', background=0, cache_objec else: print('Wrong format passed to random_object_bbox') return () + +def transpose(*inputs, perm=[], output_layout=types.NHWC, output_dtype=types.UINT8): + # pybind call arguments + kwargs_pybind = {"input_image": inputs[0], "perm": perm, "is_output": False, "output_layout": output_layout, "output_dtype": output_dtype} + transposed_image = b.transpose(Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) + return (transposed_image) + +def normalize(*inputs, axes=[], mean=[], stddev=[], scale=1.0, shift=0.0, output_layout=types.NHWC, output_dtype=types.UINT8): + # pybind call arguments + kwargs_pybind = {"input_image": inputs[0], "axes": axes, "mean": mean, "stddev": stddev, "is_output": False, + "scale": scale, "shift": shift, "output_layout": output_layout, "output_dtype": output_dtype} + normalized_image = b.normalize(Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) + return (normalized_image) diff --git a/rocAL_pybind/examples/rocAL_api_numpy_reader.py b/rocAL_pybind/examples/rocAL_api_numpy_reader.py index e2961eddc..60c797a31 100644 --- a/rocAL_pybind/examples/rocAL_api_numpy_reader.py +++ b/rocAL_pybind/examples/rocAL_api_numpy_reader.py @@ -10,8 +10,9 @@ import sys import os, glob -val_cases_list = ['00000', '00003', '00005', '00006', 
'00012', '00024', '00034', '00041', '00044', '00049', '00052', '00056', '00061', '00065', '00066', '00070', '00076', '00078', '00080', '00084', - '00086', '00087', '00092', '00111', '00112', '00125', '00128', '00138', '00157', '00160', '00161', '00162', '00169', '00171', '00176', '00185', '00187', '00189', '00198', '00203', '00206', '00207'] + +MEAN = [0.026144592091441154, -88.3379898071289, -84.62094116210938, -78.56366729736328, -77.72217559814453, 7.33015557974337e-12, 48330.79296875, 87595.4296875, 183.57638549804688, 208.38265991210938, -7.185957863625792e-19, 109.64270782470703, 94.19403076171875, -0.37584438920021057, 9952.041015625, 20.362579345703125] +STDDEV = [108.9710922241211, 174.1948699951172, 173.99221801757812, 155.323486328125, 158.25418090820312, 0.14563894271850586, 58919.42578125, 24443.921875, 64.71000671386719, 77.63092041015625, 3.7348792830016464e-05, 242.97598266601562, 237.60250854492188, 5726.51611328125, 2953.1953125, 51.31494903564453] def load_data(path, files_pattern): data = sorted(glob.glob(os.path.join(path, files_pattern))) @@ -19,19 +20,10 @@ def load_data(path, files_pattern): return data def get_data_split(path: str): - imgs = load_data(path, "*_x.npy") - lbls = load_data(path, "*_y.npy") + imgs = load_data(path, "data-*.npy") + lbls = load_data(path, "label-*.npy") assert len(imgs) == len(lbls), f"Found {len(imgs)} volumes but {len(lbls)} corresponding masks" - imgs_train, lbls_train, imgs_val, lbls_val = [], [], [], [] - for (case_img, case_lbl) in zip(imgs, lbls): - if case_img.split("_")[-2] in val_cases_list: - imgs_val.append(case_img) - lbls_val.append(case_lbl) - else: - imgs_train.append(case_img) - lbls_train.append(case_lbl) - - return imgs_train, imgs_val, lbls_train, lbls_val + return imgs, lbls def main(): if len(sys.argv) < 3: @@ -45,63 +37,52 @@ def main(): except OSError as error: print(error) data_path = sys.argv[1] - if(sys.argv[2] == "cpu"): + data_path1 = sys.argv[2] + if(sys.argv[3] == "cpu"): rocal_cpu 
= True else: rocal_cpu = False - batch_size = int(sys.argv[3]) + batch_size = int(sys.argv[4]) num_threads = 8 device_id = 0 local_rank = 0 world_size = 1 random_seed = random.SystemRandom().randint(0, 2**32 - 1) - x_train, x_val, y_train, y_val = get_data_split(data_path) + x_train, y_train = get_data_split(data_path) + x_val, y_val = get_data_split(data_path1) import time start = time.time() - pipeline = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=random_seed, rocal_cpu=rocal_cpu, prefetch_queue_depth=2) + pipeline = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=random_seed, rocal_cpu=rocal_cpu, prefetch_queue_depth=6) with pipeline: numpy_reader_output = fn.readers.numpy(file_root=data_path, files=x_train, shard_id=local_rank, num_shards=world_size, random_shuffle=True, seed=random_seed+local_rank) - numpy_reader_output1 = fn.readers.numpy(file_root=data_path, files=y_train, shard_id=local_rank, num_shards=world_size, random_shuffle=True, seed=random_seed+local_rank) - data_output = fn.set_layout(numpy_reader_output, output_layout=types.NCDHW) - label_output = fn.set_layout(numpy_reader_output1, output_layout=types.NCDHW) - [roi_start, roi_end] = fn.random_object_bbox(label_output, format="start_end", k_largest=2, foreground_prob=0.4) - anchor = fn.roi_random_crop(label_output, roi_start=roi_start, roi_end=roi_end, crop_shape=(1, 128, 128, 128)) - data_sliced_output = fn.slice(data_output, anchor=anchor, shape=(1,128,128,128), output_layout=types.NCDHW, output_dtype=types.FLOAT) - label_sliced_output = fn.slice(label_output, anchor=anchor, shape=(1,128,128,128), output_layout=types.NCDHW, output_dtype=types.UINT8) - hflip = fn.random.coin_flip(probability=0.33) - vflip = fn.random.coin_flip(probability=0.33) - dflip = fn.random.coin_flip(probability=0.33) - data_flip_output = fn.flip(data_sliced_output, horizontal=hflip, vertical=vflip, depth=dflip, output_layout=types.NCDHW, 
output_dtype=types.FLOAT) - label_flip_output = fn.flip(label_sliced_output, horizontal=hflip, vertical=vflip, depth=dflip, output_layout=types.NCDHW, output_dtype=types.UINT8) - brightness = fn.random.uniform(range=[0.7, 1.3]) - add_brightness = fn.random.coin_flip(probability=0.1) - brightness_output = fn.brightness(data_flip_output, brightness=brightness, brightness_shift=0.0, conditional_execution=add_brightness, output_layout=types.NCDHW, output_dtype=types.FLOAT) - add_noise = fn.random.coin_flip(probability=0.5) - std_dev = fn.random.uniform(range=[0.0, 0.1]) - noise_output = fn.gaussian_noise(brightness_output, mean=0.0, std_dev=std_dev, conditional_execution=add_noise, output_layout=types.NCDHW, output_dtype=types.FLOAT) - pipeline.set_outputs(noise_output, label_flip_output) + label_output = fn.readers.numpy(file_root=data_path, files=y_train, shard_id=local_rank, num_shards=world_size, random_shuffle=True, seed=random_seed+local_rank) + data_output = fn.set_layout(numpy_reader_output, output_layout=types.NHWC) + normalized_output = fn.normalize(data_output, axes=[0,1], mean=MEAN, stddev=STDDEV, output_layout=types.NHWC, output_dtype=types.FLOAT) + transposed_output = fn.transpose(normalized_output, perm=[2,1,0], output_layout=types.NCHW, output_dtype=types.FLOAT) + pipeline.set_outputs(transposed_output, label_output) pipeline.build() pipeline1 = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=random_seed, rocal_cpu=rocal_cpu, prefetch_queue_depth=6) with pipeline1: - numpy_reader_output = fn.readers.numpy(file_root=data_path, files=x_val, shard_id=local_rank, num_shards=world_size) - numpy_reader_output1 = fn.readers.numpy(file_root=data_path, files=y_val, shard_id=local_rank, num_shards=world_size) - data_output = fn.set_layout(numpy_reader_output, output_layout=types.NCDHW) - label_output = fn.set_layout(numpy_reader_output1, output_layout=types.NCDHW) - pipeline1.set_outputs(data_output, label_output) + 
numpy_reader_output = fn.readers.numpy(file_root=data_path, files=x_val, shard_id=local_rank, num_shards=world_size, seed=random_seed+local_rank) + label_output = fn.readers.numpy(file_root=data_path, files=y_val, shard_id=local_rank, num_shards=world_size, seed=random_seed+local_rank) + data_output = fn.set_layout(numpy_reader_output, output_layout=types.NHWC) + normalized_output = fn.normalize(data_output, axes=[0,1], mean=MEAN, stddev=STDDEV, output_layout=types.NHWC, output_dtype=types.FLOAT) + transposed_output = fn.transpose(normalized_output, perm=[2,1,0], output_layout=types.NCHW, output_dtype=types.FLOAT) + pipeline1.set_outputs(transposed_output, label_output) pipeline1.build() numpyIteratorPipeline = ROCALNumpyIterator(pipeline, device='cpu' if rocal_cpu else 'gpu') print(len(numpyIteratorPipeline)) - valNumpyIteratorPipeline = ROCALNumpyIterator(pipeline1, device='cpu' if rocal_cpu else 'gpu', return_roi=True) + valNumpyIteratorPipeline = ROCALNumpyIterator(pipeline1, device='cpu' if rocal_cpu else 'gpu') print(len(valNumpyIteratorPipeline)) cnt = 0 - for epoch in range(100): + for epoch in range(2): print("+++++++++++++++++++++++++++++EPOCH+++++++++++++++++++++++++++++++++++++",epoch) for i , it in enumerate(numpyIteratorPipeline): print(i, it[0].shape, it[1].shape) diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index c31e49b03..49399d2ef 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -675,5 +675,9 @@ PYBIND11_MODULE(rocal_pybind, m) { py::return_value_policy::reference); m.def("slice", &rocalSlice, py::return_value_policy::reference); + m.def("transpose", &rocalTranspose, + py::return_value_policy::reference); + m.def("normalize", &rocalNormalize, + py::return_value_policy::reference); } } // namespace rocal From 06018412bbeb899ba59554d69ae0c2d66ffa6981 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Fri, 8 Dec 2023 19:56:10 +0000 Subject: [PATCH 18/33] Adding cast augmentation to rocAL 
--- rocAL/include/api/rocal_api_augmentation.h | 11 +++++ .../augmentations/augmentations_nodes.h | 1 + rocAL/include/augmentations/node_cast.h | 36 +++++++++++++++++ rocAL/source/api/rocal_api_augmentation.cpp | 29 ++++++++++++++ rocAL/source/augmentations/node_cast.cpp | 40 +++++++++++++++++++ rocAL_pybind/amd/rocal/fn.py | 6 +++ rocAL_pybind/rocal_pybind.cpp | 2 + 7 files changed, 125 insertions(+) create mode 100644 rocAL/include/augmentations/node_cast.h create mode 100644 rocAL/source/augmentations/node_cast.cpp diff --git a/rocAL/include/api/rocal_api_augmentation.h b/rocAL/include/api/rocal_api_augmentation.h index b953211ec..4bc38b820 100644 --- a/rocAL/include/api/rocal_api_augmentation.h +++ b/rocAL/include/api/rocal_api_augmentation.h @@ -1200,6 +1200,17 @@ extern "C" RocalTensor ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, R RocalTensorLayout output_layout = ROCAL_NONE, RocalTensorOutputType output_datatype = ROCAL_UINT8); +/** + * \brief Cast input tensor from one data type to another + * \param context Rocal context + * \param input Input tensor + * \param is_output Sets if the output is to be given to user or as intermediate buffer + * \param output_datatype Datatype of the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCast(RocalContext context, RocalTensor input, + bool is_output, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + extern "C" RocalTensor ROCAL_API_CALL rocalSetLayout(RocalContext context, RocalTensor input, RocalTensorLayout output_layout = ROCAL_NONE); diff --git a/rocAL/include/augmentations/augmentations_nodes.h b/rocAL/include/augmentations/augmentations_nodes.h index e9344b4d4..c01fb0691 100644 --- a/rocAL/include/augmentations/augmentations_nodes.h +++ b/rocAL/include/augmentations/augmentations_nodes.h @@ -59,3 +59,4 @@ THE SOFTWARE. 
#include "node_slice.h" #include "node_transpose.h" #include "node_normalize.h" +#include "node_cast.h" diff --git a/rocAL/include/augmentations/node_cast.h b/rocAL/include/augmentations/node_cast.h new file mode 100644 index 000000000..67930261b --- /dev/null +++ b/rocAL/include/augmentations/node_cast.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "node.h" +#include "graph.h" + +class CastNode : public Node +{ +public: + CastNode(const std::vector &inputs, const std::vector &outputs); + CastNode() = delete; + +protected: + void create_node() override; + void update_node() override {}; +}; diff --git a/rocAL/source/api/rocal_api_augmentation.cpp b/rocAL/source/api/rocal_api_augmentation.cpp index efd233c93..ea1c3344c 100644 --- a/rocAL/source/api/rocal_api_augmentation.cpp +++ b/rocAL/source/api/rocal_api_augmentation.cpp @@ -2326,6 +2326,35 @@ rocalNop( return output; } +RocalTensor ROCAL_API_CALL rocalCast(RocalContext p_context, RocalTensor p_input, + bool is_output, + RocalTensorOutputType output_datatype) { + Tensor* output = nullptr; + if ((p_context == nullptr) || (p_input == nullptr)) { + ERR("Invalid ROCAL context or invalid input tensor") + return output; + } + auto context = static_cast(p_context); + auto input = static_cast(p_input); + try { + RocalTensorDataType op_tensor_datatype = static_cast(output_datatype); + + if (input->info().data_type() == op_tensor_datatype) { + output = context->master_graph->create_tensor(input->info(), is_output); + context->master_graph->add_node({input}, {output}); + } else { + TensorInfo output_info = input->info(); + output_info.set_data_type(op_tensor_datatype); + output = context->master_graph->create_tensor(output_info, is_output); + context->master_graph->add_node({input}, {output}); + } + } catch(const std::exception& e) { + context->capture_error(e.what()); + ERR(e.what()) + } + return output; +} + RocalTensor ROCAL_API_CALL rocalSetLayout( RocalContext p_context, diff --git a/rocAL/source/augmentations/node_cast.cpp b/rocAL/source/augmentations/node_cast.cpp new file mode 100644 index 000000000..d1949560e --- /dev/null +++ b/rocAL/source/augmentations/node_cast.cpp @@ -0,0 +1,40 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include "node_cast.h" +#include "exception.h" + +CastNode::CastNode(const std::vector &inputs, const std::vector &outputs) : + Node(inputs, outputs) {} + +void CastNode::create_node() { + if(_node) + return; + + _node = vxExtRppCast(_graph->get(), _inputs[0]->handle(), _src_tensor_roi, _outputs[0]->handle(), _input_layout, _roi_type); + + vx_status status; + if((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) + THROW("Adding the cast (vxExtRppCast) node failed: " + TOSTR(status)) + +} diff --git a/rocAL_pybind/amd/rocal/fn.py b/rocAL_pybind/amd/rocal/fn.py index 703769a9a..3ae9b5b06 100644 --- a/rocAL_pybind/amd/rocal/fn.py +++ b/rocAL_pybind/amd/rocal/fn.py @@ -1161,3 +1161,9 @@ def normalize(*inputs, axes=[], mean=[], stddev=[], scale=1.0, shift=0.0, output "scale": scale, "shift": shift, "output_layout": output_layout, "output_dtype": output_dtype} normalized_image = b.normalize(Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) return (normalized_image) + +def cast(*inputs, output_dtype=types.UINT8): + # pybind call arguments + kwargs_pybind = {"input_image": inputs[0], "is_output": False, "output_dtype": output_dtype} + normalized_image = b.cast(Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) + return (normalized_image) diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index 49399d2ef..d125717f0 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -679,5 +679,7 @@ PYBIND11_MODULE(rocal_pybind, m) { py::return_value_policy::reference); m.def("normalize", &rocalNormalize, py::return_value_policy::reference); + m.def("cast", &rocalCast, + py::return_value_policy::reference); } } // namespace rocal From 510a83dad1d57da3ac17cef1493e8e0884f9988e Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Thu, 14 Dec 2023 14:04:52 +0000 Subject: [PATCH 19/33] Resolve PR comments - 1 --- rocAL/include/api/rocal_api_data_loaders.h | 14 +++++++------- 
rocAL/include/loaders/image/node_numpy_loader.h | 4 ++-- .../loaders/image/node_numpy_loader_single_shard.h | 2 +- rocAL/include/loaders/image/numpy_loader.h | 3 +-- rocAL/include/loaders/image/numpy_loader_sharded.h | 3 +-- rocAL/include/pipeline/tensor.h | 2 +- rocAL/include/readers/image/numpy_data_reader.h | 2 +- rocAL/source/loaders/image/node_numpy_loader.cpp | 2 +- .../image/node_numpy_loader_single_shard.cpp | 2 +- rocAL/source/loaders/image/numpy_loader.cpp | 2 +- .../source/loaders/image/numpy_loader_sharded.cpp | 2 +- rocAL/source/readers/image/numpy_data_reader.cpp | 2 +- 12 files changed, 19 insertions(+), 21 deletions(-) diff --git a/rocAL/include/api/rocal_api_data_loaders.h b/rocAL/include/api/rocal_api_data_loaders.h index 430254736..0e53f8553 100644 --- a/rocAL/include/api/rocal_api_data_loaders.h +++ b/rocAL/include/api/rocal_api_data_loaders.h @@ -588,13 +588,13 @@ extern "C" RocalTensor ROCAL_API_CALL rocalRawTFRecordSourceSingleShard(RocalCon * \return Reference to the output tensor */ extern "C" RocalTensor ROCAL_API_CALL rocalNumpyFileSource( - RocalContext p_context, - const char* source_path, - unsigned internal_shard_count, - bool is_output = false, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MAX_SIZE); + RocalContext p_context, + const char* source_path, + unsigned internal_shard_count, + bool is_output = false, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MAX_SIZE); /*! \brief Creates Numpy raw data reader and loader. It allocates the resources and objects required to read raw data stored on the numpy arrays. 
* \ingroup group_rocal_data_loaders diff --git a/rocAL/include/loaders/image/node_numpy_loader.h b/rocAL/include/loaders/image/node_numpy_loader.h index 5e2a5975f..587f89e1a 100644 --- a/rocAL/include/loaders/image/node_numpy_loader.h +++ b/rocAL/include/loaders/image/node_numpy_loader.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -50,4 +50,4 @@ class NumpyLoaderNode : public Node { private: std::shared_ptr _loader_module = nullptr; -}; \ No newline at end of file +}; diff --git a/rocAL/include/loaders/image/node_numpy_loader_single_shard.h b/rocAL/include/loaders/image/node_numpy_loader_single_shard.h index d2ce4a1f6..c1cffba54 100644 --- a/rocAL/include/loaders/image/node_numpy_loader_single_shard.h +++ b/rocAL/include/loaders/image/node_numpy_loader_single_shard.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/loaders/image/numpy_loader.h b/rocAL/include/loaders/image/numpy_loader.h index 3b8fe4d24..b10cfc1a8 100644 --- a/rocAL/include/loaders/image/numpy_loader.h +++ b/rocAL/include/loaders/image/numpy_loader.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -29,7 +29,6 @@ THE SOFTWARE. 
#include "circular_buffer.h" #include "commons.h" #include "image_read_and_decode.h" -// #include "numpy_data_reader.h" // // NumpyLoader runs an internal thread for loading an decoding of numpy arrays asynchronously // it uses a circular buffer to store decoded numpy arrays for the user diff --git a/rocAL/include/loaders/image/numpy_loader_sharded.h b/rocAL/include/loaders/image/numpy_loader_sharded.h index b13f93f30..acd3eb6dd 100644 --- a/rocAL/include/loaders/image/numpy_loader_sharded.h +++ b/rocAL/include/loaders/image/numpy_loader_sharded.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,6 @@ THE SOFTWARE. */ #pragma once -#include #include "numpy_loader.h" // diff --git a/rocAL/include/pipeline/tensor.h b/rocAL/include/pipeline/tensor.h index 428dfe38d..82b2e30ef 100644 --- a/rocAL/include/pipeline/tensor.h +++ b/rocAL/include/pipeline/tensor.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/readers/image/numpy_data_reader.h b/rocAL/include/readers/image/numpy_data_reader.h index faf881448..2ef319039 100644 --- a/rocAL/include/readers/image/numpy_data_reader.h +++ b/rocAL/include/readers/image/numpy_data_reader.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/image/node_numpy_loader.cpp b/rocAL/source/loaders/image/node_numpy_loader.cpp index 63b8cd2ae..eeb51d35d 100644 --- a/rocAL/source/loaders/image/node_numpy_loader.cpp +++ b/rocAL/source/loaders/image/node_numpy_loader.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp b/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp index bc1a68b0c..705dd9561 100644 --- a/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp +++ b/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/image/numpy_loader.cpp b/rocAL/source/loaders/image/numpy_loader.cpp index 9fd856793..c43a25aa1 100644 --- a/rocAL/source/loaders/image/numpy_loader.cpp +++ b/rocAL/source/loaders/image/numpy_loader.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/image/numpy_loader_sharded.cpp b/rocAL/source/loaders/image/numpy_loader_sharded.cpp index 8399abf11..c8413bc3b 100644 --- a/rocAL/source/loaders/image/numpy_loader_sharded.cpp +++ b/rocAL/source/loaders/image/numpy_loader_sharded.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/readers/image/numpy_data_reader.cpp b/rocAL/source/readers/image/numpy_data_reader.cpp index 1f1cc7b6b..e088801ec 100644 --- a/rocAL/source/readers/image/numpy_data_reader.cpp +++ b/rocAL/source/readers/image/numpy_data_reader.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From b28e61cb3cb2bf449e317820d33e7fd196d82f11 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Fri, 15 Dec 2023 07:58:35 +0000 Subject: [PATCH 20/33] Adding header caching, files and seed options to numpy reader --- rocAL/include/api/rocal_api_data_loaders.h | 20 +-- .../include/loaders/image/node_numpy_loader.h | 4 +- .../image/node_numpy_loader_single_shard.h | 4 +- rocAL/include/readers/image/image_reader.h | 6 + .../include/readers/image/numpy_data_reader.h | 6 + rocAL/source/api/rocal_api_data_loaders.cpp | 23 ++-- .../loaders/image/node_numpy_loader.cpp | 8 +- .../image/node_numpy_loader_single_shard.cpp | 8 +- .../readers/image/numpy_data_reader.cpp | 127 +++++++++++------- rocAL_pybind/amd/rocal/plugin/pytorch.py | 25 +++- rocAL_pybind/amd/rocal/readers.py | 8 +- .../rocAL_unittests/rocAL_unittests.cpp | 2 +- 12 files changed, 164 insertions(+), 77 deletions(-) diff --git a/rocAL/include/api/rocal_api_data_loaders.h b/rocAL/include/api/rocal_api_data_loaders.h index 0e53f8553..626465e16 100644 --- a/rocAL/include/api/rocal_api_data_loaders.h +++ b/rocAL/include/api/rocal_api_data_loaders.h @@ -588,13 +588,15 @@ extern "C" RocalTensor ROCAL_API_CALL rocalRawTFRecordSourceSingleShard(RocalCon * \return Reference to the output tensor */ extern "C" RocalTensor ROCAL_API_CALL rocalNumpyFileSource( - RocalContext p_context, - const char* source_path, - unsigned internal_shard_count, - bool is_output = false, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MAX_SIZE); + RocalContext p_context, + const char* source_path, + unsigned internal_shard_count, + std::vector files = {}, + bool is_output = false, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MAX_SIZE, + unsigned seed = 0); /*! 
\brief Creates Numpy raw data reader and loader. It allocates the resources and objects required to read raw data stored on the numpy arrays. * \ingroup group_rocal_data_loaders @@ -611,12 +613,14 @@ extern "C" RocalTensor ROCAL_API_CALL rocalNumpyFileSource( extern "C" RocalTensor rocalNumpyFileSourceSingleShard( RocalContext p_context, const char* source_path, + std::vector files = {}, bool is_output = false, bool shuffle = false, bool loop = false, RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MAX_SIZE, unsigned shard_id = 0, - unsigned shard_count = 1); + unsigned shard_count = 1, + unsigned seed = 0); /*! * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. diff --git a/rocAL/include/loaders/image/node_numpy_loader.h b/rocAL/include/loaders/image/node_numpy_loader.h index 587f89e1a..49918e4f5 100644 --- a/rocAL/include/loaders/image/node_numpy_loader.h +++ b/rocAL/include/loaders/image/node_numpy_loader.h @@ -39,8 +39,8 @@ class NumpyLoaderNode : public Node { /// \param load_batch_count Defines the quantum count of the images to be loaded. It's usually equal to the user's batch size. /// The loader will repeat images if necessary to be able to have images in multiples of the load_batch_count, /// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available. 
- void init(unsigned internal_shard_count, const std::string &source_path, const std::string &json_path, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, - size_t load_batch_count, RocalMemType mem_type, bool decoder_keep_orig = false, const std::map feature_key_map = std::map(), const char *prefix = "", unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0); + void init(unsigned internal_shard_count, const std::string &source_path, const std::vector &files, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, + size_t load_batch_count, RocalMemType mem_type, unsigned seed = 0, bool decoder_keep_orig = false, const std::map feature_key_map = std::map(), const char *prefix = "", unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0); std::shared_ptr get_loader_module(); diff --git a/rocAL/include/loaders/image/node_numpy_loader_single_shard.h b/rocAL/include/loaders/image/node_numpy_loader_single_shard.h index c1cffba54..cd3b464e7 100644 --- a/rocAL/include/loaders/image/node_numpy_loader_single_shard.h +++ b/rocAL/include/loaders/image/node_numpy_loader_single_shard.h @@ -36,9 +36,9 @@ class NumpyLoaderSingleShardNode : public Node { /// \param load_batch_count Defines the quantum count of the images to be loaded. It's usually equal to the user's batch size. /// The loader will repeat images if necessary to be able to have images in multiples of the load_batch_count, /// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available. 
- void init(unsigned shard_id, unsigned shard_count, const std::string &source_path, const std::string &json_path, + void init(unsigned shard_id, unsigned shard_count, const std::string &source_path, const std::vector &files, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, - size_t load_batch_count, RocalMemType mem_type, bool decoder_keep_orig = false, const std::map feature_key_map = std::map(), unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0); + size_t load_batch_count, RocalMemType mem_type, unsigned seed = 0, bool decoder_keep_orig = false, const std::map feature_key_map = std::map(), unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0); std::shared_ptr get_loader_module(); diff --git a/rocAL/include/readers/image/image_reader.h b/rocAL/include/readers/image/image_reader.h index 6269e1781..75e44ff8a 100644 --- a/rocAL/include/readers/image/image_reader.h +++ b/rocAL/include/readers/image/image_reader.h @@ -73,6 +73,8 @@ struct ReaderConfig { void set_sequence_length(unsigned sequence_length) { _sequence_length = sequence_length; } void set_frame_step(unsigned step) { _sequence_frame_step = step; } void set_frame_stride(unsigned stride) { _sequence_frame_stride = stride; } + void set_files(const std::vector &files) { _files = files; } + void set_seed(unsigned seed) { _seed = seed; } size_t get_shard_count() { return _shard_count; } size_t get_shard_id() { return _shard_id; } size_t get_cpu_num_threads() { return _cpu_num_threads; } @@ -80,7 +82,9 @@ struct ReaderConfig { size_t get_sequence_length() { return _sequence_length; } size_t get_frame_step() { return _sequence_frame_step; } size_t get_frame_stride() { return _sequence_frame_stride; } + std::vector get_files() { return _files; } std::string path() { return _path; } + unsigned seed() { return _seed; } #ifdef ROCAL_VIDEO void set_video_properties(VideoProperties video_prop) { _video_prop = video_prop; } VideoProperties 
get_video_properties() { return _video_prop; } @@ -107,6 +111,8 @@ struct ReaderConfig { bool _loop = false; std::string _file_prefix = ""; //!< to read only files with prefix. supported only for cifar10_data_reader and tf_record_reader std::shared_ptr _meta_data_reader = nullptr; + std::vector _files; + unsigned _seed = 0; #ifdef ROCAL_VIDEO VideoProperties _video_prop; #endif diff --git a/rocAL/include/readers/image/numpy_data_reader.h b/rocAL/include/readers/image/numpy_data_reader.h index 2ef319039..e4fa7cc63 100644 --- a/rocAL/include/readers/image/numpy_data_reader.h +++ b/rocAL/include/readers/image/numpy_data_reader.h @@ -77,6 +77,7 @@ class NumpyDataReader : public Reader { DIR* _sub_dir; struct dirent* _entity; std::vector _file_names; + std::vector _files; std::vector _file_headers; unsigned _curr_file_idx; FILE* _current_fPtr; @@ -94,8 +95,11 @@ class NumpyDataReader : public Reader { bool _loop; bool _shuffle; int _read_counter = 0; + unsigned _seed = 0; //!< _file_count_all_shards total_number of files in to figure out the max_batch_size (usually needed for distributed training). 
size_t _file_count_all_shards; + std::mutex _cache_mutex_; + std::map _header_cache_; const RocalTensorDataType TypeFromNumpyStr(const std::string& format); inline void SkipSpaces(const char*& ptr); void ParseHeaderContents(NumpyHeaderData& target, const std::string& header); @@ -111,6 +115,8 @@ class NumpyDataReader : public Reader { void ParseHeader(NumpyHeaderData& parsed_header, std::string file_path); template size_t ParseNumpyData(T* buf, std::vector strides, std::vector shapes, unsigned dim = 0); + bool GetFromCache(const std::string& file_name, NumpyHeaderData& target); + void UpdateCache(const std::string& file_name, const NumpyHeaderData& value); void incremenet_read_ptr(); int release(); size_t get_file_shard_id(); diff --git a/rocAL/source/api/rocal_api_data_loaders.cpp b/rocAL/source/api/rocal_api_data_loaders.cpp index 44dde077a..6f11c2a92 100644 --- a/rocAL/source/api/rocal_api_data_loaders.cpp +++ b/rocAL/source/api/rocal_api_data_loaders.cpp @@ -74,7 +74,7 @@ evaluate_image_data_set(RocalImageSizeEvaluationPolicy decode_size_policy, Stora std::vector evaluate_numpy_data_set(RocalImageSizeEvaluationPolicy decode_size_policy, StorageType storage_type, - DecoderType decoder_type, const std::string &source_path, const std::string &json_path) + DecoderType decoder_type, const std::string &source_path, const std::vector &files) { auto translate_image_size_policy = [](RocalImageSizeEvaluationPolicy decode_size_policy) { @@ -92,7 +92,10 @@ evaluate_numpy_data_set(RocalImageSizeEvaluationPolicy decode_size_policy, Stora ImageSourceEvaluator source_evaluator; source_evaluator.set_size_evaluation_policy(translate_image_size_policy(decode_size_policy)); - if(source_evaluator.create(ReaderConfig(storage_type, source_path, json_path)) != ImageSourceEvaluatorStatus::OK) + auto reader_cfg = ReaderConfig(storage_type, source_path); + if (!files.empty()) + reader_cfg.set_files(files); + if (source_evaluator.create(reader_cfg) != ImageSourceEvaluatorStatus::OK) 
THROW("Initializing file source input evaluator failed ") auto max_dims = source_evaluator.max_numpy_dims(); int data_type = (int)source_evaluator.get_numpy_dtype(); @@ -1636,15 +1639,17 @@ rocalNumpyFileSource( RocalContext p_context, const char* source_path, unsigned internal_shard_count, + std::vector files, bool is_output, bool shuffle, bool loop, - RocalImageSizeEvaluationPolicy decode_size_policy) { + RocalImageSizeEvaluationPolicy decode_size_policy, + unsigned seed) { Tensor* output = nullptr; auto context = static_cast(p_context); try { auto max_dimensions = evaluate_numpy_data_set(decode_size_policy, StorageType::NUMPY_DATA, DecoderType::SKIP_DECODE, - source_path, ""); + source_path, files); RocalTensorlayout tensor_format = RocalTensorlayout::NONE; RocalTensorDataType tensor_data_type; @@ -1672,7 +1677,7 @@ rocalNumpyFileSource( info.set_max_shape(); output = context->master_graph->create_loader_output_tensor(info); - context->master_graph->add_node({}, {output})->init(internal_shard_count, source_path, "", StorageType::NUMPY_DATA, DecoderType::SKIP_DECODE, shuffle, loop, context->user_batch_size(), context->master_graph->mem_type()); + context->master_graph->add_node({}, {output})->init(internal_shard_count, source_path, files, StorageType::NUMPY_DATA, DecoderType::SKIP_DECODE, shuffle, loop, context->user_batch_size(), context->master_graph->mem_type(), seed); context->master_graph->set_loop(loop); if (is_output) { @@ -1691,12 +1696,14 @@ RocalTensor ROCAL_API_CALL rocalNumpyFileSourceSingleShard( RocalContext p_context, const char* source_path, + std::vector files, bool is_output, bool shuffle, bool loop, RocalImageSizeEvaluationPolicy decode_size_policy, unsigned shard_id, - unsigned shard_count) { + unsigned shard_count, + unsigned seed) { Tensor* output = nullptr; auto context = static_cast(p_context); try { @@ -1707,7 +1714,7 @@ rocalNumpyFileSourceSingleShard( THROW("Shard id should be smaller than shard count") auto max_dimensions = 
evaluate_numpy_data_set(decode_size_policy, StorageType::NUMPY_DATA, DecoderType::SKIP_DECODE, - source_path, ""); + source_path, files); RocalTensorlayout tensor_format = RocalTensorlayout::NONE; RocalTensorDataType tensor_data_type; @@ -1735,7 +1742,7 @@ rocalNumpyFileSourceSingleShard( info.set_max_shape(); output = context->master_graph->create_loader_output_tensor(info); - context->master_graph->add_node({}, {output})->init(shard_id, shard_count, source_path, "", StorageType::NUMPY_DATA, DecoderType::SKIP_DECODE, shuffle, loop, context->user_batch_size(), context->master_graph->mem_type()); + context->master_graph->add_node({}, {output})->init(shard_id, shard_count, source_path, files, StorageType::NUMPY_DATA, DecoderType::SKIP_DECODE, shuffle, loop, context->user_batch_size(), context->master_graph->mem_type(), seed); context->master_graph->set_loop(loop); if (is_output) { diff --git a/rocAL/source/loaders/image/node_numpy_loader.cpp b/rocAL/source/loaders/image/node_numpy_loader.cpp index eeb51d35d..3f5319490 100644 --- a/rocAL/source/loaders/image/node_numpy_loader.cpp +++ b/rocAL/source/loaders/image/node_numpy_loader.cpp @@ -28,18 +28,20 @@ NumpyLoaderNode::NumpyLoaderNode(Tensor *output, void *device_resources) : Node( _loader_module = std::make_shared(device_resources); } -void NumpyLoaderNode::init(unsigned internal_shard_count, const std::string &source_path, const std::string &json_path, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, - size_t load_batch_count, RocalMemType mem_type, bool decoder_keep_orig, const std::map feature_key_map, const char *file_prefix, unsigned sequence_length, unsigned step, unsigned stride) { +void NumpyLoaderNode::init(unsigned internal_shard_count, const std::string &source_path, const std::vector &files, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, + size_t load_batch_count, RocalMemType mem_type, unsigned seed, bool decoder_keep_orig, const std::map 
feature_key_map, const char *file_prefix, unsigned sequence_length, unsigned step, unsigned stride) { if (!_loader_module) THROW("ERROR: loader module is not set for NumpyLoaderNode, cannot initialize") if (internal_shard_count < 1) THROW("Shard count should be greater than or equal to one") _loader_module->set_output(_outputs[0]); // Set reader and decoder config accordingly for the NumpyLoaderNode - auto reader_cfg = ReaderConfig(storage_type, source_path, json_path, feature_key_map, shuffle, loop); + auto reader_cfg = ReaderConfig(storage_type, source_path, "", feature_key_map, shuffle, loop); reader_cfg.set_shard_count(internal_shard_count); reader_cfg.set_batch_count(load_batch_count); reader_cfg.set_file_prefix(file_prefix); + reader_cfg.set_files(files); + reader_cfg.set_seed(seed); // sequence_length, step and stride parameters used only for SequenceReader reader_cfg.set_sequence_length(sequence_length); reader_cfg.set_frame_step(step); diff --git a/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp b/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp index 705dd9561..ed9d3730a 100644 --- a/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp +++ b/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp @@ -28,8 +28,8 @@ NumpyLoaderSingleShardNode::NumpyLoaderSingleShardNode(Tensor *output, void *dev _loader_module = std::make_shared(device_resources); } -void NumpyLoaderSingleShardNode::init(unsigned shard_id, unsigned shard_count, const std::string &source_path, const std::string &json_path, StorageType storage_type, DecoderType decoder_type, - bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, +void NumpyLoaderSingleShardNode::init(unsigned shard_id, unsigned shard_count, const std::string &source_path, const std::vector &files, StorageType storage_type, DecoderType decoder_type, + bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, unsigned seed, bool decoder_keep_original, 
const std::map feature_key_map, unsigned sequence_length, unsigned step, unsigned stride) { if (!_loader_module) THROW("ERROR: loader module is not set for NumpyLoaderNode, cannot initialize") @@ -39,10 +39,12 @@ void NumpyLoaderSingleShardNode::init(unsigned shard_id, unsigned shard_count, c THROW("Shard is should be smaller than shard count") _loader_module->set_output(_outputs[0]); // Set reader and decoder config accordingly for the NumpyLoaderNode - auto reader_cfg = ReaderConfig(storage_type, source_path, json_path, feature_key_map, shuffle, loop); + auto reader_cfg = ReaderConfig(storage_type, source_path, "", feature_key_map, shuffle, loop); reader_cfg.set_shard_count(shard_count); reader_cfg.set_shard_id(shard_id); reader_cfg.set_batch_count(load_batch_count); + reader_cfg.set_files(files); + reader_cfg.set_seed(seed); // sequence_length, step and stride parameters used only for SequenceReader reader_cfg.set_sequence_length(sequence_length); reader_cfg.set_frame_step(step); diff --git a/rocAL/source/readers/image/numpy_data_reader.cpp b/rocAL/source/readers/image/numpy_data_reader.cpp index ae24a4efa..67605d508 100644 --- a/rocAL/source/readers/image/numpy_data_reader.cpp +++ b/rocAL/source/readers/image/numpy_data_reader.cpp @@ -26,6 +26,7 @@ THE SOFTWARE. 
#include #include +#include #include #include @@ -61,9 +62,11 @@ Reader::Status NumpyDataReader::initialize(ReaderConfig desc) { _batch_count = desc.get_batch_size(); _shuffle = desc.shuffle(); _loop = desc.loop(); + _files = desc.get_files(); + _seed = desc.seed(); ret = subfolder_reading(); // the following code is required to make every shard the same size:: required for multi-gpu training - if (_shard_count > 1 && _batch_count > 1) { + if (_shard_count > 1 && _batch_count > 1 && _files.empty()) { int _num_batches = _file_names.size() / _batch_count; int max_batches_per_shard = (_file_count_all_shards + _shard_count - 1) / _shard_count; max_batches_per_shard = (max_batches_per_shard + _batch_count - 1) / _batch_count; @@ -74,8 +77,10 @@ Reader::Status NumpyDataReader::initialize(ReaderConfig desc) { _file_headers.resize(_file_names.size()); // shuffle dataset if set _shuffle_time.start(); - if (ret == Reader::Status::OK && _shuffle) - std::random_shuffle(_file_names.begin(), _file_names.end()); + if (ret == Reader::Status::OK && _shuffle) { + std::mt19937 rng(_seed); + std::shuffle(_file_names.begin(), _file_names.end(), rng); + } _shuffle_time.end(); return ret; } @@ -94,12 +99,36 @@ size_t NumpyDataReader::open() { _last_id.erase(0, last_slash_idx + 1); } - ParseHeader(_file_headers[_curr_file_idx], file_path); + auto ret = GetFromCache(file_path, _file_headers[_curr_file_idx]); + if (!ret) { + ParseHeader(_file_headers[_curr_file_idx], file_path); + UpdateCache(file_path, _file_headers[_curr_file_idx]); + } else { + _current_fPtr = std::fopen(file_path.c_str(), "rb"); + if (_current_fPtr == nullptr) + THROW("Could not open file " + file_path + ": " + std::strerror(errno)); + } fseek(_current_fPtr, 0, SEEK_SET); // Take the file pointer back to the start return _file_headers[_curr_file_idx].nbytes(); } +bool NumpyDataReader::GetFromCache(const std::string& file_name, NumpyHeaderData& header) { + std::unique_lock cache_lock(_cache_mutex_); + auto it = 
_header_cache_.find(file_name); + if (it == _header_cache_.end()) { + return false; + } else { + header = it->second; + return true; + } +} + +void NumpyDataReader::UpdateCache(const std::string& file_name, const NumpyHeaderData& value) { + std::unique_lock cache_lock(_cache_mutex_); + _header_cache_[file_name] = value; +} + const RocalTensorDataType NumpyDataReader::TypeFromNumpyStr(const std::string& format) { if (format == "u1") return RocalTensorDataType::UINT8; // if (format == "u2") return TypeTable::GetTypeInfo(); // Currently not supported in rocAL @@ -378,18 +407,8 @@ int NumpyDataReader::release() { void NumpyDataReader::reset() { _shuffle_time.start(); if (_shuffle) { - std::vector shuffled_filenames; - std::vector shuffled_headers; - std::vector indexes(_file_names.size()); - std::iota(indexes.begin(), indexes.end(), 0); - // Shuffle the index vector and use the index to fetch batch size elements for decoding - std::random_shuffle(indexes.begin(), indexes.end()); - for (auto const idx : indexes) { - shuffled_filenames.push_back(_file_names[idx]); - shuffled_headers.push_back(_file_headers[idx]); - } - _file_names = shuffled_filenames; - _file_headers = shuffled_headers; + std::mt19937 rng(_seed); + std::shuffle(_file_names.begin(), _file_names.end(), rng); } _shuffle_time.end(); _read_counter = 0; @@ -397,38 +416,56 @@ void NumpyDataReader::reset() { } Reader::Status NumpyDataReader::subfolder_reading() { - if ((_sub_dir = opendir(_folder_path.c_str())) == nullptr) - THROW("NumpyDataReader ShardID [" + TOSTR(_shard_id) + "] ERROR: Failed opening the directory at " + _folder_path); - - std::vector entry_name_list; - std::string _full_path = _folder_path; + auto ret = Reader::Status::OK; + if (!_files.empty()) { + for (unsigned file_count = 0; file_count < _files.size(); file_count++) { + std::string file_path = _files[file_count]; + filesys::path pathObj(file_path); + if (filesys::exists(pathObj) && filesys::is_regular_file(pathObj)) { + // ignore files 
with extensions .tar, .zip, .7z + auto file_extension_idx = file_path.find_last_of("."); + if (file_extension_idx != std::string::npos) { + std::string file_extension = file_path.substr(file_extension_idx + 1); + if (file_extension != "npy") + continue; + else + _file_names.push_back(file_path); + } + } + } + } else { + if ((_sub_dir = opendir(_folder_path.c_str())) == nullptr) + THROW("NumpyDataReader ShardID [" + TOSTR(_shard_id) + "] ERROR: Failed opening the directory at " + _folder_path); - while ((_entity = readdir(_sub_dir)) != nullptr) { - std::string entry_name(_entity->d_name); - if (strcmp(_entity->d_name, ".") == 0 || strcmp(_entity->d_name, "..") == 0) continue; - entry_name_list.push_back(entry_name); - } - closedir(_sub_dir); - std::sort(entry_name_list.begin(), entry_name_list.end()); + std::vector entry_name_list; + std::string _full_path = _folder_path; - auto ret = Reader::Status::OK; - for (unsigned dir_count = 0; dir_count < entry_name_list.size(); ++dir_count) { - std::string subfolder_path = _full_path + "/" + entry_name_list[dir_count]; - filesys::path pathObj(subfolder_path); - if (filesys::exists(pathObj) && filesys::is_regular_file(pathObj)) { - // ignore files with extensions .tar, .zip, .7z - auto file_extension_idx = subfolder_path.find_last_of("."); - if (file_extension_idx != std::string::npos) { - std::string file_extension = subfolder_path.substr(file_extension_idx + 1); - if (file_extension != "npy") - continue; + while ((_entity = readdir(_sub_dir)) != nullptr) { + std::string entry_name(_entity->d_name); + if (strcmp(_entity->d_name, ".") == 0 || strcmp(_entity->d_name, "..") == 0) continue; + entry_name_list.push_back(entry_name); + } + closedir(_sub_dir); + std::sort(entry_name_list.begin(), entry_name_list.end()); + + for (unsigned dir_count = 0; dir_count < entry_name_list.size(); ++dir_count) { + std::string subfolder_path = _full_path + "/" + entry_name_list[dir_count]; + filesys::path pathObj(subfolder_path); + if 
(filesys::exists(pathObj) && filesys::is_regular_file(pathObj)) { + // ignore files with extensions .tar, .zip, .7z + auto file_extension_idx = subfolder_path.find_last_of("."); + if (file_extension_idx != std::string::npos) { + std::string file_extension = subfolder_path.substr(file_extension_idx + 1); + if (file_extension != "npy") + continue; + } + ret = open_folder(); + break; // assume directory has only files. + } else if (filesys::exists(pathObj) && filesys::is_directory(pathObj)) { + _folder_path = subfolder_path; + if (open_folder() != Reader::Status::OK) + WRN("NumpyDataReader ShardID [" + TOSTR(_shard_id) + "] File reader cannot access the storage at " + _folder_path); } - ret = open_folder(); - break; // assume directory has only files. - } else if (filesys::exists(pathObj) && filesys::is_directory(pathObj)) { - _folder_path = subfolder_path; - if (open_folder() != Reader::Status::OK) - WRN("NumpyDataReader ShardID [" + TOSTR(_shard_id) + "] File reader cannot access the storage at " + _folder_path); } } if (_in_batch_read_count > 0 && _in_batch_read_count < _batch_count) { diff --git a/rocAL_pybind/amd/rocal/plugin/pytorch.py b/rocAL_pybind/amd/rocal/plugin/pytorch.py index 01e7e7a05..2b5d3cdb8 100644 --- a/rocAL_pybind/amd/rocal/plugin/pytorch.py +++ b/rocAL_pybind/amd/rocal/plugin/pytorch.py @@ -30,13 +30,15 @@ class ROCALNumpyIterator(object): - def __init__(self, pipeline, tensor_dtype=types.FLOAT, device="cpu", device_id=0): + def __init__(self, pipeline, tensor_dtype=types.FLOAT, device="cpu", device_id=0, return_roi=False): self.loader = pipeline self.tensor_dtype = tensor_dtype self.device = device self.device_id = device_id self.output_memory_type = self.loader._output_memory_type self.output_list = None + self.batch_size = self.loader._batch_size + self.return_roi = return_roi print("self.device", self.device) self.len = b.getRemainingImages(self.loader._handle) @@ -53,6 +55,15 @@ def __next__(self): self.output_list = [] for i in 
range(len(self.output_tensor_list)): dimensions = self.output_tensor_list[i].dimensions() + if self.return_roi: + self.num_dims = len(dimensions) - 1 + self.roi_array = np.zeros(self.batch_size * self.num_dims * 2, dtype=np.uint32) + self.output_tensor_list[i].copy_roi(self.roi_array) + self.max_roi_size = np.zeros(self.num_dims, dtype=np.uint32) + for j in range(self.batch_size): + index = j * self.num_dims * 2 + roi_size = self.roi_array[index + self.num_dims : index + self.num_dims * 2] - self.roi_array[index : index + self.num_dims] + self.max_roi_size = np.maximum(roi_size, self.max_roi_size) if self.device == "cpu": torch_dtype = self.output_tensor_list[i].dtype() output = torch.empty( @@ -68,8 +79,20 @@ def __next__(self): self.output_list.append(output) else: for i in range(len(self.output_tensor_list)): + if self.return_roi: + self.output_tensor_list[i].copy_roi(self.roi_array) + self.max_roi_size = np.zeros(self.num_dims, dtype=np.uint32) + for j in range(self.batch_size): + index = j * self.num_dims * 2 + roi_size = self.roi_array[index + self.num_dims : index + self.num_dims * 2] - self.roi_array[index : index + self.num_dims] + self.max_roi_size = np.maximum(roi_size, self.max_roi_size) self.output_tensor_list[i].copy_data(ctypes.c_void_p( self.output_list[i].data_ptr()), self.output_memory_type) + if self.return_roi: + roi_output_list = [] + for i in range(len(self.output_list)): + roi_output_list.append(self.output_list[i][:, :self.max_roi_size[0], :self.max_roi_size[1], :self.max_roi_size[2], :self.max_roi_size[3]]) + return roi_output_list return self.output_list def reset(self): diff --git a/rocAL_pybind/amd/rocal/readers.py b/rocAL_pybind/amd/rocal/readers.py index 0ee1f3840..b115a3d92 100644 --- a/rocAL_pybind/amd/rocal/readers.py +++ b/rocAL_pybind/amd/rocal/readers.py @@ -352,13 +352,13 @@ def mxnet(path, stick_to_shard=False, pad_last_batch=False): return mxnet_metadata -def numpy(*inputs, file_root='', num_shards=1, - random_shuffle=False, 
shard_id=0, stick_to_shard=False, pad_last_batch=False): +def numpy(*inputs, file_root='', files=[], num_shards=1, + random_shuffle=False, shard_id=0, stick_to_shard=False, pad_last_batch=False, seed=0): Pipeline._current_pipeline._reader = "NumpyReader" # Output - kwargs_pybind = {"source_path": file_root, "is_output": False, "shuffle": random_shuffle, - "loop": False, "decode_size_policy": types.MAX_SIZE, "shard_id": shard_id, "shard_count": num_shards} + kwargs_pybind = {"source_path": file_root, "files": files, "is_output": False, "shuffle": random_shuffle, + "loop": False, "decode_size_policy": types.MAX_SIZE, "shard_id": shard_id, "shard_count": num_shards, "seed": seed} numpy_reader_output = b.numpyReaderSourceShard( Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) return (numpy_reader_output) diff --git a/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp b/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp index 066aa4199..51265859f 100644 --- a/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp +++ b/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp @@ -323,7 +323,7 @@ int test(int test_case, int reader_type, const char *path, const char *outName, { std::cout << ">>>>>>> Running Numpy reader" << std::endl; pipeline_type = 4; - decoded_output = rocalNumpyFileSource(handle, path, num_threads, false, false, false, ROCAL_USE_MAX_SIZE); + decoded_output = rocalNumpyFileSource(handle, path, num_threads, {}, false, false, false, ROCAL_USE_MAX_SIZE); } break; default: { std::cout << ">>>>>>> Running IMAGE READER" << std::endl; From 2f71922dd622bd05d57541fe11d7dbd5a489fec3 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Fri, 15 Dec 2023 08:08:56 +0000 Subject: [PATCH 21/33] Fixing build issues in rocAL --- rocAL/source/augmentations/node_cast.cpp | 6 +++++- .../examples/rocAL_api_numpy_reader.py | 18 +++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/rocAL/source/augmentations/node_cast.cpp 
b/rocAL/source/augmentations/node_cast.cpp index d1949560e..cff54c5c2 100644 --- a/rocAL/source/augmentations/node_cast.cpp +++ b/rocAL/source/augmentations/node_cast.cpp @@ -31,7 +31,11 @@ void CastNode::create_node() { if(_node) return; - _node = vxExtRppCast(_graph->get(), _inputs[0]->handle(), _src_tensor_roi, _outputs[0]->handle(), _input_layout, _roi_type); + int input_layout = (int)_inputs[0]->info().layout(); + int roi_type = static_cast(_inputs[0]->info().roi_type()); + vx_scalar input_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &input_layout); + vx_scalar roi_type_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &roi_type); + _node = vxExtRppCast(_graph->get(), _inputs[0]->handle(), _inputs[0]->get_roi_tensor(), _outputs[0]->handle(), input_layout_vx, roi_type_vx); vx_status status; if((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) diff --git a/rocAL_pybind/examples/rocAL_api_numpy_reader.py b/rocAL_pybind/examples/rocAL_api_numpy_reader.py index 60c797a31..09e50a7f6 100644 --- a/rocAL_pybind/examples/rocAL_api_numpy_reader.py +++ b/rocAL_pybind/examples/rocAL_api_numpy_reader.py @@ -56,30 +56,30 @@ def main(): pipeline = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=random_seed, rocal_cpu=rocal_cpu, prefetch_queue_depth=6) with pipeline: - numpy_reader_output = fn.readers.numpy(file_root=data_path, files=x_train, shard_id=local_rank, num_shards=world_size, random_shuffle=True, seed=random_seed+local_rank) - label_output = fn.readers.numpy(file_root=data_path, files=y_train, shard_id=local_rank, num_shards=world_size, random_shuffle=True, seed=random_seed+local_rank) + numpy_reader_output = fn.readers.numpy(file_root=data_path, files=x_train, shard_id=local_rank, num_shards=world_size) + label_output = fn.readers.numpy(file_root=data_path, files=y_train, shard_id=local_rank, num_shards=world_size) data_output = 
fn.set_layout(numpy_reader_output, output_layout=types.NHWC) normalized_output = fn.normalize(data_output, axes=[0,1], mean=MEAN, stddev=STDDEV, output_layout=types.NHWC, output_dtype=types.FLOAT) - transposed_output = fn.transpose(normalized_output, perm=[2,1,0], output_layout=types.NCHW, output_dtype=types.FLOAT) + transposed_output = fn.transpose(normalized_output, perm=[2,0,1], output_layout=types.NCHW, output_dtype=types.FLOAT) pipeline.set_outputs(transposed_output, label_output) pipeline.build() - pipeline1 = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=random_seed, rocal_cpu=rocal_cpu, prefetch_queue_depth=6) + val_pipeline = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=random_seed, rocal_cpu=rocal_cpu, prefetch_queue_depth=6) - with pipeline1: + with val_pipeline: numpy_reader_output = fn.readers.numpy(file_root=data_path, files=x_val, shard_id=local_rank, num_shards=world_size, seed=random_seed+local_rank) label_output = fn.readers.numpy(file_root=data_path, files=y_val, shard_id=local_rank, num_shards=world_size, seed=random_seed+local_rank) data_output = fn.set_layout(numpy_reader_output, output_layout=types.NHWC) normalized_output = fn.normalize(data_output, axes=[0,1], mean=MEAN, stddev=STDDEV, output_layout=types.NHWC, output_dtype=types.FLOAT) - transposed_output = fn.transpose(normalized_output, perm=[2,1,0], output_layout=types.NCHW, output_dtype=types.FLOAT) - pipeline1.set_outputs(transposed_output, label_output) + transposed_output = fn.transpose(normalized_output, perm=[2,0,1], output_layout=types.NCHW, output_dtype=types.FLOAT) + val_pipeline.set_outputs(transposed_output, label_output) - pipeline1.build() + val_pipeline.build() numpyIteratorPipeline = ROCALNumpyIterator(pipeline, device='cpu' if rocal_cpu else 'gpu') print(len(numpyIteratorPipeline)) - valNumpyIteratorPipeline = ROCALNumpyIterator(pipeline1, device='cpu' if rocal_cpu else 'gpu') + 
valNumpyIteratorPipeline = ROCALNumpyIterator(val_pipeline, device='cpu' if rocal_cpu else 'gpu') print(len(valNumpyIteratorPipeline)) cnt = 0 for epoch in range(2): From 182773ceb733835aa8bb7ee60703b5d5537d62cb Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Fri, 19 Jan 2024 18:29:14 +0000 Subject: [PATCH 22/33] Adding back missed formatting for crop node --- .../augmentations/geometry_augmentations/node_crop.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocAL/source/augmentations/geometry_augmentations/node_crop.cpp b/rocAL/source/augmentations/geometry_augmentations/node_crop.cpp index 6574ea1bf..6ca4cbd2c 100644 --- a/rocAL/source/augmentations/geometry_augmentations/node_crop.cpp +++ b/rocAL/source/augmentations/geometry_augmentations/node_crop.cpp @@ -102,8 +102,8 @@ void CropNode::create_crop_tensor() { vx_size num_of_dims = 2; vx_size stride[num_of_dims]; std::vector _crop_tensor_dims = {_batch_size, 4}; - if (_inputs[0]->info().layout() == RocalTensorlayout::NFCHW || _inputs[0]->info().layout() == RocalTensorlayout::NFHWC) - _crop_tensor_dims = {_inputs[0]->info().dims()[0] * _inputs[0]->info().dims()[1], 4}; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions + if(_inputs[0]->info().layout() == RocalTensorlayout::NFCHW || _inputs[0]->info().layout() == RocalTensorlayout::NFHWC) + _crop_tensor_dims = {_inputs[0]->info().dims()[0] * _inputs[0]->info().dims()[1], 4}; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions stride[0] = sizeof(vx_uint32); stride[1] = stride[0] * _crop_tensor_dims[0]; vx_enum mem_type = VX_MEMORY_TYPE_HOST; @@ -111,8 +111,8 @@ void CropNode::create_crop_tensor() { mem_type = VX_MEMORY_TYPE_HIP; allocate_host_or_pinned_mem(&_crop_coordinates, stride[1] * 4, _inputs[0]->info().mem_type()); - _crop_tensor = vxCreateTensorFromHandle(vxGetContext((vx_reference)_graph->get()), num_of_dims, _crop_tensor_dims.data(), VX_TYPE_UINT32, 0, - stride, 
reinterpret_cast(_crop_coordinates), mem_type); + _crop_tensor = vxCreateTensorFromHandle(vxGetContext((vx_reference) _graph->get()), num_of_dims, _crop_tensor_dims.data(), VX_TYPE_UINT32, 0, + stride, reinterpret_cast(_crop_coordinates), mem_type); vx_status status; if ((status = vxGetStatus((vx_reference)_crop_tensor)) != VX_SUCCESS) THROW("Error: vxCreateTensorFromHandle(_crop_tensor: failed " + TOSTR(status)) From 7e927d1f48f797883b0fe3ccf24da8c9275e4361 Mon Sep 17 00:00:00 2001 From: Hansel Yang Date: Tue, 23 Jan 2024 05:50:12 -0800 Subject: [PATCH 23/33] Include Fix for CPU Backend (#93) --- rocAL/source/pipeline/tensor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocAL/source/pipeline/tensor.cpp b/rocAL/source/pipeline/tensor.cpp index 4e3dd2403..bc234e813 100644 --- a/rocAL/source/pipeline/tensor.cpp +++ b/rocAL/source/pipeline/tensor.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. */ #include -#if !ENABLE_HIP +#if ENABLE_OPENCL #include #endif #include From 77038c5c4f46412f8423b51f827bee53b15746f7 Mon Sep 17 00:00:00 2001 From: Rajy Rawther Date: Tue, 23 Jan 2024 21:31:16 -0800 Subject: [PATCH 24/33] turbojpeg library update for rocAL (#86) * removed dependancy of libjpegturbo * minor fix for the decoder python script * fix broken link in setup.py * add WITH_JPEG8 true building tjpeg * fix for accuracy and convergence --- .../image_augmentation/image_augmentation.cpp | 25 +- docs/examples/image_processing/decoder.py | 13 +- .../image_processing/decoder_examples.ipynb | 17 +- .../image_processing/inference_pipeline.py | 2 +- rocAL-setup.py | 6 +- rocAL/CMakeLists.txt | 9 + .../decoders/image/fused_crop_decoder.h | 2 +- .../decoders/image/turbo_jpeg_decoder.h | 22 +- .../include/decoders/libjpeg/libjpeg_extra.h | 75 +++++ .../include/decoders/libjpeg/libjpeg_utils.h | 30 ++ .../loaders/image/image_read_and_decode.h | 8 - .../decoders/image/fused_crop_decoder.cpp | 4 +- .../decoders/image/turbo_jpeg_decoder.cpp | 47 ++-- 
.../source/decoders/libjpeg/libjpeg_extra.cpp | 266 ++++++++++++++++++ rocAL_pybind/setup.py | 2 +- 15 files changed, 444 insertions(+), 84 deletions(-) create mode 100644 rocAL/include/decoders/libjpeg/libjpeg_extra.h create mode 100644 rocAL/include/decoders/libjpeg/libjpeg_utils.h create mode 100644 rocAL/source/decoders/libjpeg/libjpeg_extra.cpp diff --git a/apps/image_augmentation/image_augmentation.cpp b/apps/image_augmentation/image_augmentation.cpp index 1286d1862..a21b89393 100644 --- a/apps/image_augmentation/image_augmentation.cpp +++ b/apps/image_augmentation/image_augmentation.cpp @@ -48,12 +48,12 @@ int main(int argc, const char** argv) { if (argc < MIN_ARG_COUNT) { printf( "Usage: image_augmentation \ - decode_width decode_height video_mode gray_scale/rgb display_on_off decode_shard_count \n"); + decode_width decode_height decoder_mode gray_scale/rgb display_on_off decode_shard_count \n"); return -1; } int argIdx = 0; const char* folderPath1 = argv[++argIdx]; - int video_mode = 0; // 0 means no video decode, 1 means hardware, 2 means software decoding + int decoder_mode = 0; // 0 means no video decode, 1 means hardware, 2 means software decoding bool display = 1; // Display the images int aug_depth = 1; // how deep is the augmentation tree int rgb = 1; // process color images @@ -62,7 +62,7 @@ int main(int argc, const char** argv) { bool processing_device = 1; size_t shard_count = 2; int shuffle = 0; - int dec_mode = 0; + int decoder_type = 0; const char *outName = "image_augmentation_app.png"; if (argc >= argIdx + MIN_ARG_COUNT) @@ -75,7 +75,7 @@ int main(int argc, const char** argv) { decode_height = atoi(argv[++argIdx]); if (argc >= argIdx + MIN_ARG_COUNT) - video_mode = atoi(argv[++argIdx]); + decoder_mode = atoi(argv[++argIdx]); if (argc >= argIdx + MIN_ARG_COUNT) rgb = atoi(argv[++argIdx]); @@ -90,7 +90,7 @@ int main(int argc, const char** argv) { shuffle = atoi(argv[++argIdx]); if (argc >= argIdx + MIN_ARG_COUNT) - dec_mode = 
atoi(argv[++argIdx]); + decoder_type = atoi(argv[++argIdx]); if (argc >= argIdx + MIN_ARG_COUNT) outName = argv[++argIdx]; @@ -108,7 +108,7 @@ int main(int argc, const char** argv) { return -1; } - RocalDecoderType dec_type = (RocalDecoderType)dec_mode; + RocalDecoderType dec_type = (RocalDecoderType)decoder_type; /*>>>>>>>>>>>>>>>> Creating rocAL parameters <<<<<<<<<<<<<<<<*/ @@ -126,7 +126,7 @@ int main(int argc, const char** argv) { /*>>>>>>>>>>>>>>>>>>> Graph description <<<<<<<<<<<<<<<<<<<*/ RocalTensor input1; - if (video_mode != 0) { + if (decoder_mode >= 2) { unsigned sequence_length = 3; unsigned frame_step = 3; unsigned frame_stride = 1; @@ -134,7 +134,12 @@ int main(int argc, const char** argv) { std::cout << "Output width and height is needed for video decode\n"; return -1; } - input1 = rocalVideoFileSource(handle, folderPath1, color_format, ((video_mode == 1) ? RocalDecodeDevice::ROCAL_HW_DECODE : RocalDecodeDevice::ROCAL_SW_DECODE), shard_count, sequence_length, frame_step, frame_stride, shuffle, true, false); + input1 = rocalVideoFileSource(handle, folderPath1, color_format, (decoder_mode == 2)? 
ROCAL_SW_DECODE: ROCAL_HW_DECODE, shard_count, sequence_length, frame_step, frame_stride, shuffle, true, false); + } else if (decoder_mode == 1) { + std::vector area = {0.08, 1}; + std::vector aspect_ratio = {3.0f / 4, 4.0f / 3}; + input1 = rocalFusedJpegCrop(handle, folderPath1, color_format, shard_count, false, area, aspect_ratio, 10, false, false, ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED, decode_width, decode_height); + } else { // The jpeg file loader can automatically select the best size to decode all images to that size // User can alternatively set the size or change the policy that is used to automatically find the size @@ -152,7 +157,7 @@ int main(int argc, const char** argv) { RocalTensor tensor0; int resize_w = 112, resize_h = 112; - if (video_mode) { + if (decoder_mode >= 2) { resize_h = decode_height; resize_w = decode_width; tensor0 = input1; @@ -214,7 +219,7 @@ int main(int argc, const char** argv) { int w = rocalGetOutputWidth(handle); int p = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24) ? 3 : 1); std::cout << "output width " << w << " output height " << h << " color planes " << p << std::endl; - const unsigned number_of_cols = video_mode ? 1 : 10; + const unsigned number_of_cols = (decoder_mode >= 2) ? 1 : 10; auto cv_color_format = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24) ? 
CV_8UC3 : CV_8UC1); cv::Mat mat_output(h + AMD_ROCm_Black_resize.rows, w * number_of_cols, cv_color_format); cv::Mat mat_input(h, w, cv_color_format); diff --git a/docs/examples/image_processing/decoder.py b/docs/examples/image_processing/decoder.py index eccce45fd..073fa383c 100644 --- a/docs/examples/image_processing/decoder.py +++ b/docs/examples/image_processing/decoder.py @@ -9,7 +9,7 @@ import cupy as cp seed = 1549361629 -image_dir = "../../../../data/images/AMD-tinyDataSet/" +image_dir = "../../../data/images/AMD-tinyDataSet/" batch_size = 4 gpu_id = 0 @@ -34,13 +34,13 @@ def show_pipeline_output(pipe, device): pipe.build() data_loader = ROCALClassificationIterator(pipe, device) images = next(iter(data_loader)) - show_images(images[0], device) + show_images(images[0][0], device) @pipeline_def(seed=seed) def image_decoder_pipeline(device="cpu", path=image_dir): - jpegs, labels = fn.readers.file(file_root=path, shard_id=0, num_shards=1, random_shuffle=False) + jpegs, labels = fn.readers.file(file_root=path) images = fn.decoders.image(jpegs, file_root=path, device=device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False) - return fn.resize(images, device=device, resize_x=300, resize_y=300) + return fn.resize(images, device=device, resize_width=300, resize_height=300) def main(): print ('Optional arguments: ') @@ -52,9 +52,8 @@ def main(): rocal_device = "gpu" if len(sys.argv) > 2: img_folder = sys.argv[2] - - pipe = image_decoder_pipeline(batch_size=bs, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, - reverse_channels=True, mean = [0, 0, 0], std=[255, 255, 255], device=rocal_device, path=img_folder) + pipe = image_decoder_pipeline(batch_size=bs, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, + reverse_channels=True, mean = [0, 0, 0], std=[255,255,255], device=rocal_device, path=img_folder) show_pipeline_output(pipe, device=rocal_device) if __name__ == '__main__': diff --git 
a/docs/examples/image_processing/decoder_examples.ipynb b/docs/examples/image_processing/decoder_examples.ipynb index 27098f079..cb1bef27e 100644 --- a/docs/examples/image_processing/decoder_examples.ipynb +++ b/docs/examples/image_processing/decoder_examples.ipynb @@ -38,7 +38,7 @@ "%matplotlib inline\n", "\n", "seed = 1549361629\n", - "image_dir = \"../../../../data/images/AMD-tinyDataSet/\"\n", + "image_dir = \"../../../data/images/AMD-tinyDataSet/\"\n", "batch_size = 4\n", "gpu_id = 0\n", "\n", @@ -61,7 +61,7 @@ " pipe.build()\n", " data_loader = ROCALClassificationIterator(pipe, device, device_id)\n", " images = next(iter(data_loader))\n", - " show_images(images[0], device)\n" + " show_images(images[0][0], device)\n" ] }, { @@ -82,9 +82,9 @@ "source": [ "@pipeline_def(seed=seed)\n", "def image_decoder_pipeline(device=\"cpu\"):\n", - " jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n", + " jpegs, labels = fn.readers.file(file_root=image_dir)\n", " images = fn.decoders.image(jpegs, file_root=image_dir, device=device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False)\n", - " return fn.resize(images, device=device, resize_x=300, resize_y=300)\n", + " return fn.resize(images, device=device, resize_width=300, resize_height=300)\n", "\n", "pipe = image_decoder_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n", " reverse_channels=True, mean = [0, 0, 0], std=[255,255,255], device=\"cpu\")\n", @@ -109,12 +109,13 @@ "source": [ "@pipeline_def(seed=seed)\n", "def image_decoder_random_crop_pipeline(device=\"cpu\"):\n", - " jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n", + " jpegs, labels = fn.readers.file(file_root=image_dir)\n", " images = fn.decoders.image_slice(jpegs, file_root=image_dir, \n", - " device=device,\n", " output_type=types.RGB,\n", + " shard_id = 0,\n", + " num_shards 
= 1,\n", " random_shuffle=True)\n", - " return fn.resize(images, device=device, resize_x=300, resize_y=300)\n", + " return fn.resize(images, device=device, resize_width=300, resize_height=300)\n", " \n", "pipe = image_decoder_random_crop_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n", " reverse_channels=True, mean=[0,0,0], std = [255,255,255], device=\"cpu\")\n", @@ -184,7 +185,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/docs/examples/image_processing/inference_pipeline.py b/docs/examples/image_processing/inference_pipeline.py index a7db74e16..f97da7b37 100644 --- a/docs/examples/image_processing/inference_pipeline.py +++ b/docs/examples/image_processing/inference_pipeline.py @@ -31,7 +31,7 @@ seed = 1549361629 -image_dir = "../../../../data/images/AMD-tinyDataSet/" +image_dir = "../../../data/images/AMD-tinyDataSet/" batch_size = 4 gpu_id = 0 diff --git a/rocAL-setup.py b/rocAL-setup.py index fa6b5de91..1032aef6c 100644 --- a/rocAL-setup.py +++ b/rocAL-setup.py @@ -311,11 +311,11 @@ os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + linuxSystemInstall_check+' install lmdb-devel rapidjson-devel') - # turbo-JPEG - https://github.com/rrawther/libjpeg-turbo.git -- 2.0.6.2 + # turbo-JPEG - https://github.com/libjpeg-turbo/libjpeg-turbo.git -- 3.0.1 os.system( - '(cd '+deps_dir+'; git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git )') + '(cd '+deps_dir+'; git clone -b 3.0.1 https://github.com/libjpeg-turbo/libjpeg-turbo.git )') os.system('(cd '+deps_dir+'/libjpeg-turbo; mkdir build; cd build; '+linuxCMake + - ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ..; make -j 4; sudo make install )') + ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE 
-DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ..; make -j 4; sudo make install )') # RPP os.system('sudo -v') os.system('(cd '+deps_dir+'; git clone -b '+rppVersion+' https://github.com/GPUOpen-ProfessionalCompute-Libraries/rpp.git; cd rpp; mkdir build-'+backend+'; cd build-'+backend+'; ' + diff --git a/rocAL/CMakeLists.txt b/rocAL/CMakeLists.txt index 1dc4630e1..c81ed5f99 100644 --- a/rocAL/CMakeLists.txt +++ b/rocAL/CMakeLists.txt @@ -42,6 +42,14 @@ find_package(RapidJSON QUIET) find_package(StdFilesystem QUIET) find_package(HALF QUIET) +if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path") +elseif(ROCM_PATH) + message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}") +else() + set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path") +endif() + # HIP Backend if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "HIP") if(NOT DEFINED HIP_PATH) @@ -225,6 +233,7 @@ if(${BUILD_ROCAL}) include/augmentations/geometry_augmentations/ include/decoders/image/ include/decoders/video/ + include/decoders/libjpeg/ include/device/ include/loaders/ include/loaders/image/ diff --git a/rocAL/include/decoders/image/fused_crop_decoder.h b/rocAL/include/decoders/image/fused_crop_decoder.h index 718919b90..ae59f6bf1 100644 --- a/rocAL/include/decoders/image/fused_crop_decoder.h +++ b/rocAL/include/decoders/image/fused_crop_decoder.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/decoders/image/turbo_jpeg_decoder.h b/rocAL/include/decoders/image/turbo_jpeg_decoder.h index ce4dba600..99e67abac 100644 --- a/rocAL/include/decoders/image/turbo_jpeg_decoder.h +++ b/rocAL/include/decoders/image/turbo_jpeg_decoder.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -64,24 +64,8 @@ class TJDecoder : public Decoder { private: tjhandle m_jpegDecompressor; - const static unsigned SCALING_FACTORS_COUNT = 16; - const tjscalingfactor SCALING_FACTORS[SCALING_FACTORS_COUNT] = { - {2, 1}, - {15, 8}, - {7, 4}, - {13, 8}, - {3, 2}, - {11, 8}, - {5, 4}, - {9, 8}, - {1, 1}, - {7, 8}, - {3, 4}, - {5, 8}, - {1, 2}, - {3, 8}, - {1, 4}, - {1, 8}}; + tjscalingfactor *_scaling_factors = nullptr; + int _num_scaling_factors = 0; bool _is_partial_decoder = false; std::vector _bbox_coord; const static unsigned _max_scaling_factor = 8; diff --git a/rocAL/include/decoders/libjpeg/libjpeg_extra.h b/rocAL/include/decoders/libjpeg/libjpeg_extra.h new file mode 100644 index 000000000..69db1028a --- /dev/null +++ b/rocAL/include/decoders/libjpeg/libjpeg_extra.h @@ -0,0 +1,75 @@ +/* +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +#include +#include +#include +#include +#include "libjpeg_utils.h" + +extern "C" { + +//! extra apis for rocal to support partial decoding + +//! * Helper function to set the source +//! * This function doesn't scale the decoded image + +//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image. +//! * This function doesn't scale the decoded image + +/*!
+ \param handle TJPeg handle + \param jpegBuf compressed jpeg image buffer + \param jpegSize Size of the compressed data provided in the input_buffer + \param dstBuf user provided output buffer + \param width, pitch, height width, stride and height of the allocated buffer + \param flags TJPEG flags + \param pixelFormat pixel format of the image + \param crop_x_diff, crop_width_diff Actual crop_x and crop_w (adjusted to MB boundary) + \param x1, y1, crop_width, crop_height requested crop window +*/ + +int tjDecompress2_partial(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags, unsigned int *crop_x_diff, unsigned int *crop_width_diff, + unsigned int x1, unsigned int y1, unsigned int crop_width, unsigned int crop_height); + + +//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image. +//! * This function scales the decoded image to fit the output dims +/*! + \param handle TJPeg handle + \param jpegBuf compressed jpeg image buffer + \param jpegSize Size of the compressed data provided in the input_buffer + \param dstBuf user provided output buffer + \param width, pitch, height width, stride and height of the allocated buffer + \param flags TJPEG flags + \param crop_width, crop_height requested crop window +*/ + +int tjDecompress2_partial_scale(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags, unsigned int crop_width, unsigned int crop_height); +} \ No newline at end of file diff --git a/rocAL/include/decoders/libjpeg/libjpeg_utils.h b/rocAL/include/decoders/libjpeg/libjpeg_utils.h new file mode 100644 index 000000000..1c588ee0b --- /dev/null +++ b/rocAL/include/decoders/libjpeg/libjpeg_utils.h @@ -0,0 +1,30 @@ +/* +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +//! turbojpeg includes + +extern "C" { +#include "jerror.h" +#include "jpeglib.h" +#include "jpegint.h" +} diff --git a/rocAL/include/loaders/image/image_read_and_decode.h b/rocAL/include/loaders/image/image_read_and_decode.h index 471164b54..6682d85f6 100644 --- a/rocAL/include/loaders/image/image_read_and_decode.h +++ b/rocAL/include/loaders/image/image_read_and_decode.h @@ -33,14 +33,6 @@ THE SOFTWARE. #include "timing_debug.h" #include "turbo_jpeg_decoder.h" -/** - * Compute the scaled value of dimension using the given scaling - * factor. This macro performs the integer equivalent of ceil(dimension * - * scalingFactor). 
- */ -#define TJSCALED(dimension, scalingFactor) \ - ((dimension * scalingFactor.num + scalingFactor.denom - 1) / \ - scalingFactor.denom) class ImageReadAndDecode { public: diff --git a/rocAL/source/decoders/image/fused_crop_decoder.cpp b/rocAL/source/decoders/image/fused_crop_decoder.cpp index 2522bca4e..ee14c0f11 100644 --- a/rocAL/source/decoders/image/fused_crop_decoder.cpp +++ b/rocAL/source/decoders/image/fused_crop_decoder.cpp @@ -20,11 +20,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "fused_crop_decoder.h" #include #include #include +#include "fused_crop_decoder.h" +#include "libjpeg_extra.h" + FusedCropTJDecoder::FusedCropTJDecoder() { m_jpegDecompressor = tjInitDecompress(); diff --git a/rocAL/source/decoders/image/turbo_jpeg_decoder.cpp b/rocAL/source/decoders/image/turbo_jpeg_decoder.cpp index 772fc8535..b285e891d 100644 --- a/rocAL/source/decoders/image/turbo_jpeg_decoder.cpp +++ b/rocAL/source/decoders/image/turbo_jpeg_decoder.cpp @@ -20,24 +20,21 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include "turbo_jpeg_decoder.h" -#include #include +#include +#include "turbo_jpeg_decoder.h" +#include "libjpeg_extra.h" TJDecoder::TJDecoder() { m_jpegDecompressor = tjInitDecompress(); - -#if 0 - int num_avail_scalings = 0; - auto scaling_factors = tjGetScalingFactors (&num_avail_scalings); - for(int i = 0; i < num_avail_scalings; i++) { - if(scaling_factors[i].num < scaling_factors[i].denom) { - - printf("%d / %d - ",scaling_factors[i].num, scaling_factors[i].denom ); + if ((_scaling_factors = tj3GetScalingFactors(&_num_scaling_factors)) == NULL) + THROW("tjDecompress2_partial_scale(): error getting scaling factors"); + for(int i = 0; i < _num_scaling_factors; i++) { + if(_scaling_factors[i].num < _scaling_factors[i].denom) { + INFO(STR(_scaling_factors[i].num) + "/" + STR(_scaling_factors[i].denom)); } } -#endif }; Decoder::Status TJDecoder::decode_info(unsigned char* input_buffer, size_t input_size, int* width, int* height, int* color_comps) { @@ -90,7 +87,7 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size crop_width = _max_scaling_factor * max_decoded_width; if (crop_width > original_image_width) crop_width = original_image_width; crop_height = crop_width * (1.0 / in_ratio); - if (crop_height > _max_scaling_factor * max_decoded_width) crop_height = _max_scaling_factor * max_decoded_width; + if (crop_height > _max_scaling_factor * max_decoded_height) crop_height = _max_scaling_factor * max_decoded_height; } else if (original_image_height > (_max_scaling_factor * max_decoded_height)) { crop_height = _max_scaling_factor * max_decoded_height; if (crop_height > original_image_height) crop_height = original_image_height; @@ -114,9 +111,9 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size } // Find the decoded image size using the predefined scaling factors in the turbo jpeg decoder uint scaledw = max_decoded_width, scaledh = max_decoded_height; - for (auto scaling_factor : SCALING_FACTORS) { - 
scaledw = TJSCALED(crop_width, scaling_factor); - scaledh = TJSCALED(crop_height, scaling_factor); + for (int j=0; j < _num_scaling_factors; j++) { + scaledw = TJSCALED(original_image_width, _scaling_factors[j]); + scaledh = TJSCALED(original_image_height, _scaling_factors[j]); if (scaledw <= max_decoded_width && scaledh <= max_decoded_height) { break; } @@ -142,9 +139,9 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size } // Find the decoded image size using the predefined scaling factors in the turbo jpeg decoder uint scaledw = max_decoded_width, scaledh = max_decoded_height; - for (auto scaling_factor : SCALING_FACTORS) { - scaledw = TJSCALED(original_image_width, scaling_factor); - scaledh = TJSCALED(original_image_height, scaling_factor); + for (int j=0; j < _num_scaling_factors; j++) { + scaledw = TJSCALED(original_image_width, _scaling_factors[j]); + scaledh = TJSCALED(original_image_height, _scaling_factors[j]); if (scaledw <= max_decoded_width && scaledh <= max_decoded_height) break; } @@ -168,7 +165,7 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size crop_width = _max_scaling_factor * max_decoded_width; if (crop_width > original_image_width) crop_width = original_image_width; crop_height = crop_width * (1.0 / in_ratio); - if (crop_height > _max_scaling_factor * max_decoded_width) crop_height = _max_scaling_factor * max_decoded_width; + if (crop_height > _max_scaling_factor * max_decoded_height) crop_height = _max_scaling_factor * max_decoded_height; } else if (original_image_height > (_max_scaling_factor * max_decoded_height)) { crop_height = _max_scaling_factor * max_decoded_height; if (crop_height > original_image_height) crop_height = original_image_height; @@ -192,9 +189,9 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size } // Find the decoded image size using the predefined scaling factors in the turbo jpeg decoder uint scaledw = max_decoded_width, 
scaledh = max_decoded_height; - for (auto scaling_factor : SCALING_FACTORS) { - scaledw = TJSCALED(crop_width, scaling_factor); - scaledh = TJSCALED(crop_height, scaling_factor); + for (int j=0; j < _num_scaling_factors; j++) { + scaledw = TJSCALED(original_image_width, _scaling_factors[j]); + scaledh = TJSCALED(original_image_height, _scaling_factors[j]); if (scaledw <= max_decoded_width && scaledh <= max_decoded_height) { break; } @@ -219,9 +216,9 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size // Find the decoded image size using the predefined scaling factors in the turbo jpeg decoder if ((actual_decoded_width != original_image_width) || (actual_decoded_height != original_image_height)) { uint scaledw = actual_decoded_width, scaledh = actual_decoded_height; - for (auto scaling_factor : SCALING_FACTORS) { - scaledw = TJSCALED(original_image_width, scaling_factor); - scaledh = TJSCALED(original_image_height, scaling_factor); + for (int j=0; j < _num_scaling_factors; j++) { + scaledw = TJSCALED(original_image_width, _scaling_factors[j]); + scaledh = TJSCALED(original_image_height, _scaling_factors[j]); if (scaledw <= max_decoded_width && scaledh <= max_decoded_height) break; } diff --git a/rocAL/source/decoders/libjpeg/libjpeg_extra.cpp b/rocAL/source/decoders/libjpeg/libjpeg_extra.cpp new file mode 100644 index 000000000..ca86f644a --- /dev/null +++ b/rocAL/source/decoders/libjpeg/libjpeg_extra.cpp @@ -0,0 +1,266 @@ +/* +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "libjpeg_extra.h" +#include +#include +#include "commons.h" + +enum { COMPRESS = 1, DECOMPRESS = 2 }; +static J_COLOR_SPACE pf2cs[TJ_NUMPF] = { + JCS_EXT_RGB, JCS_EXT_BGR, JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR, + JCS_EXT_XRGB, JCS_GRAYSCALE, JCS_EXT_RGBA, JCS_EXT_BGRA, JCS_EXT_ABGR, + JCS_EXT_ARGB, JCS_CMYK +}; + +struct my_error_mgr { + struct jpeg_error_mgr pub; + jmp_buf setjmp_buffer; + void (*emit_message) (j_common_ptr, int); + boolean warning, stopOnWarning; +}; +typedef struct my_error_mgr *my_error_ptr; + +/* + * Here's the routine that will replace the standard error_exit method: + */ + +METHODDEF(void) +my_error_exit(j_common_ptr cinfo) +{ + /* cinfo->err really points to a my_error_mgr struct, so coerce pointer */ + my_error_ptr myerr = (my_error_ptr)cinfo->err; + + /* Always display the message. */ + /* We could postpone this until after returning, if we chose.
*/ + (*cinfo->err->output_message) (cinfo); + + /* Return control to the setjmp point */ + longjmp(myerr->setjmp_buffer, 1); +} + + +//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image. +//! * inst function doesn't scale the decoded image +int tjDecompress2_partial(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags, unsigned int *crop_x_diff, unsigned int *crop_width_diff, + unsigned int crop_x, unsigned int crop_y, + unsigned int crop_width, unsigned int crop_height) +{ + JSAMPROW *row_pointer = NULL; + int i, retval = 0; + + if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || width < 0 || + pitch < 0 || height < 0 || pixelFormat < 0 || pixelFormat >= TJ_NUMPF) + THROW("tjDecompress2_partial(): Invalid argument"); + + struct jpeg_decompress_struct cinfo; + // Initialize libjpeg structures to have a memory source + // Modify the usual jpeg error manager to catch fatal errors. + struct my_error_mgr jerr; + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = my_error_exit; + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + + // set up, read header, set image parameters, save size + jpeg_create_decompress(&cinfo); + jpeg_mem_src(&cinfo, jpegBuf, jpegSize); + jpeg_read_header(&cinfo, TRUE); + cinfo.out_color_space = pf2cs[pixelFormat]; + if (flags & TJFLAG_FASTDCT) cinfo.dct_method = JDCT_FASTEST; + if (flags & TJFLAG_FASTUPSAMPLE) cinfo.do_fancy_upsampling = FALSE; + + jpeg_start_decompress(&cinfo); + /* Check for valid crop dimensions. We cannot check these values until + * after jpeg_start_decompress() is called. 
+ */ + if (crop_x + crop_width > cinfo.output_width || crop_y + crop_height > cinfo.output_height) { + ERR("crop dimensions:" << crop_width << " x " << crop_height << " exceed image dimensions" << + cinfo.output_width << " x " << cinfo.output_height); + retval = -1; goto bailout; + } + + jpeg_crop_scanline(&cinfo, &crop_x, &crop_width); + *crop_x_diff = crop_x; + *crop_width_diff = crop_width; + + if (pitch == 0) pitch = cinfo.output_width * tjPixelSize[pixelFormat]; + + if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * cinfo.output_height)) == NULL) { + THROW("tjDecompress2_partial(): Memory allocation failure"); + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + } + + // set row pointer for destination + for (i = 0; i < (int)cinfo.output_height; i++) { + if (flags & TJFLAG_BOTTOMUP) + row_pointer[i] = &dstBuf[(cinfo.output_height - i - 1) * (size_t)pitch]; + else + row_pointer[i] = &dstBuf[i * (size_t)pitch]; + } + + /* Process data */ + JDIMENSION num_scanlines; + jpeg_skip_scanlines(&cinfo, crop_y); + while (cinfo.output_scanline < crop_y + crop_height) { + if (cinfo.output_scanline < crop_y) + num_scanlines = jpeg_read_scanlines(&cinfo, &row_pointer[cinfo.output_scanline], + crop_y + crop_height - cinfo.output_scanline); + else + num_scanlines = jpeg_read_scanlines(&cinfo, &row_pointer[cinfo.output_scanline - crop_y], + crop_y + crop_height - cinfo.output_scanline); + if (num_scanlines == 0){ + ERR("Premature end of Jpeg data. Stopped at " << cinfo.output_scanline - crop_y << "/" + << cinfo.output_height) + } + } + jpeg_skip_scanlines(&cinfo, cinfo.output_height - crop_y - crop_height); + jpeg_finish_decompress(&cinfo); + + bailout: + if (cinfo.global_state > DSTATE_START) jpeg_abort_decompress(&cinfo); + if (row_pointer) free(row_pointer); + return retval; +} + +//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image. +//! 
* inst function scale the decoded image to fit the output dims + +int tjDecompress2_partial_scale(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags, unsigned int crop_width, unsigned int crop_height) +{ + JSAMPROW *row_pointer = NULL; + int i, retval = 0, jpegwidth, jpegheight; + unsigned int scaledw, scaledh, crop_x, crop_y, max_crop_width; + tjscalingfactor *scalingFactors = NULL; + int numScalingFactors = 0; + + unsigned char *tmp_row = NULL; + if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || width < 0 || + pitch < 0 || height < 0 || pixelFormat < 0 || pixelFormat >= TJ_NUMPF) { + THROW("tjDecompress2_partial_scale(): Invalid argument"); + } + + struct jpeg_decompress_struct cinfo; + // Initialize libjpeg structures to have a memory source + // Modify the usual jpeg error manager to catch fatal errors. + struct my_error_mgr jerr; + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = my_error_exit; + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + retval = -1; goto bailout; + } + + jpeg_mem_src(&cinfo, jpegBuf, jpegSize); + jpeg_read_header(&cinfo, TRUE); + cinfo.out_color_space = pf2cs[pixelFormat]; + if (flags & TJFLAG_FASTDCT) cinfo.dct_method = JDCT_FASTEST; + if (flags & TJFLAG_FASTUPSAMPLE) cinfo.do_fancy_upsampling = FALSE; + + jpegwidth = cinfo.image_width; jpegheight = cinfo.image_height; + if (width == 0) width = jpegwidth; + if (height == 0) height = jpegheight; + if ((scalingFactors = tj3GetScalingFactors(&numScalingFactors)) == NULL) + THROW("tjDecompress2_partial_scale(): error getting scaling factors"); + + for (i = 0; i < numScalingFactors; i++) { + scaledw = TJSCALED(crop_width, scalingFactors[i]); + scaledh = TJSCALED(crop_height, scalingFactors[i]); + if (scaledw <= (unsigned int)width && scaledh <= (unsigned int)height) + break; + } + + if (i >= numScalingFactors) + THROW("tjDecompress2_partial_scale(): Could not scale down to desired image dimensions"); + + if (cinfo.num_components > 3) + THROW("tjDecompress2_partial_scale(): JPEG image must have 3 or fewer components"); + + //width = scaledw; height = scaledh; + cinfo.scale_num = scalingFactors[i].num; + cinfo.scale_denom = scalingFactors[i].denom; + + jpeg_start_decompress(&cinfo); + crop_x = cinfo.output_width - scaledw; + crop_y = cinfo.output_height - scaledh; + + /* Check for valid crop dimensions. We cannot check these values until + * after jpeg_start_decompress() is called. 
+ */ + if (crop_x + scaledw > cinfo.output_width || scaledh > cinfo.output_height) { + ERR("crop dimensions:" << crop_x + scaledw << " x " << scaledh << " exceed image dimensions" << + cinfo.output_width << " x " << cinfo.output_height); + retval = -1; goto bailout; + } + + if (pitch == 0) pitch = cinfo.output_width * tjPixelSize[pixelFormat]; + + if ((row_pointer = + (JSAMPROW *)malloc(sizeof(JSAMPROW) * cinfo.output_height)) == NULL) + THROW("tjDecompress2_partial_scale(): Memory allocation failure"); + // allocate row of tmp storage for storing discarded data + tmp_row = (unsigned char *)malloc((size_t)pitch); + + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + + for (i = 0; i < (int)cinfo.output_height; i++) { + if (i < height) { + if (flags & TJFLAG_BOTTOMUP) + row_pointer[i] = &dstBuf[(cinfo.output_height - i - 1) * (size_t)pitch]; + else + row_pointer[i] = &dstBuf[i * (size_t)pitch]; + } else { + row_pointer[i] = tmp_row; + } + } + // the width for the crop shouln't exceed output_width + max_crop_width = scaledw; + jpeg_crop_scanline(&cinfo, &crop_x, &max_crop_width); + jpeg_skip_scanlines(&cinfo, crop_y); + while (cinfo.output_scanline < cinfo.output_height) { + if (cinfo.output_scanline < crop_y) + jpeg_read_scanlines(&cinfo, &row_pointer[cinfo.output_scanline], cinfo.output_height - cinfo.output_scanline); + else + jpeg_read_scanlines(&cinfo, &row_pointer[cinfo.output_scanline- crop_y], cinfo.output_height - cinfo.output_scanline); + } + jpeg_finish_decompress(&cinfo); + + bailout: + if (cinfo.global_state > DSTATE_START) jpeg_abort_decompress(&cinfo); + if (row_pointer) free(row_pointer); + if (tmp_row) free(tmp_row); + return retval; +} diff --git a/rocAL_pybind/setup.py b/rocAL_pybind/setup.py index 7d3598d35..9ee8e57ea 100644 --- a/rocAL_pybind/setup.py +++ b/rocAL_pybind/setup.py @@ -36,7 +36,7 @@ def has_ext_modules(self): setup( name='amd-rocal', description='AMD ROCm 
Augmentation Library', - url='https://github.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/rocAL', + url='https://github.com/ROCm/rocAL', version='1.0.0', author='AMD', license='Apache License 2.0', From 64d3a8366ead6d587d982331855c041869bd5bca Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Wed, 24 Jan 2024 14:43:21 +0000 Subject: [PATCH 25/33] Fixing build issues --- rocAL/include/loaders/image/numpy_loader.h | 2 +- rocAL/include/loaders/image/numpy_loader_sharded.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocAL/include/loaders/image/numpy_loader.h b/rocAL/include/loaders/image/numpy_loader.h index 2c3285561..ec3b5955e 100644 --- a/rocAL/include/loaders/image/numpy_loader.h +++ b/rocAL/include/loaders/image/numpy_loader.h @@ -54,7 +54,7 @@ class NumpyLoader : public LoaderModule { void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, - const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, int channels, ExternalSourceFileMode mode, bool eos) override {} + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override {} private: bool is_out_of_data(); diff --git a/rocAL/include/loaders/image/numpy_loader_sharded.h b/rocAL/include/loaders/image/numpy_loader_sharded.h index ada22c06b..ee55eff73 100644 --- a/rocAL/include/loaders/image/numpy_loader_sharded.h +++ b/rocAL/include/loaders/image/numpy_loader_sharded.h @@ -46,7 +46,7 @@ class NumpyLoaderSharded : public LoaderModule { void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, - const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, int channels, ExternalSourceFileMode 
mode, bool eos) override {} + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override {} private: void increment_loader_idx(); From b02002115365ddcfbf0a7bc8a34d09124ff08256 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Wed, 24 Jan 2024 15:25:29 +0000 Subject: [PATCH 26/33] Fixing bug with ROI changes for deepcam --- rocAL/include/pipeline/tensor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/rocAL/include/pipeline/tensor.h b/rocAL/include/pipeline/tensor.h index 14daf513c..0fb722da0 100644 --- a/rocAL/include/pipeline/tensor.h +++ b/rocAL/include/pipeline/tensor.h @@ -205,7 +205,6 @@ class TensorInfo { get_modified_dims_from_layout(_layout, layout, new_dims); _dims = new_dims; modify_strides(); - _max_shape.assign(_dims.begin() + 1, _dims.end()); } _layout = layout; if (_layout == RocalTensorlayout::NHWC || _layout == RocalTensorlayout::NDHWC) { From 249704711b0a6a6aab945b6e4aff458d24f08141 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Thu, 25 Jan 2024 18:37:51 +0000 Subject: [PATCH 27/33] Adding parameterVX changes --- rocAL/include/parameters/parameter.h | 4 ++ rocAL/include/parameters/parameter_factory.h | 6 ++ rocAL/include/parameters/parameter_random.h | 61 ++++++++++++++++++- rocAL/include/parameters/parameter_simple.h | 30 ++++++++- rocAL/include/parameters/parameter_vx.h | 20 +++--- rocAL/source/parameters/parameter_factory.cpp | 21 +++++-- 6 files changed, 125 insertions(+), 17 deletions(-) diff --git a/rocAL/include/parameters/parameter.h b/rocAL/include/parameters/parameter.h index 723c3dbd7..1bec7b334 100644 --- a/rocAL/include/parameters/parameter.h +++ b/rocAL/include/parameters/parameter.h @@ -33,6 +33,10 @@ class Parameter { /// used to internally renew state of the parameter if needed (for random parameters) virtual void renew(){}; + virtual void create_array(unsigned batch_size){}; + + virtual std::vector get_array() { return {}; }; + virtual 
~Parameter() {} /// /// \return returns if this parameter takes a single value (vs a range of values or many values) diff --git a/rocAL/include/parameters/parameter_factory.h b/rocAL/include/parameters/parameter_factory.h index ccd3b4d2c..582d51fb5 100644 --- a/rocAL/include/parameters/parameter_factory.h +++ b/rocAL/include/parameters/parameter_factory.h @@ -29,6 +29,8 @@ THE SOFTWARE. #include "parameter_random.h" #include "parameter_simple.h" +const int MAX_SEEDS = 1024; + enum class RocalParameterType { DETERMINISTIC = 0, RANDOM_UNIFORM, @@ -72,6 +74,8 @@ class ParameterFactory { void set_seed(unsigned seed); unsigned get_seed(); void generate_seed(); + int64_t get_seed_from_seedsequence(); + void increment_seed_sequence_idx(); template Parameter* create_uniform_rand_param(T start, T end) { @@ -104,4 +108,6 @@ class ParameterFactory { static ParameterFactory* _instance; static std::mutex _mutex; ParameterFactory(); + std::vector _seed_vector; + int _seed_sequence_idx = 0; }; diff --git a/rocAL/include/parameters/parameter_random.h b/rocAL/include/parameters/parameter_random.h index c379a894f..0df53e856 100644 --- a/rocAL/include/parameters/parameter_random.h +++ b/rocAL/include/parameters/parameter_random.h @@ -51,7 +51,12 @@ class UniformRand : public Parameter { T get() override { return _updated_val; }; - void renew() override { + + std::vector get_array() override { + return _array; + } + + void renew_value() { std::unique_lock lock(_lock); auto val = _generator(); @@ -64,6 +69,21 @@ class UniformRand : public Parameter { ((double)val / (double)_generator.max()) * ((double)_end - (double)_start) + (double)_start); } } + + void renew_array() { + for (uint i = 0; i < _batch_size; i++) { + renew_value(); + _array[i] = _updated_val; + } + } + + void renew() override { + if (_array.size() > 0) { + renew_array(); + } else { + renew_value(); + } + } int update(T start, T end) { std::unique_lock lock(_lock); if (end < start) @@ -73,6 +93,13 @@ class UniformRand : 
public Parameter { _end = end; return 0; } + + void create_array(unsigned batch_size) override { + if (_array.size() == 0) + _array.resize(batch_size); + _batch_size = batch_size; + } + bool single_value() const override { return (_start == _end); } @@ -81,8 +108,10 @@ class UniformRand : public Parameter { T _start; T _end; T _updated_val; + std::vector _array; std::mt19937 _generator; std::mutex _lock; + unsigned _batch_size; }; template @@ -142,7 +171,8 @@ struct CustomRand : public Parameter { T default_value() const override { return static_cast(_mean); } - void renew() override { + + void renew_value() { std::unique_lock lock(_lock); if (single_value()) { // If there is only a single value possible for the random variable @@ -161,10 +191,35 @@ struct CustomRand : public Parameter { _updated_val = _values[idx]; } } + + void renew_array() { + for (uint i = 0; i < _batch_size; i++) { + renew_value(); + _array[i] = _updated_val; + } + } + + void renew() override { + if (_array.size() > 0) { + renew_array(); + } else { + renew_value(); + } + } T get() override { return _updated_val; }; + std::vector get_array() override { + return _array; + } + + void create_array(unsigned batch_size) override { + if (_array.size() == 0) + _array.resize(batch_size); + _batch_size = batch_size; + } + bool single_value() const override { return (_values.size() == 1); } @@ -175,6 +230,8 @@ struct CustomRand : public Parameter { std::vector _comltv_dist; //!< commulative probabilities double _mean; T _updated_val; + std::vector _array; std::mt19937 _generator; std::mutex _lock; + unsigned _batch_size; }; \ No newline at end of file diff --git a/rocAL/include/parameters/parameter_simple.h b/rocAL/include/parameters/parameter_simple.h index d3fb0dc3f..c1ee1d5a2 100644 --- a/rocAL/include/parameters/parameter_simple.h +++ b/rocAL/include/parameters/parameter_simple.h @@ -35,11 +35,37 @@ class SimpleParameter : public Parameter { T get() override { return _val; } - int update(T new_val) { 
+ + std::vector get_array() override { + return _array; + } + + void update_single_value(T new_val) { _val = new_val; + } + + void update_array(T new_val) { + for (uint i = 0; i < _batch_size; i++) { + update_single_value(new_val); + _array[i] = _val; + } + } + + int update(T new_val) { + if (_array.size() > 0) + update_array(new_val); + else + update_single_value(new_val); return 0; } + void create_array(unsigned batch_size) override { + if (_array.size() == 0) + _array.resize(batch_size); + _batch_size = batch_size; + update(_val); + } + ~SimpleParameter() = default; bool single_value() const override { @@ -48,6 +74,8 @@ class SimpleParameter : public Parameter { private: T _val; + std::vector _array; + unsigned _batch_size; }; using pIntParam = std::shared_ptr>; using pFloatParam = std::shared_ptr>; diff --git a/rocAL/include/parameters/parameter_vx.h b/rocAL/include/parameters/parameter_vx.h index e63da998f..e71cd48ee 100644 --- a/rocAL/include/parameters/parameter_vx.h +++ b/rocAL/include/parameters/parameter_vx.h @@ -52,11 +52,12 @@ class ParameterVX { THROW("Reading vx scalar failed" + TOSTR(status)); } void create_array(std::shared_ptr graph, vx_enum data_type, unsigned batch_size) { - // _arrVal = (T*)malloc(sizeof(T) * _batch_size); _batch_size = batch_size; - _arrVal.resize(_batch_size); + _param->create_array(_batch_size); _array = vxCreateArray(vxGetContext((vx_reference)graph->get()), data_type, _batch_size); - vxAddArrayItems(_array, _batch_size, _arrVal.data(), sizeof(T)); + auto status = vxAddArrayItems(_array, _batch_size, get_array().data(), sizeof(T)); + if (status != 0) + THROW(" vxAddArrayItems failed in create_array (ParameterVX): " + TOSTR(status)) update_array(); } void set_param(Parameter* param) { @@ -96,11 +97,7 @@ class ParameterVX { } void update_array() { vx_status status; - for (uint i = 0; i < _batch_size; i++) { - _arrVal[i] = renew(); - // INFO("update_array: " + TOSTR(i) + "," + TOSTR(_arrVal[i])); - } - status = 
vxCopyArrayRange((vx_array)_array, 0, _batch_size, sizeof(T), _arrVal.data(), VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST); + status = vxCopyArrayRange((vx_array)_array, 0, _batch_size, sizeof(T), get_array().data(), VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST); if (status != 0) THROW(" vxCopyArrayRange failed in update_array (ParameterVX): " + TOSTR(status)) } @@ -109,12 +106,15 @@ class ParameterVX { return _param->get(); } + std::vector get_array() { + return _param->get_array(); + } + private: vx_scalar _scalar; - vx_array _array; + vx_array _array = nullptr; Parameter* _param; T _val; - std::vector _arrVal; unsigned _batch_size; unsigned OVX_PARAM_IDX; const T _DEFAULT_RANGE_START; diff --git a/rocAL/source/parameters/parameter_factory.cpp b/rocAL/source/parameters/parameter_factory.cpp index 6f3800bb4..cb31a55b8 100644 --- a/rocAL/source/parameters/parameter_factory.cpp +++ b/rocAL/source/parameters/parameter_factory.cpp @@ -104,33 +104,46 @@ void ParameterFactory::generate_seed() { _seed = rd(); } +int64_t +ParameterFactory::get_seed_from_seedsequence() { + increment_seed_sequence_idx(); + return _seed_vector[_seed_sequence_idx]; +} + +void ParameterFactory::increment_seed_sequence_idx() { + _seed_sequence_idx = (_seed_sequence_idx + 1) % MAX_SEEDS; +} + void ParameterFactory::set_seed(unsigned seed) { _seed = seed; + _seed_vector.resize(MAX_SEEDS); + std::seed_seq ss{seed}; + ss.generate(_seed_vector.begin(), _seed_vector.end()); } IntParam* ParameterFactory::create_uniform_int_rand_param(int start, int end) { - auto gen = new UniformRand(start, end, _seed); + auto gen = new UniformRand(start, end, get_seed_from_seedsequence()); auto ret = new IntParam(gen, RocalParameterType::RANDOM_UNIFORM); _parameters.insert(gen); return ret; } FloatParam* ParameterFactory::create_uniform_float_rand_param(float start, float end) { - auto gen = new UniformRand(start, end, _seed); + auto gen = new UniformRand(start, end, get_seed_from_seedsequence()); auto ret = new FloatParam(gen, 
RocalParameterType::RANDOM_UNIFORM); _parameters.insert(gen); return ret; } IntParam* ParameterFactory::create_custom_int_rand_param(const int* value, const double* frequencies, size_t size) { - auto gen = new CustomRand(value, frequencies, size, _seed); + auto gen = new CustomRand(value, frequencies, size, get_seed_from_seedsequence()); auto ret = new IntParam(gen, RocalParameterType::RANDOM_CUSTOM); _parameters.insert(gen); return ret; } FloatParam* ParameterFactory::create_custom_float_rand_param(const float* value, const double* frequencies, size_t size) { - auto gen = new CustomRand(value, frequencies, size, _seed); + auto gen = new CustomRand(value, frequencies, size, get_seed_from_seedsequence()); auto ret = new FloatParam(gen, RocalParameterType::RANDOM_CUSTOM); _parameters.insert(gen); return ret; From d0f9a87d367f883c56062d565187afb3a7594960 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Thu, 25 Jan 2024 18:49:40 +0000 Subject: [PATCH 28/33] Adding ROI changes for numpy reader --- rocAL/include/pipeline/tensor.h | 17 +++++++++------ rocAL/source/pipeline/tensor.cpp | 37 +++++++++++--------------------- 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/rocAL/include/pipeline/tensor.h b/rocAL/include/pipeline/tensor.h index 244d6563c..86eb8e9be 100644 --- a/rocAL/include/pipeline/tensor.h +++ b/rocAL/include/pipeline/tensor.h @@ -183,17 +183,17 @@ class TensorInfo { _channels = _dims.at(2); } else if (_layout == RocalTensorlayout::NDHWC) { _is_image = false; - _max_shape.resize(3); - _max_shape = {_dims.at(1), _dims.at(2), _dims.at(3)}; + _max_shape.resize(4); + _max_shape.assign(_dims.begin() + 1, _dims.end()); _channels = _dims.at(4); } else if (_layout == RocalTensorlayout::NCDHW) { _is_image = false; - _max_shape.resize(3); - _max_shape = {_dims.at(2), _dims.at(3), _dims.at(4)}; + _max_shape.resize(4); + _max_shape.assign(_dims.begin() + 1, _dims.end()); _channels = _dims.at(1); } } else { - if (!_max_shape.size()) 
_max_shape.resize(_num_of_dims - 1, 0); // Since 2 values will be stored in the vector + if (!_max_shape.size()) _max_shape.resize(_num_of_dims - 1, 0); _max_shape.assign(_dims.begin() + 1, _dims.end()); } reset_tensor_roi_buffers(); @@ -207,8 +207,11 @@ class TensorInfo { modify_strides(); } _layout = layout; - if (_layout == RocalTensorlayout::NONE) - set_max_shape(); + if (_layout == RocalTensorlayout::NHWC || _layout == RocalTensorlayout::NDHWC) { + _channels = _dims.back(); + } else if (_layout == RocalTensorlayout::NCHW || _layout == RocalTensorlayout::NCDHW) { + _channels = _dims.at(1); + } } void set_dims(std::vector& new_dims) { if (_num_of_dims == new_dims.size()) { diff --git a/rocAL/source/pipeline/tensor.cpp b/rocAL/source/pipeline/tensor.cpp index e35428bf6..69fd8c60f 100644 --- a/rocAL/source/pipeline/tensor.cpp +++ b/rocAL/source/pipeline/tensor.cpp @@ -77,6 +77,10 @@ vx_enum interpret_tensor_data_type(RocalTensorDataType data_type) { return VX_TYPE_FLOAT16; case RocalTensorDataType::UINT8: return VX_TYPE_UINT8; + case RocalTensorDataType::UINT32: + return VX_TYPE_UINT32; + case RocalTensorDataType::INT32: + return VX_TYPE_INT32; default: THROW("Unsupported Tensor type " + TOSTR(data_type)) } @@ -108,30 +112,21 @@ bool operator==(const TensorInfo &rhs, const TensorInfo &lhs) { void TensorInfo::reset_tensor_roi_buffers() { unsigned *roi_buf; - auto roi_no_of_dims = _is_image ? 2 : (_num_of_dims - 2); + auto roi_no_of_dims = _is_image ? 2 : (_num_of_dims - 1); auto roi_size = (_layout == RocalTensorlayout::NFCHW || _layout == RocalTensorlayout::NFHWC) ? 
_dims[0] * _dims[1] : _batch_size; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions allocate_host_or_pinned_mem((void **)&roi_buf, roi_size * roi_no_of_dims * 2 * sizeof(unsigned), _mem_type); _roi.set_ptr(roi_buf, _mem_type, roi_size, roi_no_of_dims); - if (_layout == RocalTensorlayout::NCDHW) { - for (unsigned i = 0; i < _batch_size; i++) { - unsigned *tensor_shape = _roi[i].end; - tensor_shape[2] = _max_shape[1]; - tensor_shape[1] = _max_shape[2]; - tensor_shape[0] = _max_shape[3]; - } - } else if (_layout == RocalTensorlayout::NDHWC) { - for (unsigned i = 0; i < _batch_size; i++) { - unsigned *tensor_shape = _roi[i].end; - tensor_shape[2] = _max_shape[0]; - tensor_shape[1] = _max_shape[1]; - tensor_shape[0] = _max_shape[2]; - } - } else if (_is_image) { + if (_is_image) { Roi2DCords *roi = _roi.get_2D_roi(); for (unsigned i = 0; i < _batch_size; i++) { roi[i].xywh.w = _max_shape.at(0); roi[i].xywh.h = _max_shape.at(1); } + } else { + for (unsigned i = 0; i < _batch_size; i++) { + unsigned *tensor_shape = _roi[i].end; + tensor_shape[i] = _max_shape[i]; + } } } @@ -226,14 +221,8 @@ void Tensor::update_tensor_roi(const std::vector> &shape) THROW("The number of dims to be updated and the num of dims of tensor info does not match") unsigned *tensor_shape = _info.roi()[i].end; - if (_info.layout() == RocalTensorlayout::NCDHW) { - tensor_shape[2] = shape[i][1] > max_shape[1] ? max_shape[1] : shape[i][1]; - tensor_shape[1] = shape[i][2] > max_shape[2] ? max_shape[2] : shape[i][2]; - tensor_shape[0] = shape[i][3] > max_shape[3] ? max_shape[3] : shape[i][3]; - } else if (_info.layout() == RocalTensorlayout::NDHWC) { - tensor_shape[2] = shape[i][0] > max_shape[0] ? max_shape[0] : shape[i][0]; - tensor_shape[1] = shape[i][1] > max_shape[1] ? max_shape[1] : shape[i][1]; - tensor_shape[0] = shape[i][2] > max_shape[2] ? 
max_shape[2] : shape[i][2]; + for (unsigned j = 0; j < max_shape.size(); j++) { + tensor_shape[j] = shape[i][j] > max_shape[j] ? max_shape[j] : shape[i][j]; } } } From 338fe5ea6cd93e872157875a4e8e389439fe9de9 Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Thu, 25 Jan 2024 18:50:01 +0000 Subject: [PATCH 29/33] Adding setLayout function for numpy reader --- rocAL/include/api/rocal_api_augmentation.h | 2 ++ rocAL/source/api/rocal_api_augmentation.cpp | 23 +++++++++++++++++++++ rocAL_pybind/rocal_pybind.cpp | 2 ++ 3 files changed, 27 insertions(+) diff --git a/rocAL/include/api/rocal_api_augmentation.h b/rocAL/include/api/rocal_api_augmentation.h index e9c9a68b0..0ff3b08f5 100644 --- a/rocAL/include/api/rocal_api_augmentation.h +++ b/rocAL/include/api/rocal_api_augmentation.h @@ -1098,4 +1098,6 @@ extern "C" RocalTensor ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, R RocalTensorLayout output_layout = ROCAL_NONE, RocalTensorOutputType output_datatype = ROCAL_UINT8); +extern "C" RocalTensor ROCAL_API_CALL rocalSetLayout(RocalContext context, RocalTensor input, + RocalTensorLayout output_layout = ROCAL_NONE); #endif // MIVISIONX_ROCAL_API_AUGMENTATION_H diff --git a/rocAL/source/api/rocal_api_augmentation.cpp b/rocAL/source/api/rocal_api_augmentation.cpp index 33fb5b57a..f746c3c4f 100644 --- a/rocAL/source/api/rocal_api_augmentation.cpp +++ b/rocAL/source/api/rocal_api_augmentation.cpp @@ -2155,3 +2155,26 @@ rocalNop( } return output; } + +RocalTensor ROCAL_API_CALL +rocalSetLayout( + RocalContext p_context, + RocalTensor p_input, + RocalTensorLayout output_layout) { + Tensor* output = nullptr; + if ((p_context == nullptr) || (p_input == nullptr)) { + ERR("Invalid ROCAL context or invalid input tensor") + return output; + } + + auto context = static_cast(p_context); + auto input = static_cast(p_input); + try { + RocalTensorlayout op_tensor_layout = static_cast(output_layout); + input->set_layout(op_tensor_layout); + } catch (const std::exception& e) { + 
context->capture_error(e.what()); + ERR(e.what()) + } + return input; +} diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index ae0623f20..45235dcd7 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -641,6 +641,8 @@ PYBIND11_MODULE(rocal_pybind, m) { m.def("rocalResetLoaders", &rocalResetLoaders); m.def("videoMetaDataReader", &rocalCreateVideoLabelReader, py::return_value_policy::reference); // rocal_api_augmentation.h + m.def("setLayout", &rocalSetLayout, + py::return_value_policy::reference); m.def("ssdRandomCrop", &rocalSSDRandomCrop, py::return_value_policy::reference); m.def("resize", &rocalResize, From dd6c61660dc741c8baa2ac426bb7f41cac343b2a Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Thu, 25 Jan 2024 19:03:12 +0000 Subject: [PATCH 30/33] Fixing numpy header order issue in numpy reader --- .../include/readers/image/numpy_data_reader.h | 1 + .../readers/image/numpy_data_reader.cpp | 33 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/rocAL/include/readers/image/numpy_data_reader.h b/rocAL/include/readers/image/numpy_data_reader.h index cfee3e3c4..70c947a4f 100644 --- a/rocAL/include/readers/image/numpy_data_reader.h +++ b/rocAL/include/readers/image/numpy_data_reader.h @@ -83,6 +83,7 @@ class NumpyDataReader : public Reader { unsigned _curr_file_idx; FILE* _current_fPtr; unsigned _current_file_size; + NumpyHeaderData _curr_file_header; std::string _last_id; std::string _last_file_name; size_t _shard_id = 0; diff --git a/rocAL/source/readers/image/numpy_data_reader.cpp b/rocAL/source/readers/image/numpy_data_reader.cpp index aba25c480..8090514a7 100644 --- a/rocAL/source/readers/image/numpy_data_reader.cpp +++ b/rocAL/source/readers/image/numpy_data_reader.cpp @@ -89,7 +89,8 @@ void NumpyDataReader::incremenet_read_ptr() { } size_t NumpyDataReader::open() { - auto file_path = _file_names[_curr_file_idx]; // Get next file name + auto file_path = 
_file_names[_curr_file_idx]; // Get current file name + _curr_file_header = _file_headers[_curr_file_idx]; // Get current file header incremenet_read_ptr(); _last_id = file_path; auto last_slash_idx = _last_id.find_last_of("\\/"); @@ -97,10 +98,10 @@ size_t NumpyDataReader::open() { _last_id.erase(0, last_slash_idx + 1); } - auto ret = GetFromCache(file_path, _file_headers[_curr_file_idx]); + auto ret = GetFromCache(file_path, _curr_file_header); if (!ret) { - ParseHeader(_file_headers[_curr_file_idx], file_path); - UpdateCache(file_path, _file_headers[_curr_file_idx]); + ParseHeader(_curr_file_header, file_path); + UpdateCache(file_path, _curr_file_header); } else { _current_fPtr = std::fopen(file_path.c_str(), "rb"); if (_current_fPtr == nullptr) @@ -108,7 +109,7 @@ size_t NumpyDataReader::open() { } fseek(_current_fPtr, 0, SEEK_SET); // Take the file pointer back to the start - return _file_headers[_curr_file_idx].nbytes(); + return _curr_file_header.nbytes(); } bool NumpyDataReader::GetFromCache(const std::string& file_name, NumpyHeaderData& header) { @@ -321,10 +322,10 @@ size_t NumpyDataReader::read_numpy_data(void* buf, size_t read_size, std::vector // Requested read size bigger than the file size? just read as many bytes as the file size read_size = (read_size > _current_file_size) ? 
_current_file_size : read_size; - if (std::fseek(_current_fPtr, _file_headers[_curr_file_idx]._data_offset, SEEK_SET)) + if (std::fseek(_current_fPtr, _curr_file_header._data_offset, SEEK_SET)) THROW("Seek operation failed: " + std::strerror(errno)); - auto shape = _file_headers[_curr_file_idx].shape(); + auto shape = _curr_file_header.shape(); auto num_dims = max_shape.size(); std::vector strides(num_dims + 1); strides[num_dims] = 1; @@ -333,21 +334,21 @@ size_t NumpyDataReader::read_numpy_data(void* buf, size_t read_size, std::vector } size_t actual_read_size = 0; - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::UINT8) + if (_curr_file_header.type() == RocalTensorDataType::UINT8) actual_read_size = ParseNumpyData((u_int8_t*)buf, strides, shape); - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::UINT32) + if (_curr_file_header.type() == RocalTensorDataType::UINT32) actual_read_size = ParseNumpyData((u_int32_t*)buf, strides, shape); - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::INT8) + if (_curr_file_header.type() == RocalTensorDataType::INT8) actual_read_size = ParseNumpyData((int8_t*)buf, strides, shape); - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::INT32) + if (_curr_file_header.type() == RocalTensorDataType::INT32) actual_read_size = ParseNumpyData((int32_t*)buf, strides, shape); - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::FP16) + if (_curr_file_header.type() == RocalTensorDataType::FP16) #if defined(AMD_FP16_SUPPORT) actual_read_size = ParseNumpyData((half*)buf, strides, shape); #else THROW("FLOAT16 type tensor not supported") #endif - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::FP32) + if (_curr_file_header.type() == RocalTensorDataType::FP32) actual_read_size = ParseNumpyData((float*)buf, strides, shape); return actual_read_size; @@ -369,7 +370,7 @@ size_t NumpyDataReader::ParseNumpyData(T* buf, std::vector strides, st } const 
NumpyHeaderData NumpyDataReader::get_numpy_header_data() { - return _file_headers[_curr_file_idx]; + return _curr_file_header; } size_t NumpyDataReader::read_data(unsigned char* buf, size_t read_size) { @@ -379,10 +380,10 @@ size_t NumpyDataReader::read_data(unsigned char* buf, size_t read_size) { // Requested read size bigger than the file size? just read as many bytes as the file size read_size = (read_size > _current_file_size) ? _current_file_size : read_size; - if (std::fseek(_current_fPtr, _file_headers[_curr_file_idx]._data_offset, SEEK_SET)) + if (std::fseek(_current_fPtr, _curr_file_header._data_offset, SEEK_SET)) THROW("Seek operation failed: " + std::strerror(errno)); - size_t actual_read_size = std::fread(buf, 1, _file_headers[_curr_file_idx].nbytes(), _current_fPtr); + size_t actual_read_size = std::fread(buf, 1, _curr_file_header.nbytes(), _current_fPtr); return actual_read_size; } From 23b193b2ab8f06057475574132f5db93358e1b8e Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Thu, 25 Jan 2024 19:30:41 +0000 Subject: [PATCH 31/33] Formatting changes for numpy data reader --- .../include/readers/image/numpy_data_reader.h | 26 ++--- .../readers/image/numpy_data_reader.cpp | 103 +++++++++--------- 2 files changed, 65 insertions(+), 64 deletions(-) diff --git a/rocAL/include/readers/image/numpy_data_reader.h b/rocAL/include/readers/image/numpy_data_reader.h index 70c947a4f..bc814a046 100644 --- a/rocAL/include/readers/image/numpy_data_reader.h +++ b/rocAL/include/readers/image/numpy_data_reader.h @@ -24,9 +24,9 @@ THE SOFTWARE. 
#include #include +#include #include #include -#include #include "commons.h" #include "image_reader.h" @@ -102,23 +102,23 @@ class NumpyDataReader : public Reader { size_t _file_count_all_shards; std::mutex _cache_mutex_; std::map _header_cache_; - const RocalTensorDataType TypeFromNumpyStr(const std::string& format); - inline void SkipSpaces(const char*& ptr); - void ParseHeaderContents(NumpyHeaderData& target, const std::string& header); + const RocalTensorDataType get_numpy_dtype(const std::string& format); + inline void ignore_spaces(const char*& ptr); + void decode_header(NumpyHeaderData& target, const std::string& header); template - void Skip(const char*& ptr, const char (&what)[N]); + void skip_string(const char*& ptr, const char (&what)[N]); template - bool TrySkip(const char*& ptr, const char (&what)[N]); + bool check_and_skip_string(const char*& ptr, const char (&what)[N]); template - void SkipFieldName(const char*& ptr, const char (&name)[N]); + void skip_field(const char*& ptr, const char (&name)[N]); template - T ParseInteger(const char*& ptr); - std::string ParseStringValue(const char*& input, char delim_start = '\'', char delim_end = '\''); - void ParseHeader(NumpyHeaderData& parsed_header, std::string file_path); + T parse_int(const char*& ptr); + std::string read_dtype_string(const char*& input, char delim_start = '\'', char delim_end = '\''); + void read_header(NumpyHeaderData& parsed_header, std::string file_path); template - size_t ParseNumpyData(T* buf, std::vector strides, std::vector shapes, unsigned dim = 0); - bool GetFromCache(const std::string& file_name, NumpyHeaderData& target); - void UpdateCache(const std::string& file_name, const NumpyHeaderData& value); + size_t copy_array_data(T* buf, std::vector strides, std::vector shapes, unsigned dim = 0); + bool get_cached_header(const std::string& file_name, NumpyHeaderData& target); + void update_header_cache(const std::string& file_name, const NumpyHeaderData& value); void 
incremenet_read_ptr(); int release(); size_t get_file_shard_id(); diff --git a/rocAL/source/readers/image/numpy_data_reader.cpp b/rocAL/source/readers/image/numpy_data_reader.cpp index 8090514a7..2f2171509 100644 --- a/rocAL/source/readers/image/numpy_data_reader.cpp +++ b/rocAL/source/readers/image/numpy_data_reader.cpp @@ -25,9 +25,10 @@ THE SOFTWARE. #include #include +#include #include #include -#include + #include "filesystem.h" NumpyDataReader::NumpyDataReader() : _shuffle_time("shuffle_time", DBG_TIMING) { @@ -89,7 +90,7 @@ void NumpyDataReader::incremenet_read_ptr() { } size_t NumpyDataReader::open() { - auto file_path = _file_names[_curr_file_idx]; // Get current file name + auto file_path = _file_names[_curr_file_idx]; // Get current file name _curr_file_header = _file_headers[_curr_file_idx]; // Get current file header incremenet_read_ptr(); _last_id = file_path; @@ -98,10 +99,10 @@ size_t NumpyDataReader::open() { _last_id.erase(0, last_slash_idx + 1); } - auto ret = GetFromCache(file_path, _curr_file_header); + auto ret = get_cached_header(file_path, _curr_file_header); if (!ret) { - ParseHeader(_curr_file_header, file_path); - UpdateCache(file_path, _curr_file_header); + read_header(_curr_file_header, file_path); + update_header_cache(file_path, _curr_file_header); } else { _current_fPtr = std::fopen(file_path.c_str(), "rb"); if (_current_fPtr == nullptr) @@ -112,7 +113,7 @@ size_t NumpyDataReader::open() { return _curr_file_header.nbytes(); } -bool NumpyDataReader::GetFromCache(const std::string& file_name, NumpyHeaderData& header) { +bool NumpyDataReader::get_cached_header(const std::string& file_name, NumpyHeaderData& header) { std::unique_lock cache_lock(_cache_mutex_); auto it = _header_cache_.find(file_name); if (it == _header_cache_.end()) { @@ -123,12 +124,12 @@ bool NumpyDataReader::GetFromCache(const std::string& file_name, NumpyHeaderData } } -void NumpyDataReader::UpdateCache(const std::string& file_name, const NumpyHeaderData& value) { 
+void NumpyDataReader::update_header_cache(const std::string& file_name, const NumpyHeaderData& value) { std::unique_lock cache_lock(_cache_mutex_); _header_cache_[file_name] = value; } -const RocalTensorDataType NumpyDataReader::TypeFromNumpyStr(const std::string& format) { +const RocalTensorDataType NumpyDataReader::get_numpy_dtype(const std::string& format) { if (format == "u1") return RocalTensorDataType::UINT8; // if (format == "u2") return TypeTable::GetTypeInfo(); // Currently not supported in rocAL if (format == "u4") return RocalTensorDataType::UINT32; @@ -148,20 +149,20 @@ const RocalTensorDataType NumpyDataReader::TypeFromNumpyStr(const std::string& f THROW("Unknown Numpy type string"); } -inline void NumpyDataReader::SkipSpaces(const char*& ptr) { +inline void NumpyDataReader::ignore_spaces(const char*& ptr) { while (::isspace(*ptr)) ptr++; } template -void NumpyDataReader::Skip(const char*& ptr, const char (&what)[N]) { +void NumpyDataReader::skip_string(const char*& ptr, const char (&what)[N]) { if (strncmp(ptr, what, N - 1)) THROW("Found wrong symbol during parsing"); ptr += N - 1; } template -bool NumpyDataReader::TrySkip(const char*& ptr, const char (&what)[N]) { +bool NumpyDataReader::check_and_skip_string(const char*& ptr, const char (&what)[N]) { if (!strncmp(ptr, what, N - 1)) { ptr += N - 1; return true; @@ -171,18 +172,18 @@ bool NumpyDataReader::TrySkip(const char*& ptr, const char (&what)[N]) { } template -void NumpyDataReader::SkipFieldName(const char*& ptr, const char (&name)[N]) { - SkipSpaces(ptr); - Skip(ptr, "'"); - Skip(ptr, name); - Skip(ptr, "'"); - SkipSpaces(ptr); - Skip(ptr, ":"); - SkipSpaces(ptr); +void NumpyDataReader::skip_field(const char*& ptr, const char (&name)[N]) { + ignore_spaces(ptr); + skip_string(ptr, "'"); + skip_string(ptr, name); + skip_string(ptr, "'"); + ignore_spaces(ptr); + skip_string(ptr, ":"); + ignore_spaces(ptr); } template -T NumpyDataReader::ParseInteger(const char*& ptr) { +T 
NumpyDataReader::parse_int(const char*& ptr) { char* out_ptr = const_cast(ptr); // strtol takes a non-const pointer T value = static_cast(strtol(ptr, &out_ptr, 10)); if (out_ptr == ptr) @@ -191,7 +192,7 @@ T NumpyDataReader::ParseInteger(const char*& ptr) { return value; } -std::string NumpyDataReader::ParseStringValue(const char*& input, char delim_start, char delim_end) { +std::string NumpyDataReader::read_dtype_string(const char*& input, char delim_start, char delim_end) { if (*input++ != delim_start) THROW("Expected \'" + std::to_string(delim_start) + "\'"); std::string out; @@ -229,39 +230,39 @@ std::string NumpyDataReader::ParseStringValue(const char*& input, char delim_sta return out; } -void NumpyDataReader::ParseHeaderContents(NumpyHeaderData& target, const std::string& header) { +void NumpyDataReader::decode_header(NumpyHeaderData& target, const std::string& header) { const char* hdr = header.c_str(); - SkipSpaces(hdr); - Skip(hdr, "{"); - SkipFieldName(hdr, "descr"); - auto typestr = ParseStringValue(hdr); + ignore_spaces(hdr); + skip_string(hdr, "{"); + skip_field(hdr, "descr"); + auto typestr = read_dtype_string(hdr); // < means LE, | means N/A, = means native. 
In all those cases, we can read bool little_endian = (typestr[0] == '<' || typestr[0] == '|' || typestr[0] == '='); if (!little_endian) THROW("Big Endian files are not supported."); - target._type_info = TypeFromNumpyStr(typestr.substr(1)); + target._type_info = get_numpy_dtype(typestr.substr(1)); - SkipSpaces(hdr); - Skip(hdr, ","); - SkipFieldName(hdr, "fortran_order"); - if (TrySkip(hdr, "True")) { + ignore_spaces(hdr); + skip_string(hdr, ","); + skip_field(hdr, "fortran_order"); + if (check_and_skip_string(hdr, "True")) { target._fortran_order = true; - } else if (TrySkip(hdr, "False")) { + } else if (check_and_skip_string(hdr, "False")) { target._fortran_order = false; } else { THROW("Failed to parse fortran_order field."); } - SkipSpaces(hdr); - Skip(hdr, ","); - SkipFieldName(hdr, "shape"); - Skip(hdr, "("); - SkipSpaces(hdr); + ignore_spaces(hdr); + skip_string(hdr, ","); + skip_field(hdr, "shape"); + skip_string(hdr, "("); + ignore_spaces(hdr); target._shape.clear(); while (*hdr != ')') { - // ParseInteger already skips the leading spaces (strtol does). - target._shape.push_back(static_cast(ParseInteger(hdr))); - SkipSpaces(hdr); - if (!(TrySkip(hdr, ",")) && (target._shape.size() <= 1)) + // parse_int already skips the leading spaces (strtol does). 
+ target._shape.push_back(static_cast(parse_int(hdr))); + ignore_spaces(hdr); + if (!(check_and_skip_string(hdr, ",")) && (target._shape.size() <= 1)) THROW("The first number in a tuple must be followed by a comma."); } if (target._fortran_order) { @@ -270,7 +271,7 @@ void NumpyDataReader::ParseHeaderContents(NumpyHeaderData& target, const std::st } } -void NumpyDataReader::ParseHeader(NumpyHeaderData& parsed_header, std::string file_path) { +void NumpyDataReader::read_header(NumpyHeaderData& parsed_header, std::string file_path) { // check if the file is actually a numpy file std::vector token(128); _current_fPtr = std::fopen(file_path.c_str(), "rb"); @@ -311,7 +312,7 @@ void NumpyDataReader::ParseHeader(NumpyHeaderData& parsed_header, std::string fi if (std::fseek(_current_fPtr, offset, SEEK_SET)) THROW("Seek operation failed: " + std::strerror(errno)); - ParseHeaderContents(parsed_header, header); + decode_header(parsed_header, header); parsed_header._data_offset = offset; } @@ -335,27 +336,27 @@ size_t NumpyDataReader::read_numpy_data(void* buf, size_t read_size, std::vector size_t actual_read_size = 0; if (_curr_file_header.type() == RocalTensorDataType::UINT8) - actual_read_size = ParseNumpyData((u_int8_t*)buf, strides, shape); + actual_read_size = copy_array_data((u_int8_t*)buf, strides, shape); if (_curr_file_header.type() == RocalTensorDataType::UINT32) - actual_read_size = ParseNumpyData((u_int32_t*)buf, strides, shape); + actual_read_size = copy_array_data((u_int32_t*)buf, strides, shape); if (_curr_file_header.type() == RocalTensorDataType::INT8) - actual_read_size = ParseNumpyData((int8_t*)buf, strides, shape); + actual_read_size = copy_array_data((int8_t*)buf, strides, shape); if (_curr_file_header.type() == RocalTensorDataType::INT32) - actual_read_size = ParseNumpyData((int32_t*)buf, strides, shape); + actual_read_size = copy_array_data((int32_t*)buf, strides, shape); if (_curr_file_header.type() == RocalTensorDataType::FP16) #if 
defined(AMD_FP16_SUPPORT) - actual_read_size = ParseNumpyData((half*)buf, strides, shape); + actual_read_size = copy_array_data((half*)buf, strides, shape); #else THROW("FLOAT16 type tensor not supported") #endif if (_curr_file_header.type() == RocalTensorDataType::FP32) - actual_read_size = ParseNumpyData((float*)buf, strides, shape); + actual_read_size = copy_array_data((float*)buf, strides, shape); return actual_read_size; } template -size_t NumpyDataReader::ParseNumpyData(T* buf, std::vector strides, std::vector shapes, unsigned dim) { +size_t NumpyDataReader::copy_array_data(T* buf, std::vector strides, std::vector shapes, unsigned dim) { if (dim == (shapes.size() - 1)) { auto actual_read_size = std::fread(buf, sizeof(T), shapes[dim], _current_fPtr); return actual_read_size; @@ -363,7 +364,7 @@ size_t NumpyDataReader::ParseNumpyData(T* buf, std::vector strides, st T* startPtr = buf; size_t read_size = 0; for (unsigned d = 0; d < shapes[dim]; d++) { - read_size += ParseNumpyData(startPtr, strides, shapes, dim + 1); + read_size += copy_array_data(startPtr, strides, shapes, dim + 1); startPtr += strides[dim + 1]; } return read_size; From 14079e031968865484918d55c3b6b677f88677fc Mon Sep 17 00:00:00 2001 From: SundarRajan98 Date: Mon, 29 Jan 2024 08:43:56 +0000 Subject: [PATCH 32/33] Adding cast augmentation API --- rocAL/include/api/rocal_api_augmentation.h | 11 ++++++++ rocAL/source/api/rocal_api_augmentation.cpp | 29 +++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/rocAL/include/api/rocal_api_augmentation.h b/rocAL/include/api/rocal_api_augmentation.h index 71264e42c..7cb74e75e 100644 --- a/rocAL/include/api/rocal_api_augmentation.h +++ b/rocAL/include/api/rocal_api_augmentation.h @@ -1200,6 +1200,17 @@ extern "C" RocalTensor ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, R RocalTensorLayout output_layout = ROCAL_NONE, RocalTensorOutputType output_datatype = ROCAL_UINT8); +/** + * \brief Cast input tensor from one data type to 
another + * \param context Rocal context + * \param input Input tensor + * \param is_output Sets if the output is to be given to user or as intermediate buffer + * \param output_datatype Datatype of the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCast(RocalContext context, RocalTensor input, + bool is_output, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + extern "C" RocalTensor ROCAL_API_CALL rocalSetLayout(RocalContext context, RocalTensor input, RocalTensorLayout output_layout = ROCAL_NONE); #endif // MIVISIONX_ROCAL_API_AUGMENTATION_H diff --git a/rocAL/source/api/rocal_api_augmentation.cpp b/rocAL/source/api/rocal_api_augmentation.cpp index efd233c93..ea1c3344c 100644 --- a/rocAL/source/api/rocal_api_augmentation.cpp +++ b/rocAL/source/api/rocal_api_augmentation.cpp @@ -2326,6 +2326,35 @@ rocalNop( return output; } +RocalTensor ROCAL_API_CALL rocalCast(RocalContext p_context, RocalTensor p_input, + bool is_output, + RocalTensorOutputType output_datatype) { + Tensor* output = nullptr; + if ((p_context == nullptr) || (p_input == nullptr)) { + ERR("Invalid ROCAL context or invalid input tensor") + return output; + } + auto context = static_cast(p_context); + auto input = static_cast(p_input); + try { + RocalTensorDataType op_tensor_datatype = static_cast(output_datatype); + + if (input->info().data_type() == op_tensor_datatype) { + output = context->master_graph->create_tensor(input->info(), is_output); + context->master_graph->add_node({input}, {output}); + } else { + TensorInfo output_info = input->info(); + output_info.set_data_type(op_tensor_datatype); + output = context->master_graph->create_tensor(output_info, is_output); + context->master_graph->add_node({input}, {output}); + } + } catch(const std::exception& e) { + context->capture_error(e.what()); + ERR(e.what()) + } + return output; +} + RocalTensor ROCAL_API_CALL rocalSetLayout( RocalContext p_context, From e56f1622580c0f548d0c4a7fc2271c6120c7fb0b Mon Sep 17 00:00:00 2001 
From: SundarRajan98 Date: Tue, 30 Jan 2024 19:25:25 +0000 Subject: [PATCH 33/33] Modifying vx_roi_handle creation for generic ROI --- rocAL/source/pipeline/tensor.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/rocAL/source/pipeline/tensor.cpp b/rocAL/source/pipeline/tensor.cpp index 2cab4ce79..043c08319 100644 --- a/rocAL/source/pipeline/tensor.cpp +++ b/rocAL/source/pipeline/tensor.cpp @@ -333,18 +333,21 @@ void Tensor::create_roi_tensor_from_handle(void **handle) { THROW("Empty ROI handle is passed") } - vx_size num_of_dims = 2; - vx_size stride[num_of_dims]; - std::vector roi_dims = {_info.batch_size(), 4}; + auto _is_image = _info.is_image(); + vx_size roi_num_of_dims = 2; + vx_size num_of_dims = _is_image ? 2 : (_info.num_of_dims() - 1); + std::vector roi_dims; + roi_dims = {_info.batch_size(), num_of_dims * 2}; if (_info.layout() == RocalTensorlayout::NFCHW || _info.layout() == RocalTensorlayout::NFHWC) roi_dims = {_info.dims()[0] * _info.dims()[1], 4}; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions stride[0] = sizeof(vx_uint32); + vx_size stride[roi_num_of_dims]; stride[0] = sizeof(vx_uint32); stride[1] = stride[0] * roi_dims[0]; vx_enum mem_type = VX_MEMORY_TYPE_HOST; if (_info.mem_type() == RocalMemType::HIP) mem_type = VX_MEMORY_TYPE_HIP; - _vx_roi_handle = vxCreateTensorFromHandle(_context, num_of_dims, roi_dims.data(), + _vx_roi_handle = vxCreateTensorFromHandle(_context, roi_num_of_dims, roi_dims.data(), VX_TYPE_UINT32, 0, stride, *handle, mem_type); vx_status status; if ((status = vxGetStatus((vx_reference)_vx_roi_handle)) != VX_SUCCESS)