diff --git a/apps/image_augmentation/image_augmentation.cpp b/apps/image_augmentation/image_augmentation.cpp index 1286d1862..a21b89393 100644 --- a/apps/image_augmentation/image_augmentation.cpp +++ b/apps/image_augmentation/image_augmentation.cpp @@ -48,12 +48,12 @@ int main(int argc, const char** argv) { if (argc < MIN_ARG_COUNT) { printf( "Usage: image_augmentation \ - decode_width decode_height video_mode gray_scale/rgb display_on_off decode_shard_count \n"); + decode_width decode_height decoder_mode gray_scale/rgb display_on_off decode_shard_count \n"); return -1; } int argIdx = 0; const char* folderPath1 = argv[++argIdx]; - int video_mode = 0; // 0 means no video decode, 1 means hardware, 2 means software decoding + int decoder_mode = 0; // 0: JPEG file source, 1: fused JPEG crop decoder, 2: video software decode, greater than 2: video hardware decode bool display = 1; // Display the images int aug_depth = 1; // how deep is the augmentation tree int rgb = 1; // process color images @@ -62,7 +62,7 @@ int main(int argc, const char** argv) { bool processing_device = 1; size_t shard_count = 2; int shuffle = 0; - int dec_mode = 0; + int decoder_type = 0; const char *outName = "image_augmentation_app.png"; if (argc >= argIdx + MIN_ARG_COUNT) @@ -75,7 +75,7 @@ int main(int argc, const char** argv) { decode_height = atoi(argv[++argIdx]); if (argc >= argIdx + MIN_ARG_COUNT) - video_mode = atoi(argv[++argIdx]); + decoder_mode = atoi(argv[++argIdx]); if (argc >= argIdx + MIN_ARG_COUNT) rgb = atoi(argv[++argIdx]); @@ -90,7 +90,7 @@ int main(int argc, const char** argv) { shuffle = atoi(argv[++argIdx]); if (argc >= argIdx + MIN_ARG_COUNT) - dec_mode = atoi(argv[++argIdx]); + decoder_type = atoi(argv[++argIdx]); if (argc >= argIdx + MIN_ARG_COUNT) outName = argv[++argIdx]; @@ -108,7 +108,7 @@ int main(int argc, const char** argv) { return -1; } - RocalDecoderType dec_type = (RocalDecoderType)dec_mode; + RocalDecoderType dec_type = (RocalDecoderType)decoder_type; /*>>>>>>>>>>>>>>>> Creating rocAL parameters 
<<<<<<<<<<<<<<<<*/ @@ -126,7 +126,7 @@ int main(int argc, const char** argv) { /*>>>>>>>>>>>>>>>>>>> Graph description <<<<<<<<<<<<<<<<<<<*/ RocalTensor input1; - if (video_mode != 0) { + if (decoder_mode >= 2) { unsigned sequence_length = 3; unsigned frame_step = 3; unsigned frame_stride = 1; @@ -134,7 +134,12 @@ int main(int argc, const char** argv) { std::cout << "Output width and height is needed for video decode\n"; return -1; } - input1 = rocalVideoFileSource(handle, folderPath1, color_format, ((video_mode == 1) ? RocalDecodeDevice::ROCAL_HW_DECODE : RocalDecodeDevice::ROCAL_SW_DECODE), shard_count, sequence_length, frame_step, frame_stride, shuffle, true, false); + input1 = rocalVideoFileSource(handle, folderPath1, color_format, (decoder_mode == 2)? ROCAL_SW_DECODE: ROCAL_HW_DECODE, shard_count, sequence_length, frame_step, frame_stride, shuffle, true, false); + } else if (decoder_mode == 1) { + std::vector area = {0.08, 1}; + std::vector aspect_ratio = {3.0f / 4, 4.0f / 3}; + input1 = rocalFusedJpegCrop(handle, folderPath1, color_format, shard_count, false, area, aspect_ratio, 10, false, false, ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED, decode_width, decode_height); + } else { // The jpeg file loader can automatically select the best size to decode all images to that size // User can alternatively set the size or change the policy that is used to automatically find the size @@ -152,7 +157,7 @@ int main(int argc, const char** argv) { RocalTensor tensor0; int resize_w = 112, resize_h = 112; - if (video_mode) { + if (decoder_mode >= 2) { resize_h = decode_height; resize_w = decode_width; tensor0 = input1; @@ -214,7 +219,7 @@ int main(int argc, const char** argv) { int w = rocalGetOutputWidth(handle); int p = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24) ? 3 : 1); std::cout << "output width " << w << " output height " << h << " color planes " << p << std::endl; - const unsigned number_of_cols = video_mode ? 
1 : 10; + const unsigned number_of_cols = (decoder_mode >= 2) ? 1 : 10; auto cv_color_format = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24) ? CV_8UC3 : CV_8UC1); cv::Mat mat_output(h + AMD_ROCm_Black_resize.rows, w * number_of_cols, cv_color_format); cv::Mat mat_input(h, w, cv_color_format); diff --git a/docs/examples/image_processing/decoder.py b/docs/examples/image_processing/decoder.py index eccce45fd..073fa383c 100644 --- a/docs/examples/image_processing/decoder.py +++ b/docs/examples/image_processing/decoder.py @@ -9,7 +9,7 @@ import cupy as cp seed = 1549361629 -image_dir = "../../../../data/images/AMD-tinyDataSet/" +image_dir = "../../../data/images/AMD-tinyDataSet/" batch_size = 4 gpu_id = 0 @@ -34,13 +34,13 @@ def show_pipeline_output(pipe, device): pipe.build() data_loader = ROCALClassificationIterator(pipe, device) images = next(iter(data_loader)) - show_images(images[0], device) + show_images(images[0][0], device) @pipeline_def(seed=seed) def image_decoder_pipeline(device="cpu", path=image_dir): - jpegs, labels = fn.readers.file(file_root=path, shard_id=0, num_shards=1, random_shuffle=False) + jpegs, labels = fn.readers.file(file_root=path) images = fn.decoders.image(jpegs, file_root=path, device=device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False) - return fn.resize(images, device=device, resize_x=300, resize_y=300) + return fn.resize(images, device=device, resize_width=300, resize_height=300) def main(): print ('Optional arguments: ') @@ -52,9 +52,8 @@ def main(): rocal_device = "gpu" if len(sys.argv) > 2: img_folder = sys.argv[2] - - pipe = image_decoder_pipeline(batch_size=bs, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, - reverse_channels=True, mean = [0, 0, 0], std=[255, 255, 255], device=rocal_device, path=img_folder) + pipe = image_decoder_pipeline(batch_size=bs, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, + reverse_channels=True, mean = [0, 0, 0], 
std=[255,255,255], device=rocal_device, path=img_folder) show_pipeline_output(pipe, device=rocal_device) if __name__ == '__main__': diff --git a/docs/examples/image_processing/decoder_examples.ipynb b/docs/examples/image_processing/decoder_examples.ipynb index 27098f079..cb1bef27e 100644 --- a/docs/examples/image_processing/decoder_examples.ipynb +++ b/docs/examples/image_processing/decoder_examples.ipynb @@ -38,7 +38,7 @@ "%matplotlib inline\n", "\n", "seed = 1549361629\n", - "image_dir = \"../../../../data/images/AMD-tinyDataSet/\"\n", + "image_dir = \"../../../data/images/AMD-tinyDataSet/\"\n", "batch_size = 4\n", "gpu_id = 0\n", "\n", @@ -61,7 +61,7 @@ " pipe.build()\n", " data_loader = ROCALClassificationIterator(pipe, device, device_id)\n", " images = next(iter(data_loader))\n", - " show_images(images[0], device)\n" + " show_images(images[0][0], device)\n" ] }, { @@ -82,9 +82,9 @@ "source": [ "@pipeline_def(seed=seed)\n", "def image_decoder_pipeline(device=\"cpu\"):\n", - " jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n", + " jpegs, labels = fn.readers.file(file_root=image_dir)\n", " images = fn.decoders.image(jpegs, file_root=image_dir, device=device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False)\n", - " return fn.resize(images, device=device, resize_x=300, resize_y=300)\n", + " return fn.resize(images, device=device, resize_width=300, resize_height=300)\n", "\n", "pipe = image_decoder_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n", " reverse_channels=True, mean = [0, 0, 0], std=[255,255,255], device=\"cpu\")\n", @@ -109,12 +109,13 @@ "source": [ "@pipeline_def(seed=seed)\n", "def image_decoder_random_crop_pipeline(device=\"cpu\"):\n", - " jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n", + " jpegs, labels = fn.readers.file(file_root=image_dir)\n", " images = 
fn.decoders.image_slice(jpegs, file_root=image_dir, \n", - " device=device,\n", " output_type=types.RGB,\n", + " shard_id = 0,\n", + " num_shards = 1,\n", " random_shuffle=True)\n", - " return fn.resize(images, device=device, resize_x=300, resize_y=300)\n", + " return fn.resize(images, device=device, resize_width=300, resize_height=300)\n", " \n", "pipe = image_decoder_random_crop_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n", " reverse_channels=True, mean=[0,0,0], std = [255,255,255], device=\"cpu\")\n", @@ -184,7 +185,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/docs/examples/image_processing/inference_pipeline.py b/docs/examples/image_processing/inference_pipeline.py index a7db74e16..f97da7b37 100644 --- a/docs/examples/image_processing/inference_pipeline.py +++ b/docs/examples/image_processing/inference_pipeline.py @@ -31,7 +31,7 @@ seed = 1549361629 -image_dir = "../../../../data/images/AMD-tinyDataSet/" +image_dir = "../../../data/images/AMD-tinyDataSet/" batch_size = 4 gpu_id = 0 diff --git a/rocAL-setup.py b/rocAL-setup.py index fa6b5de91..1032aef6c 100644 --- a/rocAL-setup.py +++ b/rocAL-setup.py @@ -311,11 +311,11 @@ os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + linuxSystemInstall_check+' install lmdb-devel rapidjson-devel') - # turbo-JPEG - https://github.com/rrawther/libjpeg-turbo.git -- 2.0.6.2 + # turbo-JPEG - https://github.com/libjpeg-turbo/libjpeg-turbo.git -- 3.0.1 os.system( - '(cd '+deps_dir+'; git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git )') + '(cd '+deps_dir+'; git clone -b 3.0.1 https://github.com/libjpeg-turbo/libjpeg-turbo.git )') os.system('(cd '+deps_dir+'/libjpeg-turbo; mkdir build; cd build; '+linuxCMake + - ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE 
-DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ..; make -j 4; sudo make install )') + ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ..; make -j 4; sudo make install )') # RPP os.system('sudo -v') os.system('(cd '+deps_dir+'; git clone -b '+rppVersion+' https://github.com/GPUOpen-ProfessionalCompute-Libraries/rpp.git; cd rpp; mkdir build-'+backend+'; cd build-'+backend+'; ' + diff --git a/rocAL/CMakeLists.txt b/rocAL/CMakeLists.txt index 1dc4630e1..c81ed5f99 100644 --- a/rocAL/CMakeLists.txt +++ b/rocAL/CMakeLists.txt @@ -42,6 +42,14 @@ find_package(RapidJSON QUIET) find_package(StdFilesystem QUIET) find_package(HALF QUIET) +if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path") +elseif(ROCM_PATH) + message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}") +else() + set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path") +endif() + # HIP Backend if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "HIP") if(NOT DEFINED HIP_PATH) @@ -225,6 +233,7 @@ if(${BUILD_ROCAL}) include/augmentations/geometry_augmentations/ include/decoders/image/ include/decoders/video/ + include/decoders/libjpeg/ include/device/ include/loaders/ include/loaders/image/ diff --git a/rocAL/include/api/rocal_api_augmentation.h b/rocAL/include/api/rocal_api_augmentation.h index d236073fa..7cb74e75e 100644 --- a/rocAL/include/api/rocal_api_augmentation.h +++ b/rocAL/include/api/rocal_api_augmentation.h @@ -329,6 +329,20 @@ extern "C" RocalTensor ROCAL_API_CALL rocalFlipFixed(RocalContext context, Rocal RocalTensorLayout output_layout = ROCAL_NONE, RocalTensorOutputType output_datatype = ROCAL_UINT8); +/*! \brief Transposes the tensors by reordering the dimensions based on the perm parameter. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] perm Permutation of the dimensions of the input + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalTranspose(RocalContext context, RocalTensor input, std::vector perm, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + /*! \brief Applies blur effect to images. * \ingroup group_rocal_augmentations * \param [in] context Rocal context @@ -997,6 +1011,30 @@ extern "C" RocalTensor ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext cont RocalTensorLayout output_layout = ROCAL_NONE, RocalTensorOutputType output_datatype = ROCAL_UINT8); +/*! \brief Performs normalization on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] axes axes list for tensor normalization + * \param [in] mean mean value (specified for each channel) for tensor normalization + * \param [in] std_dev standard deviation value (specified for each channel) for tensor normalization + * \param [in] scale scale value (specified for each channel) for tensor normalization + * \param [in] shift shift value (specified for each channel) for tensor normalization + * \param [in] is_output is the output tensor part of the graph output + * \param [in] mirror controls horizontal flip of the tensor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalNormalize(RocalContext context, RocalTensor input, + std::vector &axes, + std::vector &mean, + std::vector &std_dev, + bool is_output, + float scale = 1.0, float shift = 0.0, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + /*! \brief Crops images. 
* \ingroup group_rocal_augmentations * \param [in] context Rocal context @@ -1162,7 +1200,17 @@ extern "C" RocalTensor ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, R RocalTensorLayout output_layout = ROCAL_NONE, RocalTensorOutputType output_datatype = ROCAL_UINT8); +/** + * \brief Cast input tensor from one data type to another + * \param context Rocal context + * \param input Input tensor + * \param is_output Sets if the output is to be given to user or as intermediate buffer + * \param output_datatype Datatype of the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCast(RocalContext context, RocalTensor input, + bool is_output, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + extern "C" RocalTensor ROCAL_API_CALL rocalSetLayout(RocalContext context, RocalTensor input, RocalTensorLayout output_layout = ROCAL_NONE); - #endif // MIVISIONX_ROCAL_API_AUGMENTATION_H diff --git a/rocAL/include/augmentations/augmentations_nodes.h b/rocAL/include/augmentations/augmentations_nodes.h index ef6beff32..c01fb0691 100644 --- a/rocAL/include/augmentations/augmentations_nodes.h +++ b/rocAL/include/augmentations/augmentations_nodes.h @@ -57,3 +57,6 @@ THE SOFTWARE. #include "node_sequence_rearrange.h" #include "node_gaussian_noise.h" #include "node_slice.h" +#include "node_transpose.h" +#include "node_normalize.h" +#include "node_cast.h" diff --git a/rocAL/include/augmentations/effects_augmentations/node_normalize.h b/rocAL/include/augmentations/effects_augmentations/node_normalize.h new file mode 100644 index 000000000..6ad49d08f --- /dev/null +++ b/rocAL/include/augmentations/effects_augmentations/node_normalize.h @@ -0,0 +1,46 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "node.h" +#include "parameter_vx.h" + +class NormalizeNode : public Node { + public: + NormalizeNode(const std::vector &inputs, + const std::vector &outputs); + NormalizeNode() = delete; + void init(std::vector &axes, std::vector &mean, std::vector &std_dev, float scale, float shift); + + protected: + void create_node() override; + void update_node() override {}; + + private: + int _axis_mask = 0; + uint _compute_mean, _compute_stddev; + vx_array _mean_vx_array, _stddev_vx_array; + std::vector _axes; + std::vector _mean, _std_dev; + float _scale, _shift; + std::vector> _normalize_roi; +}; \ No newline at end of file diff --git a/rocAL/include/augmentations/geometry_augmentations/node_transpose.h b/rocAL/include/augmentations/geometry_augmentations/node_transpose.h new file mode 100644 index 000000000..d8b6e94c1 --- /dev/null +++ b/rocAL/include/augmentations/geometry_augmentations/node_transpose.h @@ -0,0 +1,40 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "node.h" +#include "parameter_vx.h" + +class TransposeNode : public Node { + public: + TransposeNode(const std::vector &inputs, const std::vector &outputs); + TransposeNode() = delete; + void init(std::vector perm); + + protected: + void create_node() override; + void update_node() override {}; + + private: + std::vector _perm; + vx_array _perm_array; +}; diff --git a/rocAL/include/augmentations/node_cast.h b/rocAL/include/augmentations/node_cast.h new file mode 100644 index 000000000..67930261b --- /dev/null +++ b/rocAL/include/augmentations/node_cast.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "node.h" +#include "graph.h" + +class CastNode : public Node +{ +public: + CastNode(const std::vector &inputs, const std::vector &outputs); + CastNode() = delete; + +protected: + void create_node() override; + void update_node() override {}; +}; diff --git a/rocAL/include/decoders/image/fused_crop_decoder.h b/rocAL/include/decoders/image/fused_crop_decoder.h index 718919b90..ae59f6bf1 100644 --- a/rocAL/include/decoders/image/fused_crop_decoder.h +++ b/rocAL/include/decoders/image/fused_crop_decoder.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/decoders/image/turbo_jpeg_decoder.h b/rocAL/include/decoders/image/turbo_jpeg_decoder.h index ce4dba600..99e67abac 100644 --- a/rocAL/include/decoders/image/turbo_jpeg_decoder.h +++ b/rocAL/include/decoders/image/turbo_jpeg_decoder.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -64,24 +64,8 @@ class TJDecoder : public Decoder { private: tjhandle m_jpegDecompressor; - const static unsigned SCALING_FACTORS_COUNT = 16; - const tjscalingfactor SCALING_FACTORS[SCALING_FACTORS_COUNT] = { - {2, 1}, - {15, 8}, - {7, 4}, - {13, 8}, - {3, 2}, - {11, 8}, - {5, 4}, - {9, 8}, - {1, 1}, - {7, 8}, - {3, 4}, - {5, 8}, - {1, 2}, - {3, 8}, - {1, 4}, - {1, 8}}; + tjscalingfactor *_scaling_factors = nullptr; + int _num_scaling_factors = 0; bool _is_partial_decoder = false; std::vector _bbox_coord; const static unsigned _max_scaling_factor = 8; diff --git a/rocAL/include/decoders/libjpeg/libjpeg_extra.h b/rocAL/include/decoders/libjpeg/libjpeg_extra.h new file mode 100644 index 000000000..69db1028a --- /dev/null +++ b/rocAL/include/decoders/libjpeg/libjpeg_extra.h @@ -0,0 +1,75 @@ +/* +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +#include +#include +#include +#include +#include "libjpeg_utils.h" + +extern "C" { + +//! extra apis for rocal to support partial decoding + +//! * Helper function to set the source +//! * This function doesn't scale the decoded image + +//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image. +//! * This function doesn't scale the decoded image + +/*! + \param handle TJPeg handle + \param jpegBuf compressed jpeg image buffer + \param jpegSize Size of the compressed data provided in the input_buffer + \param dstBuf user provided output buffer + \param width, pitch, height width, stride and height of the allocated buffer + \param flags TJPEG flags + \param pixelFormat pixel format of the image + \param crop_x_diff, crop_width_diff Actual crop_x and crop_w (adjusted to MB boundary) + \param x1, y1, crop_width, crop_height requested crop window +*/ + +int tjDecompress2_partial(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags, unsigned int *crop_x_diff, unsigned int *crop_width_diff, + unsigned int x1, unsigned int y1, unsigned int crop_width, unsigned int crop_height); + + +//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image. +//! * This function scales the decoded image to fit the output dims +/*! 
+ \param handle TJPeg handle + \param jpegBuf compressed jpeg image buffer + \param jpegSize Size of the compressed data provided in the input_buffer + \param dstBuf user provided output buffer + \param width, pitch, height width, stride and height of the allocated buffer + \param flags TJPEG flags + \param crop_width, crop_height requested crop window +*/ + +int tjDecompress2_partial_scale(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags, unsigned int crop_width, unsigned int crop_height); +} \ No newline at end of file diff --git a/rocAL/include/decoders/libjpeg/libjpeg_utils.h b/rocAL/include/decoders/libjpeg/libjpeg_utils.h new file mode 100644 index 000000000..1c588ee0b --- /dev/null +++ b/rocAL/include/decoders/libjpeg/libjpeg_utils.h @@ -0,0 +1,30 @@ +/* +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +//! turbojpeg includes + +extern "C" { +#include "jerror.h" +#include "jpeglib.h" +#include "jpegint.h" +} diff --git a/rocAL/include/loaders/image/image_read_and_decode.h b/rocAL/include/loaders/image/image_read_and_decode.h index 471164b54..6682d85f6 100644 --- a/rocAL/include/loaders/image/image_read_and_decode.h +++ b/rocAL/include/loaders/image/image_read_and_decode.h @@ -33,14 +33,6 @@ THE SOFTWARE. #include "timing_debug.h" #include "turbo_jpeg_decoder.h" -/** - * Compute the scaled value of dimension using the given scaling - * factor. This macro performs the integer equivalent of ceil(dimension * - * scalingFactor). - */ -#define TJSCALED(dimension, scalingFactor) \ - ((dimension * scalingFactor.num + scalingFactor.denom - 1) / \ - scalingFactor.denom) class ImageReadAndDecode { public: diff --git a/rocAL/include/loaders/image/node_numpy_loader.h b/rocAL/include/loaders/image/node_numpy_loader.h index 91fdd278e..49918e4f5 100644 --- a/rocAL/include/loaders/image/node_numpy_loader.h +++ b/rocAL/include/loaders/image/node_numpy_loader.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -50,4 +50,4 @@ class NumpyLoaderNode : public Node { private: std::shared_ptr _loader_module = nullptr; -}; \ No newline at end of file +}; diff --git a/rocAL/include/loaders/image/node_numpy_loader_single_shard.h b/rocAL/include/loaders/image/node_numpy_loader_single_shard.h index 4dc19699e..cd3b464e7 100644 --- a/rocAL/include/loaders/image/node_numpy_loader_single_shard.h +++ b/rocAL/include/loaders/image/node_numpy_loader_single_shard.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/loaders/image/numpy_loader.h b/rocAL/include/loaders/image/numpy_loader.h index 2c3285561..0ff053da2 100644 --- a/rocAL/include/loaders/image/numpy_loader.h +++ b/rocAL/include/loaders/image/numpy_loader.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -29,7 +29,6 @@ THE SOFTWARE. 
#include "circular_buffer.h" #include "commons.h" #include "image_read_and_decode.h" -// #include "numpy_data_reader.h" // // NumpyLoader runs an internal thread for loading an decoding of numpy arrays asynchronously // it uses a circular buffer to store decoded numpy arrays for the user @@ -54,7 +53,7 @@ class NumpyLoader : public LoaderModule { void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, - const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, int channels, ExternalSourceFileMode mode, bool eos) override {} + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override {} private: bool is_out_of_data(); diff --git a/rocAL/include/loaders/image/numpy_loader_sharded.h b/rocAL/include/loaders/image/numpy_loader_sharded.h index ada22c06b..744cfc716 100644 --- a/rocAL/include/loaders/image/numpy_loader_sharded.h +++ b/rocAL/include/loaders/image/numpy_loader_sharded.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,6 @@ THE SOFTWARE. 
*/ #pragma once -#include #include "numpy_loader.h" // @@ -46,7 +45,7 @@ class NumpyLoaderSharded : public LoaderModule { void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, - const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, int channels, ExternalSourceFileMode mode, bool eos) override {} + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override {} private: void increment_loader_idx(); diff --git a/rocAL/include/parameters/parameter.h b/rocAL/include/parameters/parameter.h index 8ce731a5a..1bec7b334 100644 --- a/rocAL/include/parameters/parameter.h +++ b/rocAL/include/parameters/parameter.h @@ -33,9 +33,9 @@ class Parameter { /// used to internally renew state of the parameter if needed (for random parameters) virtual void renew(){}; - virtual void create_array(unsigned batch_size) {}; + virtual void create_array(unsigned batch_size){}; - virtual std::vector get_array() { return{};}; + virtual std::vector get_array() { return {}; }; virtual ~Parameter() {} /// diff --git a/rocAL/include/parameters/parameter_random.h b/rocAL/include/parameters/parameter_random.h index 07500636b..54414ae07 100644 --- a/rocAL/include/parameters/parameter_random.h +++ b/rocAL/include/parameters/parameter_random.h @@ -56,8 +56,6 @@ class UniformRand : public Parameter { return _array; } - - void renew_value() { std::unique_lock lock(_lock); auto val = _generator(); @@ -70,7 +68,6 @@ class UniformRand : public Parameter { _updated_val = static_cast( ((double)val / (double)_generator.max()) * ((double)_end - (double)_start) + (double)_start); } - } void renew_array() { @@ -81,10 +78,9 @@ class UniformRand : public Parameter { } void renew() override { - if (_array.size()>0) { + if (_array.size() > 0) { renew_array(); - } - else { 
+ } else { renew_value(); } } @@ -206,8 +202,7 @@ struct CustomRand : public Parameter { void renew() override { if (_array.size() > 0) { renew_array(); - } - else { + } else { renew_value(); } } diff --git a/rocAL/include/parameters/parameter_vx.h b/rocAL/include/parameters/parameter_vx.h index 5fa59116f..e71cd48ee 100644 --- a/rocAL/include/parameters/parameter_vx.h +++ b/rocAL/include/parameters/parameter_vx.h @@ -55,7 +55,7 @@ class ParameterVX { _batch_size = batch_size; _param->create_array(_batch_size); _array = vxCreateArray(vxGetContext((vx_reference)graph->get()), data_type, _batch_size); - auto status = vxAddArrayItems(_array, _batch_size, get_array().data(), sizeof(T)); + auto status = vxAddArrayItems(_array, _batch_size, get_array().data(), sizeof(T)); if (status != 0) THROW(" vxAddArrayItems failed in create_array (ParameterVX): " + TOSTR(status)) update_array(); diff --git a/rocAL/include/pipeline/tensor.h b/rocAL/include/pipeline/tensor.h index 0ccd15770..9c300702b 100644 --- a/rocAL/include/pipeline/tensor.h +++ b/rocAL/include/pipeline/tensor.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -207,8 +207,11 @@ class TensorInfo { modify_strides(); } _layout = layout; - if (_layout == RocalTensorlayout::NONE) - set_max_shape(); + if (_layout == RocalTensorlayout::NHWC || _layout == RocalTensorlayout::NDHWC) { + _channels = _dims.back(); + } else if (_layout == RocalTensorlayout::NCHW || _layout == RocalTensorlayout::NCDHW) { + _channels = _dims.at(1); + } } void set_dims(std::vector& new_dims) { if (_num_of_dims == new_dims.size()) { diff --git a/rocAL/include/readers/image/numpy_data_reader.h b/rocAL/include/readers/image/numpy_data_reader.h index 201eb4fa0..48115c165 100644 --- a/rocAL/include/readers/image/numpy_data_reader.h +++ b/rocAL/include/readers/image/numpy_data_reader.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -24,6 +24,7 @@ THE SOFTWARE. 
#include #include +#include #include #include #include @@ -83,6 +84,7 @@ class NumpyDataReader : public Reader { unsigned _curr_file_idx; FILE* _current_fPtr; unsigned _current_file_size; + NumpyHeaderData _curr_file_header; std::string _last_id; std::string _last_file_name; size_t _shard_id = 0; @@ -101,23 +103,23 @@ class NumpyDataReader : public Reader { size_t _file_count_all_shards; std::mutex _cache_mutex_; std::map _header_cache_; - const RocalTensorDataType TypeFromNumpyStr(const std::string& format); - inline void SkipSpaces(const char*& ptr); - void ParseHeaderContents(NumpyHeaderData& target, const std::string& header); + const RocalTensorDataType get_numpy_dtype(const std::string& format); + inline void ignore_spaces(const char*& ptr); + void decode_header(NumpyHeaderData& target, const std::string& header); template - void Skip(const char*& ptr, const char (&what)[N]); + void skip_string(const char*& ptr, const char (&what)[N]); template - bool TrySkip(const char*& ptr, const char (&what)[N]); + bool check_and_skip_string(const char*& ptr, const char (&what)[N]); template - void SkipFieldName(const char*& ptr, const char (&name)[N]); + void skip_field(const char*& ptr, const char (&name)[N]); template - T ParseInteger(const char*& ptr); - std::string ParseStringValue(const char*& input, char delim_start = '\'', char delim_end = '\''); - void ParseHeader(NumpyHeaderData& parsed_header, std::string file_path); + T parse_int(const char*& ptr); + std::string read_dtype_string(const char*& input, char delim_start = '\'', char delim_end = '\''); + void read_header(NumpyHeaderData& parsed_header, std::string file_path); template - size_t ParseNumpyData(T* buf, std::vector strides, std::vector shapes, unsigned dim = 0); - bool GetFromCache(const std::string& file_name, NumpyHeaderData& target); - void UpdateCache(const std::string& file_name, const NumpyHeaderData& value); + size_t copy_array_data(T* buf, std::vector strides, std::vector shapes, unsigned dim = 
0); + bool get_cached_header(const std::string& file_name, NumpyHeaderData& target); + void update_header_cache(const std::string& file_name, const NumpyHeaderData& value); void incremenet_read_ptr(); int release(); size_t get_file_shard_id(); diff --git a/rocAL/source/api/rocal_api_augmentation.cpp b/rocAL/source/api/rocal_api_augmentation.cpp index c740eadc5..ea1c3344c 100644 --- a/rocAL/source/api/rocal_api_augmentation.cpp +++ b/rocAL/source/api/rocal_api_augmentation.cpp @@ -1262,6 +1262,37 @@ rocalSlice( return output; } +RocalTensor ROCAL_API_CALL +rocalTranspose( + RocalContext p_context, + RocalTensor p_input, + std::vector perm, + bool is_output, + RocalTensorLayout output_layout, + RocalTensorOutputType output_datatype) { + Tensor* output = nullptr; + if ((p_context == nullptr) || (p_input == nullptr)) { + ERR("Invalid ROCAL context or invalid input image") + return output; + } + auto context = static_cast(p_context); + auto input = static_cast(p_input); + try { + RocalTensorlayout op_tensor_layout = static_cast(output_layout); + RocalTensorDataType op_tensor_datatype = static_cast(output_datatype); + TensorInfo output_info = input->info(); + output_info.set_tensor_layout(op_tensor_layout); + output_info.set_data_type(op_tensor_datatype); + output = context->master_graph->create_tensor(output_info, is_output); + std::shared_ptr transpose_node = context->master_graph->add_node({input}, {output}); + transpose_node->init(perm); + } catch (const std::exception& e) { + context->capture_error(e.what()); + ERR(e.what()) + } + return output; +} + RocalTensor ROCAL_API_CALL rocalFlip( RocalContext p_context, @@ -1887,6 +1918,35 @@ rocalColorTwistFixed( return output; } +RocalTensor ROCAL_API_CALL +rocalNormalize(RocalContext p_context, RocalTensor p_input, std::vector &axes, + std::vector& mean, std::vector& std_dev, bool is_output, + float scale, float shift, + RocalTensorLayout output_layout, + RocalTensorOutputType output_datatype) { + Tensor* output = 
nullptr; + if ((p_context == nullptr) || (p_input == nullptr)) { + ERR("Invalid ROCAL context or invalid input tensor") + return output; + } + auto context = static_cast(p_context); + auto input = static_cast(p_input); + try { + RocalTensorlayout op_tensor_layout = static_cast(output_layout); + RocalTensorDataType op_tensor_datatype = static_cast(output_datatype); + TensorInfo output_info = input->info(); + output_info.set_tensor_layout(op_tensor_layout); + output_info.set_data_type(op_tensor_datatype); + output = context->master_graph->create_tensor(output_info, is_output); + std::shared_ptr normalize_node = context->master_graph->add_node({input}, {output}); + normalize_node->init(axes, mean, std_dev, scale, shift); + } catch (const std::exception& e) { + context->capture_error(e.what()); + ERR(e.what()) + } + return output; +} + RocalTensor ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext p_context, RocalTensor p_input, unsigned crop_height, unsigned crop_width, float start_x, float start_y, std::vector& mean, @@ -2266,6 +2326,35 @@ rocalNop( return output; } +RocalTensor ROCAL_API_CALL rocalCast(RocalContext p_context, RocalTensor p_input, + bool is_output, + RocalTensorOutputType output_datatype) { + Tensor* output = nullptr; + if ((p_context == nullptr) || (p_input == nullptr)) { + ERR("Invalid ROCAL context or invalid input tensor") + return output; + } + auto context = static_cast(p_context); + auto input = static_cast(p_input); + try { + RocalTensorDataType op_tensor_datatype = static_cast(output_datatype); + + if (input->info().data_type() == op_tensor_datatype) { + output = context->master_graph->create_tensor(input->info(), is_output); + context->master_graph->add_node({input}, {output}); + } else { + TensorInfo output_info = input->info(); + output_info.set_data_type(op_tensor_datatype); + output = context->master_graph->create_tensor(output_info, is_output); + context->master_graph->add_node({input}, {output}); + } + } catch(const std::exception& 
e) { + context->capture_error(e.what()); + ERR(e.what()) + } + return output; +} + RocalTensor ROCAL_API_CALL rocalSetLayout( RocalContext p_context, diff --git a/rocAL/source/augmentations/effects_augmentations/node_normalize.cpp b/rocAL/source/augmentations/effects_augmentations/node_normalize.cpp new file mode 100644 index 000000000..16bb59798 --- /dev/null +++ b/rocAL/source/augmentations/effects_augmentations/node_normalize.cpp @@ -0,0 +1,90 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "node_normalize.h" + +#include +#include + +#include "exception.h" + +NormalizeNode::NormalizeNode(const std::vector &inputs, const std::vector &outputs) : Node(inputs, outputs) {} + +void NormalizeNode::create_node() { + if (_node) + return; + + _compute_mean = _mean.size() ? 0 : 1; + _compute_stddev = _std_dev.size() ? 
0 : 1; + + uint mean_stddev_array_size = _mean.size(); + std::vector mean_vec, stddev_vec; + mean_vec.resize(_batch_size * mean_stddev_array_size, _mean[0]); + stddev_vec.resize(_batch_size * mean_stddev_array_size, _std_dev[0]); + + if (!_compute_mean && !_compute_stddev) + for (uint i = 0; i < _batch_size; i++) { + for (uint j = 0; j < mean_stddev_array_size; j++) { + mean_vec[i * mean_stddev_array_size + j] = _mean[j]; + stddev_vec[i * mean_stddev_array_size + j] = _std_dev[j]; + } + } + vx_status status = VX_SUCCESS; + if (!_compute_mean) { + _mean_vx_array = vxCreateArray(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, mean_vec.size()); + status |= vxAddArrayItems(_mean_vx_array, mean_vec.size(), mean_vec.data(), sizeof(vx_float32)); + if (status != 0) + THROW(" vxAddArrayItems failed in the normalize node (vxExtRppNormalize) node: " + TOSTR(status) + " " + TOSTR(status)) + } + + if (!_compute_stddev) { + _stddev_vx_array = vxCreateArray(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, stddev_vec.size()); + status |= vxAddArrayItems(_stddev_vx_array, stddev_vec.size(), stddev_vec.data(), sizeof(vx_float32)); + if (status != 0) + THROW(" vxAddArrayItems failed in the normalize node (vxExtRppNormalize) node: " + TOSTR(status) + " " + TOSTR(status)) + } + vx_scalar axis_mask = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &_axis_mask); + vx_scalar scale = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, &_scale); + vx_scalar shift = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, &_shift); + vx_scalar compute_mean = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_UINT32, &_compute_mean); + vx_scalar compute_stddev = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_UINT32, &_compute_stddev); + int input_layout = static_cast(_inputs[0]->info().layout()); + int output_layout = static_cast(_outputs[0]->info().layout()); + int 
roi_type = static_cast(_inputs[0]->info().roi_type()); + vx_scalar input_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &input_layout); + vx_scalar output_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &output_layout); + vx_scalar roi_type_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &roi_type); + + _node = vxExtRppNormalize(_graph->get(), _inputs[0]->handle(), _inputs[0]->get_roi_tensor(), _outputs[0]->handle(), axis_mask, + _mean_vx_array, _stddev_vx_array, compute_mean, compute_stddev, scale, shift, input_layout_vx, output_layout_vx, roi_type_vx); + if ((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) + THROW("Error adding the crop mirror normalize (vxExtRppNormalize) failed: " + TOSTR(status)) +} + +void NormalizeNode::init(std::vector &axes, std::vector &mean, std::vector &std_dev, float scale, float shift) { + _mean = mean; + _std_dev = std_dev; + _scale = scale; + _shift = shift; + for (unsigned d = 0; d < axes.size(); d++) + _axis_mask |= (1 << axes[d]); +} \ No newline at end of file diff --git a/rocAL/source/augmentations/geometry_augmentations/node_crop.cpp b/rocAL/source/augmentations/geometry_augmentations/node_crop.cpp index 6574ea1bf..6ca4cbd2c 100644 --- a/rocAL/source/augmentations/geometry_augmentations/node_crop.cpp +++ b/rocAL/source/augmentations/geometry_augmentations/node_crop.cpp @@ -102,8 +102,8 @@ void CropNode::create_crop_tensor() { vx_size num_of_dims = 2; vx_size stride[num_of_dims]; std::vector _crop_tensor_dims = {_batch_size, 4}; - if (_inputs[0]->info().layout() == RocalTensorlayout::NFCHW || _inputs[0]->info().layout() == RocalTensorlayout::NFHWC) - _crop_tensor_dims = {_inputs[0]->info().dims()[0] * _inputs[0]->info().dims()[1], 4}; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions + if(_inputs[0]->info().layout() == RocalTensorlayout::NFCHW || _inputs[0]->info().layout() == 
RocalTensorlayout::NFHWC) + _crop_tensor_dims = {_inputs[0]->info().dims()[0] * _inputs[0]->info().dims()[1], 4}; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions stride[0] = sizeof(vx_uint32); stride[1] = stride[0] * _crop_tensor_dims[0]; vx_enum mem_type = VX_MEMORY_TYPE_HOST; @@ -111,8 +111,8 @@ void CropNode::create_crop_tensor() { mem_type = VX_MEMORY_TYPE_HIP; allocate_host_or_pinned_mem(&_crop_coordinates, stride[1] * 4, _inputs[0]->info().mem_type()); - _crop_tensor = vxCreateTensorFromHandle(vxGetContext((vx_reference)_graph->get()), num_of_dims, _crop_tensor_dims.data(), VX_TYPE_UINT32, 0, - stride, reinterpret_cast(_crop_coordinates), mem_type); + _crop_tensor = vxCreateTensorFromHandle(vxGetContext((vx_reference) _graph->get()), num_of_dims, _crop_tensor_dims.data(), VX_TYPE_UINT32, 0, + stride, reinterpret_cast(_crop_coordinates), mem_type); vx_status status; if ((status = vxGetStatus((vx_reference)_crop_tensor)) != VX_SUCCESS) THROW("Error: vxCreateTensorFromHandle(_crop_tensor: failed " + TOSTR(status)) diff --git a/rocAL/source/augmentations/geometry_augmentations/node_transpose.cpp b/rocAL/source/augmentations/geometry_augmentations/node_transpose.cpp new file mode 100644 index 000000000..9e4376e4b --- /dev/null +++ b/rocAL/source/augmentations/geometry_augmentations/node_transpose.cpp @@ -0,0 +1,51 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include "node_transpose.h" +#include "exception.h" + +TransposeNode::TransposeNode(const std::vector &inputs, const std::vector &outputs) : Node(inputs, outputs) {} + +void TransposeNode::create_node() { + if (_node) + return; + + int input_layout = static_cast(_inputs[0]->info().layout()); + int output_layout = static_cast(_outputs[0]->info().layout()); + int roi_type = static_cast(_inputs[0]->info().roi_type()); + vx_scalar input_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &input_layout); + vx_scalar output_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &output_layout); + vx_scalar roi_type_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &roi_type); + _perm_array = vxCreateArray(vxGetContext((vx_reference)_graph->get()), VX_TYPE_UINT32, _perm.size()); + vx_status status = VX_SUCCESS; + status |= vxAddArrayItems(_perm_array, _perm.size(), _perm.data(), sizeof(vx_uint32)); + + _node = vxExtRppTranspose(_graph->get(), _inputs[0]->handle(), _inputs[0]->get_roi_tensor(), _outputs[0]->handle(), + _perm_array, input_layout_vx, output_layout_vx, roi_type_vx); + if ((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) + THROW("Adding the transpose (vxExtRppTranspose) node failed: " + TOSTR(status)) +} + +void TransposeNode::init(std::vector perm) { + _perm = perm; +} \ No newline at end of file diff --git a/rocAL/source/augmentations/node_cast.cpp b/rocAL/source/augmentations/node_cast.cpp new file mode 100644 index 000000000..cff54c5c2 --- /dev/null +++ b/rocAL/source/augmentations/node_cast.cpp @@ -0,0 +1,44 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include "node_cast.h" +#include "exception.h" + +CastNode::CastNode(const std::vector &inputs, const std::vector &outputs) : + Node(inputs, outputs) {} + +void CastNode::create_node() { + if(_node) + return; + + int input_layout = (int)_inputs[0]->info().layout(); + int roi_type = static_cast(_inputs[0]->info().roi_type()); + vx_scalar input_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &input_layout); + vx_scalar roi_type_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &roi_type); + _node = vxExtRppCast(_graph->get(), _inputs[0]->handle(), _inputs[0]->get_roi_tensor(), _outputs[0]->handle(), input_layout_vx, roi_type_vx); + + vx_status status; + if((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) + THROW("Adding the copy (vxCastNode) node failed: " + TOSTR(status)) + +} diff --git a/rocAL/source/decoders/image/fused_crop_decoder.cpp b/rocAL/source/decoders/image/fused_crop_decoder.cpp index 2522bca4e..ee14c0f11 100644 --- a/rocAL/source/decoders/image/fused_crop_decoder.cpp +++ b/rocAL/source/decoders/image/fused_crop_decoder.cpp @@ -20,11 +20,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "fused_crop_decoder.h" #include #include #include +#include "fused_crop_decoder.h" +#include "libjpeg_extra.h" + FusedCropTJDecoder::FusedCropTJDecoder() { m_jpegDecompressor = tjInitDecompress(); diff --git a/rocAL/source/decoders/image/turbo_jpeg_decoder.cpp b/rocAL/source/decoders/image/turbo_jpeg_decoder.cpp index 772fc8535..b285e891d 100644 --- a/rocAL/source/decoders/image/turbo_jpeg_decoder.cpp +++ b/rocAL/source/decoders/image/turbo_jpeg_decoder.cpp @@ -20,24 +20,21 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include "turbo_jpeg_decoder.h" -#include #include +#include +#include "turbo_jpeg_decoder.h" +#include "libjpeg_extra.h" TJDecoder::TJDecoder() { m_jpegDecompressor = tjInitDecompress(); - -#if 0 - int num_avail_scalings = 0; - auto scaling_factors = tjGetScalingFactors (&num_avail_scalings); - for(int i = 0; i < num_avail_scalings; i++) { - if(scaling_factors[i].num < scaling_factors[i].denom) { - - printf("%d / %d - ",scaling_factors[i].num, scaling_factors[i].denom ); + if ((_scaling_factors = tj3GetScalingFactors(&_num_scaling_factors)) == NULL) + THROW("tjDecompress2_partial_scale(): error getting scaling factors"); + for(int i = 0; i < _num_scaling_factors; i++) { + if(_scaling_factors[i].num < _scaling_factors[i].denom) { + INFO(STR(_scaling_factors[i].num) + "/" + STR(_scaling_factors[i].denom)); } } -#endif }; Decoder::Status TJDecoder::decode_info(unsigned char* input_buffer, size_t input_size, int* width, int* height, int* color_comps) { @@ -90,7 +87,7 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size crop_width = _max_scaling_factor * max_decoded_width; if (crop_width > original_image_width) crop_width = original_image_width; crop_height = crop_width * (1.0 / in_ratio); - if (crop_height > _max_scaling_factor * max_decoded_width) crop_height = _max_scaling_factor * max_decoded_width; + if (crop_height > _max_scaling_factor * max_decoded_height) crop_height = _max_scaling_factor * max_decoded_height; } else if (original_image_height > (_max_scaling_factor * max_decoded_height)) { crop_height = _max_scaling_factor * max_decoded_height; if (crop_height > original_image_height) crop_height = original_image_height; @@ -114,9 +111,9 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size } // Find the decoded image size using the predefined scaling factors in the turbo jpeg decoder uint scaledw = max_decoded_width, scaledh = max_decoded_height; - for (auto scaling_factor : SCALING_FACTORS) { - 
scaledw = TJSCALED(crop_width, scaling_factor); - scaledh = TJSCALED(crop_height, scaling_factor); + for (int j=0; j < _num_scaling_factors; j++) { + scaledw = TJSCALED(original_image_width, _scaling_factors[j]); + scaledh = TJSCALED(original_image_height, _scaling_factors[j]); if (scaledw <= max_decoded_width && scaledh <= max_decoded_height) { break; } @@ -142,9 +139,9 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size } // Find the decoded image size using the predefined scaling factors in the turbo jpeg decoder uint scaledw = max_decoded_width, scaledh = max_decoded_height; - for (auto scaling_factor : SCALING_FACTORS) { - scaledw = TJSCALED(original_image_width, scaling_factor); - scaledh = TJSCALED(original_image_height, scaling_factor); + for (int j=0; j < _num_scaling_factors; j++) { + scaledw = TJSCALED(original_image_width, _scaling_factors[j]); + scaledh = TJSCALED(original_image_height, _scaling_factors[j]); if (scaledw <= max_decoded_width && scaledh <= max_decoded_height) break; } @@ -168,7 +165,7 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size crop_width = _max_scaling_factor * max_decoded_width; if (crop_width > original_image_width) crop_width = original_image_width; crop_height = crop_width * (1.0 / in_ratio); - if (crop_height > _max_scaling_factor * max_decoded_width) crop_height = _max_scaling_factor * max_decoded_width; + if (crop_height > _max_scaling_factor * max_decoded_height) crop_height = _max_scaling_factor * max_decoded_height; } else if (original_image_height > (_max_scaling_factor * max_decoded_height)) { crop_height = _max_scaling_factor * max_decoded_height; if (crop_height > original_image_height) crop_height = original_image_height; @@ -192,9 +189,9 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size } // Find the decoded image size using the predefined scaling factors in the turbo jpeg decoder uint scaledw = max_decoded_width, 
scaledh = max_decoded_height; - for (auto scaling_factor : SCALING_FACTORS) { - scaledw = TJSCALED(crop_width, scaling_factor); - scaledh = TJSCALED(crop_height, scaling_factor); + for (int j=0; j < _num_scaling_factors; j++) { + scaledw = TJSCALED(original_image_width, _scaling_factors[j]); + scaledh = TJSCALED(original_image_height, _scaling_factors[j]); if (scaledw <= max_decoded_width && scaledh <= max_decoded_height) { break; } @@ -219,9 +216,9 @@ Decoder::Status TJDecoder::decode(unsigned char* input_buffer, size_t input_size // Find the decoded image size using the predefined scaling factors in the turbo jpeg decoder if ((actual_decoded_width != original_image_width) || (actual_decoded_height != original_image_height)) { uint scaledw = actual_decoded_width, scaledh = actual_decoded_height; - for (auto scaling_factor : SCALING_FACTORS) { - scaledw = TJSCALED(original_image_width, scaling_factor); - scaledh = TJSCALED(original_image_height, scaling_factor); + for (int j=0; j < _num_scaling_factors; j++) { + scaledw = TJSCALED(original_image_width, _scaling_factors[j]); + scaledh = TJSCALED(original_image_height, _scaling_factors[j]); if (scaledw <= max_decoded_width && scaledh <= max_decoded_height) break; } diff --git a/rocAL/source/decoders/libjpeg/libjpeg_extra.cpp b/rocAL/source/decoders/libjpeg/libjpeg_extra.cpp new file mode 100644 index 000000000..ca86f644a --- /dev/null +++ b/rocAL/source/decoders/libjpeg/libjpeg_extra.cpp @@ -0,0 +1,266 @@ +/* +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "libjpeg_extra.h" +#include +#include +#include "commons.h" + +enum { COMPRESS = 1, DECOMPRESS = 2 }; +static J_COLOR_SPACE pf2cs[TJ_NUMPF] = { + JCS_EXT_RGB, JCS_EXT_BGR, JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR, + JCS_EXT_XRGB, JCS_GRAYSCALE, JCS_EXT_RGBA, JCS_EXT_BGRA, JCS_EXT_ABGR, + JCS_EXT_ARGB, JCS_CMYK +}; + +struct my_error_mgr { + struct jpeg_error_mgr pub; + jmp_buf setjmp_buffer; + void (*emit_message) (j_common_ptr, int); + boolean warning, stopOnWarning; +}; +typedef struct my_error_mgr *my_error_ptr; + +/* + * Here's the routine that will replace the standard error_exit method: + */ + +METHODDEF(void) +my_error_exit(j_common_ptr cinfo) +{ + /* cinfo->err really points to a my_error_mgr struct, so coerce pointer */ + my_error_ptr myerr = (my_error_ptr)cinfo->err; + + /* Always display the message. */ + /* We could postpone this until after returning, if we chose.
*/ + (*cinfo->err->output_message) (cinfo); + + /* Return control to the setjmp point */ + longjmp(myerr->setjmp_buffer, 1); +} + + +//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image. +//! * inst function doesn't scale the decoded image +int tjDecompress2_partial(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags, unsigned int *crop_x_diff, unsigned int *crop_width_diff, + unsigned int crop_x, unsigned int crop_y, + unsigned int crop_width, unsigned int crop_height) +{ + JSAMPROW *row_pointer = NULL; + int i, retval = 0; + + if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || width < 0 || + pitch < 0 || height < 0 || pixelFormat < 0 || pixelFormat >= TJ_NUMPF) + THROW("tjDecompress2_partial(): Invalid argument"); + + struct jpeg_decompress_struct cinfo; + // Initialize libjpeg structures to have a memory source + // Modify the usual jpeg error manager to catch fatal errors. + struct my_error_mgr jerr; + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = my_error_exit; + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + + // set up, read header, set image parameters, save size + jpeg_create_decompress(&cinfo); + jpeg_mem_src(&cinfo, jpegBuf, jpegSize); + jpeg_read_header(&cinfo, TRUE); + cinfo.out_color_space = pf2cs[pixelFormat]; + if (flags & TJFLAG_FASTDCT) cinfo.dct_method = JDCT_FASTEST; + if (flags & TJFLAG_FASTUPSAMPLE) cinfo.do_fancy_upsampling = FALSE; + + jpeg_start_decompress(&cinfo); + /* Check for valid crop dimensions. We cannot check these values until + * after jpeg_start_decompress() is called. 
+ */ + if (crop_x + crop_width > cinfo.output_width || crop_y + crop_height > cinfo.output_height) { + ERR("crop dimensions:" << crop_width << " x " << crop_height << " exceed image dimensions" << + cinfo.output_width << " x " << cinfo.output_height); + retval = -1; goto bailout; + } + + jpeg_crop_scanline(&cinfo, &crop_x, &crop_width); + *crop_x_diff = crop_x; + *crop_width_diff = crop_width; + + if (pitch == 0) pitch = cinfo.output_width * tjPixelSize[pixelFormat]; + + if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * cinfo.output_height)) == NULL) { + THROW("tjDecompress2_partial(): Memory allocation failure"); + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + } + + // set row pointer for destination + for (i = 0; i < (int)cinfo.output_height; i++) { + if (flags & TJFLAG_BOTTOMUP) + row_pointer[i] = &dstBuf[(cinfo.output_height - i - 1) * (size_t)pitch]; + else + row_pointer[i] = &dstBuf[i * (size_t)pitch]; + } + + /* Process data */ + JDIMENSION num_scanlines; + jpeg_skip_scanlines(&cinfo, crop_y); + while (cinfo.output_scanline < crop_y + crop_height) { + if (cinfo.output_scanline < crop_y) + num_scanlines = jpeg_read_scanlines(&cinfo, &row_pointer[cinfo.output_scanline], + crop_y + crop_height - cinfo.output_scanline); + else + num_scanlines = jpeg_read_scanlines(&cinfo, &row_pointer[cinfo.output_scanline - crop_y], + crop_y + crop_height - cinfo.output_scanline); + if (num_scanlines == 0){ + ERR("Premature end of Jpeg data. Stopped at " << cinfo.output_scanline - crop_y << "/" + << cinfo.output_height) + } + } + jpeg_skip_scanlines(&cinfo, cinfo.output_height - crop_y - crop_height); + jpeg_finish_decompress(&cinfo); + + bailout: + if (cinfo.global_state > DSTATE_START) jpeg_abort_decompress(&cinfo); + if (row_pointer) free(row_pointer); + return retval; +} + +//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image. +//! 
* this function scales the decoded image to fit the output dims
+//!
+//! \param handle        not used by this function
+//! \param jpegBuf       in-memory JPEG stream of jpegSize bytes (must be non-NULL and non-empty)
+//! \param dstBuf        destination buffer; decoded row r is written at dstBuf + r * pitch
+//!                      (row order is reversed over output_height when TJFLAG_BOTTOMUP is set)
+//! \param width, height maximum output size; 0 means the JPEG's own width / height
+//! \param pitch         bytes per destination row; 0 selects cinfo.output_width * tjPixelSize[pixelFormat],
+//!                      evaluated before the crop is applied
+//! \param pixelFormat   index into pf2cs / tjPixelSize, must be in [0, TJ_NUMPF)
+//! \param flags         TJFLAG_FASTDCT, TJFLAG_FASTUPSAMPLE and TJFLAG_BOTTOMUP are honoured
+//! \param crop_width, crop_height  unscaled crop size. The first factor returned by
+//!                      tj3GetScalingFactors() for which TJSCALED(crop_width) <= width and
+//!                      TJSCALED(crop_height) <= height is applied to the whole image, and the
+//!                      scaled crop is taken at offset (output_width - scaledw, output_height - scaledh).
+//! \return 0 on success; -1 if libjpeg signals an error, the scaled crop does not fit inside
+//!         the scaled image, the scratch row cannot be allocated, or no scanline could be read.
+//!         Invalid arguments, missing/unsuitable scaling factors, more than 3 components and
+//!         row-table allocation failure are reported through THROW().
+
+int tjDecompress2_partial_scale(tjhandle handle, const unsigned char *jpegBuf,
+                                unsigned long jpegSize, unsigned char *dstBuf,
+                                int width, int pitch, int height, int pixelFormat,
+                                int flags, unsigned int crop_width, unsigned int crop_height)
+{
+    JSAMPROW *row_pointer = NULL;
+    int i, retval = 0, jpegwidth, jpegheight;
+    unsigned int scaledw, scaledh, crop_x, crop_y, max_crop_width;
+    tjscalingfactor *scalingFactors = NULL;
+    int numScalingFactors = 0;
+
+    unsigned char *tmp_row = NULL;
+    if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || width < 0 ||
+        pitch < 0 || height < 0 || pixelFormat < 0 || pixelFormat >= TJ_NUMPF) {
+        THROW("tjDecompress2_partial_scale(): Invalid argument");
+    }
+
+    struct jpeg_decompress_struct cinfo;
+    // Initialize libjpeg structures to have a memory source
+    // Modify the usual jpeg error manager to catch fatal errors.
+    struct my_error_mgr jerr;
+    cinfo.err = jpeg_std_error(&jerr.pub);
+    jerr.pub.error_exit = my_error_exit;
+    if (setjmp(jerr.setjmp_buffer)) {
+        /* If we get here, the JPEG code has signaled an error. */
+        retval = -1; goto bailout;
+    }
+
+    // The decompress object must be created before any other libjpeg call touches it;
+    // without this cinfo is an uninitialised stack struct.
+    jpeg_create_decompress(&cinfo);
+    jpeg_mem_src(&cinfo, jpegBuf, jpegSize);
+    jpeg_read_header(&cinfo, TRUE);
+    cinfo.out_color_space = pf2cs[pixelFormat];
+    if (flags & TJFLAG_FASTDCT) cinfo.dct_method = JDCT_FASTEST;
+    if (flags & TJFLAG_FASTUPSAMPLE) cinfo.do_fancy_upsampling = FALSE;
+
+    jpegwidth = cinfo.image_width; jpegheight = cinfo.image_height;
+    if (width == 0) width = jpegwidth;
+    if (height == 0) height = jpegheight;
+    // NOTE(review): the THROW() sites below assume THROW() leaves the function without
+    // reaching bailout, so cinfo is released first - confirm against commons.h.
+    if ((scalingFactors = tj3GetScalingFactors(&numScalingFactors)) == NULL) {
+        jpeg_destroy_decompress(&cinfo);
+        THROW("tjDecompress2_partial_scale(): error getting scaling factors");
+    }
+
+    for (i = 0; i < numScalingFactors; i++) {
+        scaledw = TJSCALED(crop_width, scalingFactors[i]);
+        scaledh = TJSCALED(crop_height, scalingFactors[i]);
+        if (scaledw <= (unsigned int)width && scaledh <= (unsigned int)height)
+            break;
+    }
+
+    if (i >= numScalingFactors) {
+        jpeg_destroy_decompress(&cinfo);
+        THROW("tjDecompress2_partial_scale(): Could not scale down to desired image dimensions");
+    }
+
+    if (cinfo.num_components > 3) {
+        jpeg_destroy_decompress(&cinfo);
+        THROW("tjDecompress2_partial_scale(): JPEG image must have 3 or fewer components");
+    }
+
+    //width = scaledw; height = scaledh;
+    cinfo.scale_num = scalingFactors[i].num;
+    cinfo.scale_denom = scalingFactors[i].denom;
+
+    jpeg_start_decompress(&cinfo);
+
+    /* Check for valid crop dimensions.  We cannot check these values until
+     * after jpeg_start_decompress() is called.  The comparison is done before the
+     * subtraction below: the operands are unsigned, so computing the offsets first
+     * would wrap around and let an oversized crop pass the check.
+     */
+    if (scaledw > cinfo.output_width || scaledh > cinfo.output_height) {
+        ERR("crop dimensions:" << scaledw << " x " << scaledh << " exceed image dimensions" <<
+            cinfo.output_width << " x " << cinfo.output_height);
+        retval = -1; goto bailout;
+    }
+    crop_x = cinfo.output_width - scaledw;
+    crop_y = cinfo.output_height - scaledh;
+
+    if (pitch == 0) pitch = cinfo.output_width * tjPixelSize[pixelFormat];
+
+    if ((row_pointer =
+         (JSAMPROW *)malloc(sizeof(JSAMPROW) * cinfo.output_height)) == NULL) {
+        jpeg_destroy_decompress(&cinfo);
+        THROW("tjDecompress2_partial_scale(): Memory allocation failure");
+    }
+    // allocate row of tmp storage for storing discarded data
+    tmp_row = (unsigned char *)malloc((size_t)pitch);
+    if (tmp_row == NULL) {
+        ERR("tjDecompress2_partial_scale(): Memory allocation failure");
+        retval = -1; goto bailout;
+    }
+
+    /* Re-arm the error trap now that row_pointer / tmp_row hold their final values. */
+    if (setjmp(jerr.setjmp_buffer)) {
+        /* If we get here, the JPEG code has signaled an error. */
+        retval = -1; goto bailout;
+    }
+
+    for (i = 0; i < (int)cinfo.output_height; i++) {
+        if (i < height) {
+            if (flags & TJFLAG_BOTTOMUP)
+                row_pointer[i] = &dstBuf[(cinfo.output_height - i - 1) * (size_t)pitch];
+            else
+                row_pointer[i] = &dstBuf[i * (size_t)pitch];
+        } else {
+            row_pointer[i] = tmp_row;
+        }
+    }
+    // the width for the crop shouldn't exceed output_width
+    max_crop_width = scaledw;
+    jpeg_crop_scanline(&cinfo, &crop_x, &max_crop_width);
+    jpeg_skip_scanlines(&cinfo, crop_y);
+    while (cinfo.output_scanline < cinfo.output_height) {
+        JDIMENSION lines_read;
+        if (cinfo.output_scanline < crop_y)
+            lines_read = jpeg_read_scanlines(&cinfo, &row_pointer[cinfo.output_scanline], cinfo.output_height - cinfo.output_scanline);
+        else
+            lines_read = jpeg_read_scanlines(&cinfo, &row_pointer[cinfo.output_scanline - crop_y], cinfo.output_height - cinfo.output_scanline);
+        if (lines_read == 0) {
+            /* No progress was made; fail instead of spinning on the same scanline forever. */
+            ERR("Premature end of Jpeg data. Stopped at " << cinfo.output_scanline - crop_y << "/"
+                << cinfo.output_height);
+            retval = -1; goto bailout;
+        }
+    }
+    jpeg_finish_decompress(&cinfo);
+
+    bailout:
+    if (cinfo.global_state > DSTATE_START) jpeg_abort_decompress(&cinfo);
+    /* cinfo is created on every call, so it must also be destroyed on every call;
+     * otherwise the memory owned by the decompress object is never released. */
+    jpeg_destroy_decompress(&cinfo);
+    if (row_pointer) free(row_pointer);
+    if (tmp_row) free(tmp_row);
+    return retval;
+}
diff --git a/rocAL/source/loaders/image/node_numpy_loader.cpp b/rocAL/source/loaders/image/node_numpy_loader.cpp index b29339c81..3f5319490 100644 --- a/rocAL/source/loaders/image/node_numpy_loader.cpp +++
b/rocAL/source/loaders/image/node_numpy_loader.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp b/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp index 061fe18f7..ed9d3730a 100644 --- a/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp +++ b/rocAL/source/loaders/image/node_numpy_loader_single_shard.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/image/numpy_loader.cpp b/rocAL/source/loaders/image/numpy_loader.cpp index f9d658dae..4e614dca3 100644 --- a/rocAL/source/loaders/image/numpy_loader.cpp +++ b/rocAL/source/loaders/image/numpy_loader.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/image/numpy_loader_sharded.cpp b/rocAL/source/loaders/image/numpy_loader_sharded.cpp index 916c24989..b514baf91 100644 --- a/rocAL/source/loaders/image/numpy_loader_sharded.cpp +++ b/rocAL/source/loaders/image/numpy_loader_sharded.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/pipeline/tensor.cpp b/rocAL/source/pipeline/tensor.cpp index 5f0a53a42..043c08319 100644 --- a/rocAL/source/pipeline/tensor.cpp +++ b/rocAL/source/pipeline/tensor.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. */ #include -#if !ENABLE_HIP +#if ENABLE_OPENCL #include #endif #include @@ -116,17 +116,17 @@ void TensorInfo::reset_tensor_roi_buffers() { auto roi_size = (_layout == RocalTensorlayout::NFCHW || _layout == RocalTensorlayout::NFHWC) ? _dims[0] * _dims[1] : _batch_size; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions allocate_host_or_pinned_mem((void **)&roi_buf, roi_size * roi_no_of_dims * 2 * sizeof(unsigned), _mem_type); _roi.set_ptr(roi_buf, _mem_type, roi_size, roi_no_of_dims); - if (_layout == RocalTensorlayout::NCDHW || _layout == RocalTensorlayout::NDHWC) { - for (unsigned i = 0; i < _batch_size; i++) { - unsigned *tensor_shape = _roi[i].end; - tensor_shape[i] = _max_shape[i]; - } - } else if (_is_image) { + if (_is_image) { Roi2DCords *roi = _roi.get_2D_roi(); for (unsigned i = 0; i < _batch_size; i++) { roi[i].xywh.w = _max_shape.at(0); roi[i].xywh.h = _max_shape.at(1); } + } else { + for (unsigned i = 0; i < _batch_size; i++) { + unsigned *tensor_shape = _roi[i].end; + tensor_shape[i] = _max_shape[i]; + } } } @@ -221,10 +221,8 @@ void Tensor::update_tensor_roi(const std::vector> &shape) THROW("The number of dims to be updated and the num of dims of tensor info does not match") unsigned *tensor_shape = _info.roi()[i].end; - if (_info.layout() == RocalTensorlayout::NCDHW || _info.layout() == RocalTensorlayout::NDHWC) { - for (unsigned j = 0; j < max_shape.size(); j++) { - tensor_shape[j] = shape[i][j] > max_shape[j] ? 
max_shape[j] : shape[i][j]; - } + for (unsigned j = 0; j < max_shape.size(); j++) { + tensor_shape[j] = shape[i][j] > max_shape[j] ? max_shape[j] : shape[i][j]; } } } @@ -335,18 +333,21 @@ void Tensor::create_roi_tensor_from_handle(void **handle) { THROW("Empty ROI handle is passed") } - vx_size num_of_dims = 2; - vx_size stride[num_of_dims]; - std::vector roi_dims = {_info.batch_size(), 4}; + auto _is_image = _info.is_image(); + vx_size roi_num_of_dims = 2; + vx_size num_of_dims = _is_image ? 2 : (_info.num_of_dims() - 1); + std::vector roi_dims; + roi_dims = {_info.batch_size(), num_of_dims * 2}; if (_info.layout() == RocalTensorlayout::NFCHW || _info.layout() == RocalTensorlayout::NFHWC) roi_dims = {_info.dims()[0] * _info.dims()[1], 4}; // For Sequences pre allocating the ROI to N * F to replicate in OpenVX extensions stride[0] = sizeof(vx_uint32); + vx_size stride[roi_num_of_dims]; stride[0] = sizeof(vx_uint32); stride[1] = stride[0] * roi_dims[0]; vx_enum mem_type = VX_MEMORY_TYPE_HOST; if (_info.mem_type() == RocalMemType::HIP) mem_type = VX_MEMORY_TYPE_HIP; - _vx_roi_handle = vxCreateTensorFromHandle(_context, num_of_dims, roi_dims.data(), + _vx_roi_handle = vxCreateTensorFromHandle(_context, roi_num_of_dims, roi_dims.data(), VX_TYPE_UINT32, 0, stride, *handle, mem_type); vx_status status; if ((status = vxGetStatus((vx_reference)_vx_roi_handle)) != VX_SUCCESS) diff --git a/rocAL/source/readers/image/numpy_data_reader.cpp b/rocAL/source/readers/image/numpy_data_reader.cpp index 94f0445dd..2f2171509 100644 --- a/rocAL/source/readers/image/numpy_data_reader.cpp +++ b/rocAL/source/readers/image/numpy_data_reader.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -25,9 +25,10 @@ THE SOFTWARE. #include #include +#include #include #include -#include + #include "filesystem.h" NumpyDataReader::NumpyDataReader() : _shuffle_time("shuffle_time", DBG_TIMING) { @@ -89,7 +90,8 @@ void NumpyDataReader::incremenet_read_ptr() { } size_t NumpyDataReader::open() { - auto file_path = _file_names[_curr_file_idx]; // Get next file name + auto file_path = _file_names[_curr_file_idx]; // Get current file name + _curr_file_header = _file_headers[_curr_file_idx]; // Get current file header incremenet_read_ptr(); _last_id = file_path; auto last_slash_idx = _last_id.find_last_of("\\/"); @@ -97,10 +99,10 @@ size_t NumpyDataReader::open() { _last_id.erase(0, last_slash_idx + 1); } - auto ret = GetFromCache(file_path, _file_headers[_curr_file_idx]); + auto ret = get_cached_header(file_path, _curr_file_header); if (!ret) { - ParseHeader(_file_headers[_curr_file_idx], file_path); - UpdateCache(file_path, _file_headers[_curr_file_idx]); + read_header(_curr_file_header, file_path); + update_header_cache(file_path, _curr_file_header); } else { _current_fPtr = std::fopen(file_path.c_str(), "rb"); if (_current_fPtr == nullptr) @@ -108,10 +110,10 @@ size_t NumpyDataReader::open() { } fseek(_current_fPtr, 0, SEEK_SET); // Take the file pointer back to the start - return _file_headers[_curr_file_idx].nbytes(); + return _curr_file_header.nbytes(); } -bool NumpyDataReader::GetFromCache(const std::string& file_name, NumpyHeaderData& header) { +bool NumpyDataReader::get_cached_header(const std::string& file_name, NumpyHeaderData& header) { std::unique_lock cache_lock(_cache_mutex_); auto it = _header_cache_.find(file_name); if (it == _header_cache_.end()) { @@ -122,12 +124,12 @@ bool NumpyDataReader::GetFromCache(const std::string& file_name, NumpyHeaderData } } -void NumpyDataReader::UpdateCache(const 
std::string& file_name, const NumpyHeaderData& value) { +void NumpyDataReader::update_header_cache(const std::string& file_name, const NumpyHeaderData& value) { std::unique_lock cache_lock(_cache_mutex_); _header_cache_[file_name] = value; } -const RocalTensorDataType NumpyDataReader::TypeFromNumpyStr(const std::string& format) { +const RocalTensorDataType NumpyDataReader::get_numpy_dtype(const std::string& format) { if (format == "u1") return RocalTensorDataType::UINT8; // if (format == "u2") return TypeTable::GetTypeInfo(); // Currently not supported in rocAL if (format == "u4") return RocalTensorDataType::UINT32; @@ -147,20 +149,20 @@ const RocalTensorDataType NumpyDataReader::TypeFromNumpyStr(const std::string& f THROW("Unknown Numpy type string"); } -inline void NumpyDataReader::SkipSpaces(const char*& ptr) { +inline void NumpyDataReader::ignore_spaces(const char*& ptr) { while (::isspace(*ptr)) ptr++; } template -void NumpyDataReader::Skip(const char*& ptr, const char (&what)[N]) { +void NumpyDataReader::skip_string(const char*& ptr, const char (&what)[N]) { if (strncmp(ptr, what, N - 1)) THROW("Found wrong symbol during parsing"); ptr += N - 1; } template -bool NumpyDataReader::TrySkip(const char*& ptr, const char (&what)[N]) { +bool NumpyDataReader::check_and_skip_string(const char*& ptr, const char (&what)[N]) { if (!strncmp(ptr, what, N - 1)) { ptr += N - 1; return true; @@ -170,18 +172,18 @@ bool NumpyDataReader::TrySkip(const char*& ptr, const char (&what)[N]) { } template -void NumpyDataReader::SkipFieldName(const char*& ptr, const char (&name)[N]) { - SkipSpaces(ptr); - Skip(ptr, "'"); - Skip(ptr, name); - Skip(ptr, "'"); - SkipSpaces(ptr); - Skip(ptr, ":"); - SkipSpaces(ptr); +void NumpyDataReader::skip_field(const char*& ptr, const char (&name)[N]) { + ignore_spaces(ptr); + skip_string(ptr, "'"); + skip_string(ptr, name); + skip_string(ptr, "'"); + ignore_spaces(ptr); + skip_string(ptr, ":"); + ignore_spaces(ptr); } template -T 
NumpyDataReader::ParseInteger(const char*& ptr) { +T NumpyDataReader::parse_int(const char*& ptr) { char* out_ptr = const_cast(ptr); // strtol takes a non-const pointer T value = static_cast(strtol(ptr, &out_ptr, 10)); if (out_ptr == ptr) @@ -190,7 +192,7 @@ T NumpyDataReader::ParseInteger(const char*& ptr) { return value; } -std::string NumpyDataReader::ParseStringValue(const char*& input, char delim_start, char delim_end) { +std::string NumpyDataReader::read_dtype_string(const char*& input, char delim_start, char delim_end) { if (*input++ != delim_start) THROW("Expected \'" + std::to_string(delim_start) + "\'"); std::string out; @@ -228,39 +230,39 @@ std::string NumpyDataReader::ParseStringValue(const char*& input, char delim_sta return out; } -void NumpyDataReader::ParseHeaderContents(NumpyHeaderData& target, const std::string& header) { +void NumpyDataReader::decode_header(NumpyHeaderData& target, const std::string& header) { const char* hdr = header.c_str(); - SkipSpaces(hdr); - Skip(hdr, "{"); - SkipFieldName(hdr, "descr"); - auto typestr = ParseStringValue(hdr); + ignore_spaces(hdr); + skip_string(hdr, "{"); + skip_field(hdr, "descr"); + auto typestr = read_dtype_string(hdr); // < means LE, | means N/A, = means native. 
In all those cases, we can read bool little_endian = (typestr[0] == '<' || typestr[0] == '|' || typestr[0] == '='); if (!little_endian) THROW("Big Endian files are not supported."); - target._type_info = TypeFromNumpyStr(typestr.substr(1)); + target._type_info = get_numpy_dtype(typestr.substr(1)); - SkipSpaces(hdr); - Skip(hdr, ","); - SkipFieldName(hdr, "fortran_order"); - if (TrySkip(hdr, "True")) { + ignore_spaces(hdr); + skip_string(hdr, ","); + skip_field(hdr, "fortran_order"); + if (check_and_skip_string(hdr, "True")) { target._fortran_order = true; - } else if (TrySkip(hdr, "False")) { + } else if (check_and_skip_string(hdr, "False")) { target._fortran_order = false; } else { THROW("Failed to parse fortran_order field."); } - SkipSpaces(hdr); - Skip(hdr, ","); - SkipFieldName(hdr, "shape"); - Skip(hdr, "("); - SkipSpaces(hdr); + ignore_spaces(hdr); + skip_string(hdr, ","); + skip_field(hdr, "shape"); + skip_string(hdr, "("); + ignore_spaces(hdr); target._shape.clear(); while (*hdr != ')') { - // ParseInteger already skips the leading spaces (strtol does). - target._shape.push_back(static_cast(ParseInteger(hdr))); - SkipSpaces(hdr); - if (!(TrySkip(hdr, ",")) && (target._shape.size() <= 1)) + // parse_int already skips the leading spaces (strtol does). 
+ target._shape.push_back(static_cast(parse_int(hdr))); + ignore_spaces(hdr); + if (!(check_and_skip_string(hdr, ",")) && (target._shape.size() <= 1)) THROW("The first number in a tuple must be followed by a comma."); } if (target._fortran_order) { @@ -269,7 +271,7 @@ void NumpyDataReader::ParseHeaderContents(NumpyHeaderData& target, const std::st } } -void NumpyDataReader::ParseHeader(NumpyHeaderData& parsed_header, std::string file_path) { +void NumpyDataReader::read_header(NumpyHeaderData& parsed_header, std::string file_path) { // check if the file is actually a numpy file std::vector token(128); _current_fPtr = std::fopen(file_path.c_str(), "rb"); @@ -310,7 +312,7 @@ void NumpyDataReader::ParseHeader(NumpyHeaderData& parsed_header, std::string fi if (std::fseek(_current_fPtr, offset, SEEK_SET)) THROW("Seek operation failed: " + std::strerror(errno)); - ParseHeaderContents(parsed_header, header); + decode_header(parsed_header, header); parsed_header._data_offset = offset; } @@ -321,10 +323,10 @@ size_t NumpyDataReader::read_numpy_data(void* buf, size_t read_size, std::vector // Requested read size bigger than the file size? just read as many bytes as the file size read_size = (read_size > _current_file_size) ? 
_current_file_size : read_size; - if (std::fseek(_current_fPtr, _file_headers[_curr_file_idx]._data_offset, SEEK_SET)) + if (std::fseek(_current_fPtr, _curr_file_header._data_offset, SEEK_SET)) THROW("Seek operation failed: " + std::strerror(errno)); - auto shape = _file_headers[_curr_file_idx].shape(); + auto shape = _curr_file_header.shape(); auto num_dims = max_shape.size(); std::vector strides(num_dims + 1); strides[num_dims] = 1; @@ -333,28 +335,28 @@ size_t NumpyDataReader::read_numpy_data(void* buf, size_t read_size, std::vector } size_t actual_read_size = 0; - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::UINT8) - actual_read_size = ParseNumpyData((u_int8_t*)buf, strides, shape); - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::UINT32) - actual_read_size = ParseNumpyData((u_int32_t*)buf, strides, shape); - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::INT8) - actual_read_size = ParseNumpyData((int8_t*)buf, strides, shape); - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::INT32) - actual_read_size = ParseNumpyData((int32_t*)buf, strides, shape); - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::FP16) + if (_curr_file_header.type() == RocalTensorDataType::UINT8) + actual_read_size = copy_array_data((u_int8_t*)buf, strides, shape); + if (_curr_file_header.type() == RocalTensorDataType::UINT32) + actual_read_size = copy_array_data((u_int32_t*)buf, strides, shape); + if (_curr_file_header.type() == RocalTensorDataType::INT8) + actual_read_size = copy_array_data((int8_t*)buf, strides, shape); + if (_curr_file_header.type() == RocalTensorDataType::INT32) + actual_read_size = copy_array_data((int32_t*)buf, strides, shape); + if (_curr_file_header.type() == RocalTensorDataType::FP16) #if defined(AMD_FP16_SUPPORT) - actual_read_size = ParseNumpyData((half*)buf, strides, shape); + actual_read_size = copy_array_data((half*)buf, strides, shape); #else THROW("FLOAT16 type tensor 
not supported") #endif - if (_file_headers[_curr_file_idx].type() == RocalTensorDataType::FP32) - actual_read_size = ParseNumpyData((float*)buf, strides, shape); + if (_curr_file_header.type() == RocalTensorDataType::FP32) + actual_read_size = copy_array_data((float*)buf, strides, shape); return actual_read_size; } template -size_t NumpyDataReader::ParseNumpyData(T* buf, std::vector strides, std::vector shapes, unsigned dim) { +size_t NumpyDataReader::copy_array_data(T* buf, std::vector strides, std::vector shapes, unsigned dim) { if (dim == (shapes.size() - 1)) { auto actual_read_size = std::fread(buf, sizeof(T), shapes[dim], _current_fPtr); return actual_read_size; @@ -362,14 +364,14 @@ size_t NumpyDataReader::ParseNumpyData(T* buf, std::vector strides, st T* startPtr = buf; size_t read_size = 0; for (unsigned d = 0; d < shapes[dim]; d++) { - read_size += ParseNumpyData(startPtr, strides, shapes, dim + 1); + read_size += copy_array_data(startPtr, strides, shapes, dim + 1); startPtr += strides[dim + 1]; } return read_size; } const NumpyHeaderData NumpyDataReader::get_numpy_header_data() { - return _file_headers[_curr_file_idx]; + return _curr_file_header; } size_t NumpyDataReader::read_data(unsigned char* buf, size_t read_size) { @@ -379,10 +381,10 @@ size_t NumpyDataReader::read_data(unsigned char* buf, size_t read_size) { // Requested read size bigger than the file size? just read as many bytes as the file size read_size = (read_size > _current_file_size) ? 
_current_file_size : read_size; - if (std::fseek(_current_fPtr, _file_headers[_curr_file_idx]._data_offset, SEEK_SET)) + if (std::fseek(_current_fPtr, _curr_file_header._data_offset, SEEK_SET)) THROW("Seek operation failed: " + std::strerror(errno)); - size_t actual_read_size = std::fread(buf, 1, _file_headers[_curr_file_idx].nbytes(), _current_fPtr); + size_t actual_read_size = std::fread(buf, 1, _curr_file_header.nbytes(), _current_fPtr); return actual_read_size; } diff --git a/rocAL_pybind/amd/rocal/fn.py b/rocAL_pybind/amd/rocal/fn.py index 1ec5289c2..8623dea2a 100644 --- a/rocAL_pybind/amd/rocal/fn.py +++ b/rocAL_pybind/amd/rocal/fn.py @@ -1161,3 +1161,37 @@ def random_object_bbox(*inputs, format='anchor_shape', background=0, cache_objec else: print('Wrong format passed to random_object_bbox') return ()
+
+def transpose(*inputs, perm=[], output_layout=types.NHWC, output_dtype=types.UINT8):
+    """Add a transpose node to the current pipeline through the `b.transpose` binding.
+
+    inputs[0] is the tensor handed to the backend; perm (presumably the new axis order - confirm
+    against rocalTranspose), output_layout and output_dtype are forwarded unchanged.
+    """
+    # pybind call arguments
+    kwargs_pybind = {"input_image": inputs[0], "perm": perm, "is_output": False, "output_layout": output_layout, "output_dtype": output_dtype}
+    transposed_image = b.transpose(Pipeline._current_pipeline._handle, *(kwargs_pybind.values()))
+    return (transposed_image)
+
+def normalize(*inputs, axes=[], mean=[], stddev=[], scale=1.0, shift=0.0, output_layout=types.NHWC, output_dtype=types.UINT8):
+    """Add a normalize node to the current pipeline through the `b.normalize` binding.
+
+    inputs[0] is the tensor handed to the backend; axes, mean, stddev, scale, shift,
+    output_layout and output_dtype are forwarded unchanged, in that positional order.
+    """
+    # pybind call arguments
+    kwargs_pybind = {"input_image": inputs[0], "axes": axes, "mean": mean, "stddev": stddev, "is_output": False,
+                     "scale": scale, "shift": shift, "output_layout": output_layout, "output_dtype": output_dtype}
+    normalized_image = b.normalize(Pipeline._current_pipeline._handle, *(kwargs_pybind.values()))
+    return (normalized_image)
+
+def cast(*inputs, output_dtype=types.UINT8):
+    """Add a cast node to the current pipeline through the `b.cast` binding.
+
+    inputs[0] is the tensor handed to the backend and output_dtype the requested element type.
+    The call must go to b.cast: b.normalize takes a different, longer argument list.
+    """
+    # pybind call arguments
+    kwargs_pybind = {"input_image": inputs[0], "is_output": False, "output_dtype": output_dtype}
+    cast_image = b.cast(Pipeline._current_pipeline._handle, *(kwargs_pybind.values()))
+    return (cast_image)
diff --git
a/rocAL_pybind/amd/rocal/readers.py b/rocAL_pybind/amd/rocal/readers.py index d45cc0201..b115a3d92 100644 --- a/rocAL_pybind/amd/rocal/readers.py +++ b/rocAL_pybind/amd/rocal/readers.py @@ -352,8 +352,8 @@ def mxnet(path, stick_to_shard=False, pad_last_batch=False): return mxnet_metadata -def numpy(*inputs, file_root='', num_shards=1, - random_shuffle=False, shard_id=0, files=[], stick_to_shard=False, pad_last_batch=False, seed=0): +def numpy(*inputs, file_root='', files=[], num_shards=1, + random_shuffle=False, shard_id=0, stick_to_shard=False, pad_last_batch=False, seed=0): Pipeline._current_pipeline._reader = "NumpyReader" # Output diff --git a/rocAL_pybind/examples/rocAL_api_numpy_reader.py b/rocAL_pybind/examples/rocAL_api_numpy_reader.py index e2961eddc..09e50a7f6 100644 --- a/rocAL_pybind/examples/rocAL_api_numpy_reader.py +++ b/rocAL_pybind/examples/rocAL_api_numpy_reader.py @@ -10,8 +10,9 @@ import sys import os, glob -val_cases_list = ['00000', '00003', '00005', '00006', '00012', '00024', '00034', '00041', '00044', '00049', '00052', '00056', '00061', '00065', '00066', '00070', '00076', '00078', '00080', '00084', - '00086', '00087', '00092', '00111', '00112', '00125', '00128', '00138', '00157', '00160', '00161', '00162', '00169', '00171', '00176', '00185', '00187', '00189', '00198', '00203', '00206', '00207'] + +MEAN = [0.026144592091441154, -88.3379898071289, -84.62094116210938, -78.56366729736328, -77.72217559814453, 7.33015557974337e-12, 48330.79296875, 87595.4296875, 183.57638549804688, 208.38265991210938, -7.185957863625792e-19, 109.64270782470703, 94.19403076171875, -0.37584438920021057, 9952.041015625, 20.362579345703125] +STDDEV = [108.9710922241211, 174.1948699951172, 173.99221801757812, 155.323486328125, 158.25418090820312, 0.14563894271850586, 58919.42578125, 24443.921875, 64.71000671386719, 77.63092041015625, 3.7348792830016464e-05, 242.97598266601562, 237.60250854492188, 5726.51611328125, 2953.1953125, 51.31494903564453] def load_data(path, 
files_pattern): data = sorted(glob.glob(os.path.join(path, files_pattern))) @@ -19,19 +20,10 @@ def load_data(path, files_pattern): return data def get_data_split(path: str): - imgs = load_data(path, "*_x.npy") - lbls = load_data(path, "*_y.npy") + imgs = load_data(path, "data-*.npy") + lbls = load_data(path, "label-*.npy") assert len(imgs) == len(lbls), f"Found {len(imgs)} volumes but {len(lbls)} corresponding masks" - imgs_train, lbls_train, imgs_val, lbls_val = [], [], [], [] - for (case_img, case_lbl) in zip(imgs, lbls): - if case_img.split("_")[-2] in val_cases_list: - imgs_val.append(case_img) - lbls_val.append(case_lbl) - else: - imgs_train.append(case_img) - lbls_train.append(case_lbl) - - return imgs_train, imgs_val, lbls_train, lbls_val + return imgs, lbls def main(): if len(sys.argv) < 3: @@ -45,63 +37,52 @@ def main(): except OSError as error: print(error) data_path = sys.argv[1] - if(sys.argv[2] == "cpu"): + data_path1 = sys.argv[2] + if(sys.argv[3] == "cpu"): rocal_cpu = True else: rocal_cpu = False - batch_size = int(sys.argv[3]) + batch_size = int(sys.argv[4]) num_threads = 8 device_id = 0 local_rank = 0 world_size = 1 random_seed = random.SystemRandom().randint(0, 2**32 - 1) - x_train, x_val, y_train, y_val = get_data_split(data_path) + x_train, y_train = get_data_split(data_path) + x_val, y_val = get_data_split(data_path1) import time start = time.time() - pipeline = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=random_seed, rocal_cpu=rocal_cpu, prefetch_queue_depth=2) + pipeline = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=random_seed, rocal_cpu=rocal_cpu, prefetch_queue_depth=6) with pipeline: - numpy_reader_output = fn.readers.numpy(file_root=data_path, files=x_train, shard_id=local_rank, num_shards=world_size, random_shuffle=True, seed=random_seed+local_rank) - numpy_reader_output1 = fn.readers.numpy(file_root=data_path, files=y_train, shard_id=local_rank, 
num_shards=world_size, random_shuffle=True, seed=random_seed+local_rank) - data_output = fn.set_layout(numpy_reader_output, output_layout=types.NCDHW) - label_output = fn.set_layout(numpy_reader_output1, output_layout=types.NCDHW) - [roi_start, roi_end] = fn.random_object_bbox(label_output, format="start_end", k_largest=2, foreground_prob=0.4) - anchor = fn.roi_random_crop(label_output, roi_start=roi_start, roi_end=roi_end, crop_shape=(1, 128, 128, 128)) - data_sliced_output = fn.slice(data_output, anchor=anchor, shape=(1,128,128,128), output_layout=types.NCDHW, output_dtype=types.FLOAT) - label_sliced_output = fn.slice(label_output, anchor=anchor, shape=(1,128,128,128), output_layout=types.NCDHW, output_dtype=types.UINT8) - hflip = fn.random.coin_flip(probability=0.33) - vflip = fn.random.coin_flip(probability=0.33) - dflip = fn.random.coin_flip(probability=0.33) - data_flip_output = fn.flip(data_sliced_output, horizontal=hflip, vertical=vflip, depth=dflip, output_layout=types.NCDHW, output_dtype=types.FLOAT) - label_flip_output = fn.flip(label_sliced_output, horizontal=hflip, vertical=vflip, depth=dflip, output_layout=types.NCDHW, output_dtype=types.UINT8) - brightness = fn.random.uniform(range=[0.7, 1.3]) - add_brightness = fn.random.coin_flip(probability=0.1) - brightness_output = fn.brightness(data_flip_output, brightness=brightness, brightness_shift=0.0, conditional_execution=add_brightness, output_layout=types.NCDHW, output_dtype=types.FLOAT) - add_noise = fn.random.coin_flip(probability=0.5) - std_dev = fn.random.uniform(range=[0.0, 0.1]) - noise_output = fn.gaussian_noise(brightness_output, mean=0.0, std_dev=std_dev, conditional_execution=add_noise, output_layout=types.NCDHW, output_dtype=types.FLOAT) - pipeline.set_outputs(noise_output, label_flip_output) + numpy_reader_output = fn.readers.numpy(file_root=data_path, files=x_train, shard_id=local_rank, num_shards=world_size) + label_output = fn.readers.numpy(file_root=data_path, files=y_train, 
shard_id=local_rank, num_shards=world_size) + data_output = fn.set_layout(numpy_reader_output, output_layout=types.NHWC) + normalized_output = fn.normalize(data_output, axes=[0,1], mean=MEAN, stddev=STDDEV, output_layout=types.NHWC, output_dtype=types.FLOAT) + transposed_output = fn.transpose(normalized_output, perm=[2,0,1], output_layout=types.NCHW, output_dtype=types.FLOAT) + pipeline.set_outputs(transposed_output, label_output) pipeline.build() - pipeline1 = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=random_seed, rocal_cpu=rocal_cpu, prefetch_queue_depth=6) + val_pipeline = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id, seed=random_seed, rocal_cpu=rocal_cpu, prefetch_queue_depth=6) - with pipeline1: - numpy_reader_output = fn.readers.numpy(file_root=data_path, files=x_val, shard_id=local_rank, num_shards=world_size) - numpy_reader_output1 = fn.readers.numpy(file_root=data_path, files=y_val, shard_id=local_rank, num_shards=world_size) - data_output = fn.set_layout(numpy_reader_output, output_layout=types.NCDHW) - label_output = fn.set_layout(numpy_reader_output1, output_layout=types.NCDHW) - pipeline1.set_outputs(data_output, label_output) + with val_pipeline: + numpy_reader_output = fn.readers.numpy(file_root=data_path, files=x_val, shard_id=local_rank, num_shards=world_size, seed=random_seed+local_rank) + label_output = fn.readers.numpy(file_root=data_path, files=y_val, shard_id=local_rank, num_shards=world_size, seed=random_seed+local_rank) + data_output = fn.set_layout(numpy_reader_output, output_layout=types.NHWC) + normalized_output = fn.normalize(data_output, axes=[0,1], mean=MEAN, stddev=STDDEV, output_layout=types.NHWC, output_dtype=types.FLOAT) + transposed_output = fn.transpose(normalized_output, perm=[2,0,1], output_layout=types.NCHW, output_dtype=types.FLOAT) + val_pipeline.set_outputs(transposed_output, label_output) - pipeline1.build() + val_pipeline.build() 
numpyIteratorPipeline = ROCALNumpyIterator(pipeline, device='cpu' if rocal_cpu else 'gpu') print(len(numpyIteratorPipeline)) - valNumpyIteratorPipeline = ROCALNumpyIterator(pipeline1, device='cpu' if rocal_cpu else 'gpu', return_roi=True) + valNumpyIteratorPipeline = ROCALNumpyIterator(val_pipeline, device='cpu' if rocal_cpu else 'gpu') print(len(valNumpyIteratorPipeline)) cnt = 0 - for epoch in range(100): + for epoch in range(2): print("+++++++++++++++++++++++++++++EPOCH+++++++++++++++++++++++++++++++++++++",epoch) for i , it in enumerate(numpyIteratorPipeline): print(i, it[0].shape, it[1].shape) diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index 1562575dd..578c5e572 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -725,5 +725,11 @@ PYBIND11_MODULE(rocal_pybind, m) { py::return_value_policy::reference); m.def("slice", &rocalSlice, py::return_value_policy::reference); + m.def("transpose", &rocalTranspose, + py::return_value_policy::reference); + m.def("normalize", &rocalNormalize, + py::return_value_policy::reference); + m.def("cast", &rocalCast, + py::return_value_policy::reference); } } // namespace rocal diff --git a/rocAL_pybind/setup.py b/rocAL_pybind/setup.py index 7d3598d35..9ee8e57ea 100644 --- a/rocAL_pybind/setup.py +++ b/rocAL_pybind/setup.py @@ -36,7 +36,7 @@ def has_ext_modules(self): setup( name='amd-rocal', description='AMD ROCm Augmentation Library', - url='https://github.com/GPUOpen-ProfessionalCompute-Libraries/MIVisionX/rocAL', + url='https://github.com/ROCm/rocAL', version='1.0.0', author='AMD', license='Apache License 2.0', diff --git a/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp b/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp index e48fe1d78..51265859f 100644 --- a/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp +++ b/tests/cpp_api_tests/rocAL_unittests/rocAL_unittests.cpp @@ -319,6 +319,12 @@ int test(int test_case, int reader_type, const char *path, 
const char *outName, rocalCreateMXNetReader(handle, path, true); decoded_output = rocalMXNetRecordSource(handle, path, color_format, num_threads, false, false, false, ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED, decode_max_width, decode_max_height); } break; + case 12: // Numpy reader + { + std::cout << ">>>>>>> Running Numpy reader" << std::endl; + pipeline_type = 4; + decoded_output = rocalNumpyFileSource(handle, path, num_threads, {}, false, false, false, ROCAL_USE_MAX_SIZE); + } break; default: { std::cout << ">>>>>>> Running IMAGE READER" << std::endl; pipeline_type = 1; @@ -766,6 +772,53 @@ int test(int test_case, int reader_type, const char *path, const char *outName, } } } break; + case 4: { // numpy reader pipeline + RocalTensorList output_tensor_list; + output_tensor_list = rocalGetOutputTensors(handle); + for (int idx = 0; idx < output_tensor_list->size(); idx++) { + unsigned char *out_buffer; + if (output_tensor_list->at(idx)->data_type() == RocalTensorOutputType::ROCAL_FP32) { + float *out_f_buffer; + std::cout << "Creating float buffer of "; + for (auto x : output_tensor_list->at(idx)->shape()) + std::cout << x << " x "; + std::cout << "shape\n"; + if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_GPU) { + out_f_buffer = (float *)malloc(output_tensor_list->at(idx)->data_size()); + output_tensor_list->at(idx)->copy_data(out_f_buffer); + } else if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_CPU) + out_f_buffer = (float *)output_tensor_list->at(idx)->buffer(); + + out_buffer = (unsigned char *)malloc(output_tensor_list->at(idx)->data_size() / 4); + // convert_float_to_uchar_buffer(out_f_buffer, out_buffer, output_tensor_list->at(idx)->data_size() / 4); + } else if (output_tensor_list->at(idx)->data_type() == RocalTensorOutputType::ROCAL_FP16) { + half *out_f16_buffer; + std::cout << "Creating float16 buffer of "; + for (auto x : output_tensor_list->at(idx)->shape()) + std::cout << x << " x "; + std::cout << 
"shape\n"; + if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_GPU) { + out_f16_buffer = (half *)malloc(output_tensor_list->at(idx)->data_size()); + output_tensor_list->at(idx)->copy_data(out_f16_buffer); + } else if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_CPU) + out_f16_buffer = (half *)output_tensor_list->at(idx)->buffer(); + + out_buffer = (unsigned char *)malloc(output_tensor_list->at(idx)->data_size() / 2); + // convert_float_to_uchar_buffer(out_f16_buffer, out_buffer, output_tensor_list->at(idx)->data_size() / 2); + } else { + std::cout << "Creating uchar buffer of "; + for (auto x : output_tensor_list->at(idx)->shape()) + std::cout << x << " x "; + std::cout << "shape\n"; + if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_GPU) { + out_buffer = (unsigned char *)malloc(output_tensor_list->at(idx)->data_size()); + output_tensor_list->at(idx)->copy_data(out_buffer); + } else if (output_tensor_list->at(idx)->backend() == RocalTensorBackend::ROCAL_CPU) + out_buffer = (unsigned char *)(output_tensor_list->at(idx)->buffer()); + } + } + std::cout << "Copied numpy data to buffers\n"; + } break; default: { std::cout << "Not a valid pipeline type ! Exiting!\n"; return -1;