SundarRajan98 · SundarRajan98 · Oct 6, 2023 · Oct 6, 2023 · Oct 15, 2023 · Oct 15, 2023
diff --git a/apps/image_augmentation/image_augmentation.cpp b/apps/image_augmentation/image_augmentation.cpp
@@ -48,12 +48,12 @@ int main(int argc, const char** argv) {
     if (argc < MIN_ARG_COUNT) {
         printf(
             "Usage: image_augmentation <image_dataset_folder/video_file> <processing_device=1/cpu=0>  \
-              decode_width decode_height video_mode gray_scale/rgb display_on_off decode_shard_count  <shuffle:0/1> <jpeg_dec_mode<0(tjpeg)/1(opencv)/2(hwdec)>\n");
+              decode_width decode_height decoder_mode gray_scale/rgb display_on_off decode_shard_count  <shuffle:0/1> <jpeg_dec_mode<0(tjpeg)/1(opencv)/2(hwdec)>\n");
         return -1;
     }
     int argIdx = 0;
     const char* folderPath1 = argv[++argIdx];
-    int video_mode = 0;  // 0 means no video decode, 1 means hardware, 2 means software decoding
+    int decoder_mode = 0;  // 0: image decode via the JPEG file loader, 1: fused JPEG crop decode, 2: software video decode, >2: hardware video decode
     bool display = 1;    // Display the images
     int aug_depth = 1;   // how deep is the augmentation tree
     int rgb = 1;         // process color images
@@ -62,7 +62,7 @@ int main(int argc, const char** argv) {
     bool processing_device = 1;
     size_t shard_count = 2;
     int shuffle = 0;
-    int dec_mode = 0;
+    int decoder_type = 0;
     const char *outName = "image_augmentation_app.png";
 
     if (argc >= argIdx + MIN_ARG_COUNT)
@@ -75,7 +75,7 @@ int main(int argc, const char** argv) {
         decode_height = atoi(argv[++argIdx]);
 
     if (argc >= argIdx + MIN_ARG_COUNT)
-        video_mode = atoi(argv[++argIdx]);
+        decoder_mode = atoi(argv[++argIdx]);
 
     if (argc >= argIdx + MIN_ARG_COUNT)
         rgb = atoi(argv[++argIdx]);
@@ -90,7 +90,7 @@ int main(int argc, const char** argv) {
         shuffle = atoi(argv[++argIdx]);
 
     if (argc >= argIdx + MIN_ARG_COUNT)
-        dec_mode = atoi(argv[++argIdx]);
+        decoder_type = atoi(argv[++argIdx]);
 
     if (argc >= argIdx + MIN_ARG_COUNT)
         outName = argv[++argIdx];
@@ -108,7 +108,7 @@ int main(int argc, const char** argv) {
         return -1;
     }
 
-    RocalDecoderType dec_type = (RocalDecoderType)dec_mode;
+    RocalDecoderType dec_type = (RocalDecoderType)decoder_type;
 
     /*>>>>>>>>>>>>>>>> Creating rocAL parameters  <<<<<<<<<<<<<<<<*/
 
@@ -126,15 +126,20 @@ int main(int argc, const char** argv) {
     /*>>>>>>>>>>>>>>>>>>> Graph description <<<<<<<<<<<<<<<<<<<*/
     RocalTensor input1;
 
-    if (video_mode != 0) {
+    if (decoder_mode >= 2) {
         unsigned sequence_length = 3;
         unsigned frame_step = 3;
         unsigned frame_stride = 1;
         if (decode_height <= 0 || decode_width <= 0) {
             std::cout << "Output width and height is needed for video decode\n";
             return -1;
         }
-        input1 = rocalVideoFileSource(handle, folderPath1, color_format, ((video_mode == 1) ? RocalDecodeDevice::ROCAL_HW_DECODE : RocalDecodeDevice::ROCAL_SW_DECODE), shard_count, sequence_length, frame_step, frame_stride, shuffle, true, false);
+        input1 = rocalVideoFileSource(handle, folderPath1, color_format, (decoder_mode == 2)? ROCAL_SW_DECODE: ROCAL_HW_DECODE, shard_count, sequence_length, frame_step, frame_stride, shuffle, true, false);
+    } else if (decoder_mode == 1) {
+            std::vector<float> area = {0.08, 1};
+            std::vector<float> aspect_ratio = {3.0f / 4, 4.0f / 3};
+            input1 = rocalFusedJpegCrop(handle, folderPath1, color_format, shard_count, false, area, aspect_ratio, 10, false, false, ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED, decode_width, decode_height);
+
     } else {
         // The jpeg file loader can automatically select the best size to decode all images to that size
         // User can alternatively set the size or change the policy that is used to automatically find the size
@@ -152,7 +157,7 @@ int main(int argc, const char** argv) {
 
     RocalTensor tensor0;
     int resize_w = 112, resize_h = 112;
-    if (video_mode) {
+    if (decoder_mode >= 2) {
         resize_h = decode_height;
         resize_w = decode_width;
         tensor0 = input1;
@@ -214,7 +219,7 @@ int main(int argc, const char** argv) {
     int w = rocalGetOutputWidth(handle);
     int p = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24) ? 3 : 1);
     std::cout << "output width " << w << " output height " << h << " color planes " << p << std::endl;
-    const unsigned number_of_cols = video_mode ? 1 : 10;
+    const unsigned number_of_cols = (decoder_mode >= 2) ? 1 : 10;
     auto cv_color_format = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24) ? CV_8UC3 : CV_8UC1);
     cv::Mat mat_output(h + AMD_ROCm_Black_resize.rows, w * number_of_cols, cv_color_format);
     cv::Mat mat_input(h, w, cv_color_format);

diff --git a/docs/examples/image_processing/decoder.py b/docs/examples/image_processing/decoder.py
@@ -9,7 +9,7 @@
 import cupy as cp
 
 seed = 1549361629
-image_dir = "../../../../data/images/AMD-tinyDataSet/"
+image_dir = "../../../data/images/AMD-tinyDataSet/"
 batch_size = 4
 gpu_id = 0
 
@@ -34,13 +34,13 @@ def show_pipeline_output(pipe, device):
     pipe.build()
     data_loader = ROCALClassificationIterator(pipe, device)
     images = next(iter(data_loader))
-    show_images(images[0], device)
+    show_images(images[0][0], device)
 
 @pipeline_def(seed=seed)
 def image_decoder_pipeline(device="cpu", path=image_dir):
-    jpegs, labels = fn.readers.file(file_root=path, shard_id=0, num_shards=1, random_shuffle=False)
+    jpegs, labels = fn.readers.file(file_root=path)
     images = fn.decoders.image(jpegs, file_root=path, device=device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False)
-    return fn.resize(images, device=device, resize_x=300, resize_y=300)
+    return fn.resize(images, device=device, resize_width=300, resize_height=300)
 
 def main():
     print ('Optional arguments: <cpu/gpu image_folder>')
@@ -52,9 +52,8 @@ def main():
           rocal_device = "gpu"
     if  len(sys.argv) > 2:
       img_folder = sys.argv[2]
-
-    pipe = image_decoder_pipeline(batch_size=bs, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC,
-                                  reverse_channels=True, mean = [0, 0, 0], std=[255, 255, 255], device=rocal_device, path=img_folder)
+    pipe = image_decoder_pipeline(batch_size=bs, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, 
+                                reverse_channels=True, mean = [0, 0, 0], std=[255,255,255], device=rocal_device, path=img_folder)
     show_pipeline_output(pipe, device=rocal_device)
 
 if __name__ == '__main__':

diff --git a/docs/examples/image_processing/decoder_examples.ipynb b/docs/examples/image_processing/decoder_examples.ipynb
@@ -38,7 +38,7 @@
     "%matplotlib inline\n",
     "\n",
     "seed = 1549361629\n",
-    "image_dir = \"../../../../data/images/AMD-tinyDataSet/\"\n",
+    "image_dir = \"../../../data/images/AMD-tinyDataSet/\"\n",
     "batch_size = 4\n",
     "gpu_id = 0\n",
     "\n",
@@ -61,7 +61,7 @@
     "    pipe.build()\n",
     "    data_loader = ROCALClassificationIterator(pipe, device, device_id)\n",
     "    images = next(iter(data_loader))\n",
-    "    show_images(images[0], device)\n"
+    "    show_images(images[0][0], device)\n"
    ]
   },
   {
@@ -82,9 +82,9 @@
    "source": [
     "@pipeline_def(seed=seed)\n",
     "def image_decoder_pipeline(device=\"cpu\"):\n",
-    "    jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n",
+    "    jpegs, labels = fn.readers.file(file_root=image_dir)\n",
     "    images = fn.decoders.image(jpegs, file_root=image_dir, device=device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False)\n",
-    "    return fn.resize(images, device=device, resize_x=300, resize_y=300)\n",
+    "    return fn.resize(images, device=device, resize_width=300, resize_height=300)\n",
     "\n",
     "pipe = image_decoder_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n",
     "                            reverse_channels=True, mean = [0, 0, 0], std=[255,255,255], device=\"cpu\")\n",
@@ -109,12 +109,13 @@
    "source": [
     "@pipeline_def(seed=seed)\n",
     "def image_decoder_random_crop_pipeline(device=\"cpu\"):\n",
-    "    jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n",
+    "    jpegs, labels = fn.readers.file(file_root=image_dir)\n",
     "    images = fn.decoders.image_slice(jpegs, file_root=image_dir, \n",
-    "                                     device=device,\n",
     "                                     output_type=types.RGB,\n",
+    "                                     shard_id = 0,\n",
+    "                                     num_shards = 1,\n",
     "                                     random_shuffle=True)\n",
-    "    return fn.resize(images, device=device, resize_x=300, resize_y=300)\n",
+    "    return fn.resize(images, device=device, resize_width=300, resize_height=300)\n",
     "    \n",
     "pipe = image_decoder_random_crop_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n",
     "                                          reverse_channels=True, mean=[0,0,0], std = [255,255,255], device=\"cpu\")\n",
@@ -184,7 +185,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.12"
   },
   "vscode": {
    "interpreter": {

diff --git a/docs/examples/image_processing/inference_pipeline.py b/docs/examples/image_processing/inference_pipeline.py
@@ -31,7 +31,7 @@
 
 
 seed = 1549361629
-image_dir = "../../../../data/images/AMD-tinyDataSet/"
+image_dir = "../../../data/images/AMD-tinyDataSet/"
 batch_size = 4
 gpu_id = 0
 

diff --git a/rocAL-setup.py b/rocAL-setup.py
@@ -311,11 +311,11 @@
         os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
                   linuxSystemInstall_check+' install lmdb-devel rapidjson-devel')
 
-    # turbo-JPEG - https://github.com/rrawther/libjpeg-turbo.git -- 2.0.6.2
+    # turbo-JPEG - https://github.com/libjpeg-turbo/libjpeg-turbo.git -- 3.0.1
     os.system(
-        '(cd '+deps_dir+'; git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git )')
+        '(cd '+deps_dir+'; git clone -b 3.0.1 https://github.com/libjpeg-turbo/libjpeg-turbo.git )')
     os.system('(cd '+deps_dir+'/libjpeg-turbo; mkdir build; cd build; '+linuxCMake +
-              ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ..; make -j 4; sudo make install )')
+              ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ..; make -j 4; sudo make install )')
     # RPP
     os.system('sudo -v')
     os.system('(cd '+deps_dir+'; git clone -b '+rppVersion+' https://github.com/GPUOpen-ProfessionalCompute-Libraries/rpp.git; cd rpp; mkdir build-'+backend+'; cd build-'+backend+'; ' +

diff --git a/rocAL/CMakeLists.txt b/rocAL/CMakeLists.txt
@@ -42,6 +42,14 @@ find_package(RapidJSON QUIET)
 find_package(StdFilesystem QUIET)
 find_package(HALF QUIET)
 
+if(DEFINED ENV{ROCM_PATH})  # the ROCM_PATH environment variable is defined
+  set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")  # no FORCE: an existing cache entry is left untouched
+elseif(ROCM_PATH)  # ROCM_PATH already holds a true value (e.g. supplied with -DROCM_PATH=...)
+  message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
+else()  # nothing supplied: use the default location
+  set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
+endif()
+
 # HIP Backend
 if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "HIP")
     if(NOT DEFINED HIP_PATH)
@@ -225,6 +233,7 @@ if(${BUILD_ROCAL})
                 include/augmentations/geometry_augmentations/
                 include/decoders/image/
                 include/decoders/video/
+                include/decoders/libjpeg/
                 include/device/
                 include/loaders/
                 include/loaders/image/

diff --git a/rocAL/include/api/rocal_api_augmentation.h b/rocAL/include/api/rocal_api_augmentation.h
@@ -329,6 +329,20 @@ extern "C" RocalTensor ROCAL_API_CALL rocalFlipFixed(RocalContext context, Rocal
                                                      RocalTensorLayout output_layout = ROCAL_NONE,
                                                      RocalTensorOutputType output_datatype = ROCAL_UINT8);
 
+/*! \brief Transposes the tensors by reordering the dimensions based on the perm parameter.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] perm Permutation of the dimensions of the input (passed by value)
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor (defaults to ROCAL_NONE)
+ * \param [in] output_datatype the data type of the output tensor (defaults to ROCAL_UINT8)
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalTranspose(RocalContext context, RocalTensor input, std::vector<unsigned> perm, bool is_output,
+                                                RocalTensorLayout output_layout = ROCAL_NONE,
+                                                RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
 /*! \brief Applies blur effect to images.
  * \ingroup group_rocal_augmentations
  * \param [in] context Rocal context
@@ -997,6 +1011,30 @@ extern "C" RocalTensor ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext cont
                                                                RocalTensorLayout output_layout = ROCAL_NONE,
                                                                RocalTensorOutputType output_datatype = ROCAL_UINT8);
 
+/*! \brief Performs normalization on the input tensor.
+ * \ingroup group_rocal_augmentations
+ * \note axes, mean and std_dev are non-const lvalue references, so callers must pass named vectors rather than temporaries.
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] axes axes list for tensor normalization
+ * \param [in] mean mean value (specified for each channel) for tensor normalization
+ * \param [in] std_dev standard deviation value (specified for each channel) for tensor normalization
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] scale scale value for tensor normalization (a single float, defaults to 1.0)
+ * \param [in] shift shift value for tensor normalization (a single float, defaults to 0.0)
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalNormalize(RocalContext context, RocalTensor input,
+                                                               std::vector<unsigned> &axes,
+                                                               std::vector<float> &mean,
+                                                               std::vector<float> &std_dev,
+                                                               bool is_output,
+                                                               float scale = 1.0, float shift = 0.0,
+                                                               RocalTensorLayout output_layout = ROCAL_NONE,
+                                                               RocalTensorOutputType output_datatype = ROCAL_UINT8);                                                               
+
 /*! \brief Crops images.
  * \ingroup group_rocal_augmentations
  * \param [in] context Rocal context
@@ -1162,7 +1200,17 @@ extern "C" RocalTensor ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, R
                                                          RocalTensorLayout output_layout = ROCAL_NONE,
                                                          RocalTensorOutputType output_datatype = ROCAL_UINT8);
 
+/*! \brief Casts the input tensor from one data type to another.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output sets whether the output is given to the user or kept as an intermediate buffer
+ * \param [in] output_datatype the data type of the output tensor (defaults to ROCAL_UINT8)
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalCast(RocalContext context, RocalTensor input,
+                                                bool is_output,
+                                                RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
 extern "C" RocalTensor ROCAL_API_CALL rocalSetLayout(RocalContext context, RocalTensor input,
                                                      RocalTensorLayout output_layout = ROCAL_NONE);
-
 #endif  // MIVISIONX_ROCAL_API_AUGMENTATION_H
diff --git a/rocAL/include/augmentations/augmentations_nodes.h b/rocAL/include/augmentations/augmentations_nodes.h
@@ -57,3 +57,6 @@ THE SOFTWARE.
 #include "node_sequence_rearrange.h"
 #include "node_gaussian_noise.h"
 #include "node_slice.h"
+#include "node_transpose.h"
+#include "node_normalize.h"
+#include "node_cast.h"
diff --git a/rocAL/include/augmentations/effects_augmentations/node_normalize.h b/rocAL/include/augmentations/effects_augmentations/node_normalize.h
@@ -0,0 +1,46 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "node.h"
+#include "parameter_vx.h"
+
+/*! \brief Graph node for the normalize augmentation.
+ *
+ * Constructed from the input and output tensor lists; init() receives the
+ * normalization parameters (axes, mean, std_dev, scale, shift).
+ */
+class NormalizeNode : public Node {
+   public:
+    NormalizeNode(const std::vector<Tensor *> &inputs,
+                            const std::vector<Tensor *> &outputs);
+    NormalizeNode() = delete;
+    /*! \brief Receives the normalization parameters for this node.
+     * \param [in] axes axes list for tensor normalization
+     * \param [in] mean mean values for tensor normalization
+     * \param [in] std_dev standard deviation values for tensor normalization
+     * \param [in] scale scale value for tensor normalization
+     * \param [in] shift shift value for tensor normalization
+     */
+    void init(std::vector<unsigned> &axes, std::vector<float> &mean, std::vector<float> &std_dev, float scale, float shift);
+
+   protected:
+    void create_node() override;
+    void update_node() override {}  // no-op for this node
+
+   private:
+    // Scalar and handle members carry default initializers so that none of them
+    // holds an indeterminate value before init()/create_node() run.
+    int _axis_mask = 0;  // NOTE(review): presumably a bitmask derived from _axes - confirm in the implementation
+    unsigned _compute_mean = 0, _compute_stddev = 0;  // NOTE(review): look like flags for computing mean/stddev - confirm
+    vx_array _mean_vx_array = nullptr, _stddev_vx_array = nullptr;
+    std::vector<unsigned> _axes;
+    std::vector<float> _mean, _std_dev;
+    float _scale = 1.0f, _shift = 0.0f;  // identity scale/shift by default
+    std::vector<std::vector<uint32_t>> _normalize_roi;
+};
diff --git a/rocAL/include/augmentations/geometry_augmentations/node_transpose.h b/rocAL/include/augmentations/geometry_augmentations/node_transpose.h
@@ -0,0 +1,40 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "node.h"
+#include "parameter_vx.h"
+
+/*! \brief Graph node for the transpose augmentation.
+ *
+ * Constructed from the input and output tensor lists; init() receives the
+ * permutation of dimensions.
+ */
+class TransposeNode : public Node {
+   public:
+    TransposeNode(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs);
+    TransposeNode() = delete;
+    /*! \brief Receives the permutation for this node.
+     * \param [in] perm permutation of the dimensions (passed by value)
+     */
+    void init(std::vector<unsigned> perm);
+
+   protected:
+    void create_node() override;
+    void update_node() override {}  // no-op for this node
+
+   private:
+    std::vector<unsigned> _perm;
+    vx_array _perm_array = nullptr;  // null-initialized so the handle never holds an indeterminate value
+};