rocAL - Python and Pybind Changes #9

Closed · wants to merge 3 commits
11 changes: 7 additions & 4 deletions rocAL/rocAL_pybind/CMakeLists.txt
@@ -95,18 +95,21 @@ endif()
if(${BUILD_ROCAL_PYBIND})

link_directories(${ROCM_PATH}/lib)
include_directories(${AMDRPP_INCLUDE_DIRS})
- include_directories(../rocAL/include/api/
+ include_directories(../rocAL/include/
Collaborator: Please remove additional line at L98

../rocAL/include/api/
../rocAL/include/pipeline/
../rocAL/include/device/
third_party_lib/
../../amd_openvx/openvx/include/)

add_subdirectory(third_party_lib/pybind11)

pybind11_add_module(rocal_pybind rocal_pybind.cpp)
- target_link_libraries(rocal_pybind PRIVATE ${AMDRPP_LIBRARIES} vx_rpp rocal)
+ target_link_libraries(rocal_pybind PRIVATE rocal vx_rpp amd_rpp)
Collaborator: Please check if this change is intended

Collaborator: This should be the same as before, right?

message("-- ${White}rocal_pybind -- CMAKE_CXX_FLAGS:${CMAKE_CXX_FLAGS}${ColourReset}")
install(TARGETS rocal_pybind DESTINATION lib)
message("-- ${Green}ROCm Augmentation Library Python Binding - rocal_pybind module added ${ColourReset}")
message("-- ${Green}Radeon Augmentation Library Python Binding - rocal_pybind module added ${ColourReset}")
Collaborator: Please change Radeon to ROCm

else()
message("-- ${Red}WARNING: rocAL Pybind module excluded ${ColourReset}")
endif()
Collaborator: Is this file supposed to be included? Should the pycache files also be in the PR?

Binary file not shown.
Collaborator: Please check if this file needs to be in the PR

Binary file not shown.
10 changes: 5 additions & 5 deletions rocAL/rocAL_pybind/amd/rocal/decoders.py
@@ -43,9 +43,9 @@ def image(*inputs, user_feature_key_map=None, path='', file_root='', annotations
"shuffle": random_shuffle,
"loop": False,
"decode_size_policy": decode_size_policy,
"max_width": max_decoded_width,
"max_height": max_decoded_height,
"dec_type": decoder_type}
"max_width": max_decoded_width, # 1024
Collaborator: Please check if this comment is needed ROCm#1024

Collaborator: Please remove these comments on the 3 lines

"max_height": max_decoded_height, # 1024
"dec_type": decoder_type} # USER_GIVEN_SIZE_ORIG
decoded_image = b.COCO_ImageDecoderShard(Pipeline._current_pipeline._handle, *(kwargs_pybind.values()))

elif (reader == "TFRecordReaderClassification" or reader == "TFRecordReaderDetection"):
@@ -164,8 +164,6 @@ def image_random_crop(*inputs, user_feature_key_map=None, path='', file_root='',
"color_format": output_type,
"num_shards": num_shards,
'is_output': False,
"user_key_for_encoded": user_feature_key_map["image/encoded"],
Collaborator: Why are these lines removed?

"user_key_for_filename": user_feature_key_map["image/filename"],
"shuffle": random_shuffle,
"loop": False,
"decode_size_policy": decode_size_policy,
@@ -303,3 +301,5 @@ def image_slice(*inputs, file_root='', path='', annotations_file='', shard_id=0,
"max_height": max_decoded_height}
image_decoder_slice = b.FusedDecoderCropShard(Pipeline._current_pipeline._handle, *(kwargs_pybind.values()))
return (image_decoder_slice)
+
+
Collaborator: Please remove additional spaces at L304, L305

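A note on the pattern these decoder wrappers share: each one builds a plain dict of keyword arguments and then splats its values positionally into the pybind entry point, so the dict's insertion order must match the C++ signature exactly, and removing or reordering a key silently shifts every argument after it. A minimal, self-contained sketch of the mechanism (decoder_stub is a hypothetical stand-in, not the real rocal_pybind API):

# Sketch only: 'decoder_stub' stands in for a pybind call such as
# b.COCO_ImageDecoderShard; only the unpacking pattern is from the diff.
def decoder_stub(handle, source_path, color_format, shuffle):
    return (handle, source_path, color_format, shuffle)

kwargs_pybind = {"source_path": "/data/coco",  # insertion order must match
                 "color_format": 0,            # the bound C++ signature,
                 "shuffle": True}              # values are passed positionally
result = decoder_stub("pipeline_handle", *(kwargs_pybind.values()))

This is also why the reviewers scrutinize removed keys such as user_key_for_encoded above: dropping a key changes the positional argument list handed to the binding, not just the name.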
532 changes: 68 additions & 464 deletions rocAL/rocAL_pybind/amd/rocal/fn.py

Large diffs are not rendered by default.

Empty file.
47 changes: 43 additions & 4 deletions rocAL/rocAL_pybind/amd/rocal/pipeline.py
@@ -95,7 +95,7 @@ class Pipeline(object):
_handle = None
_current_pipeline = None

- def __init__(self, batch_size=-1, num_threads=-1, device_id=-1, seed=1,
+ def __init__(self, batch_size=-1, num_threads=-1, device_id=-1, seed=-1,
Collaborator: Is this seed=-1 intentional? If not, change it back to whatever is in TOT.

exec_pipelined=True, prefetch_queue_depth=2,
exec_async=True, bytes_per_sample=0,
rocal_cpu=False, max_streams=-1, default_cuda_stream_priority=0, tensor_layout = types.NCHW, reverse_channels = False, multiplier = [1.0,1.0,1.0], offset = [0.0, 0.0, 0.0], tensor_dtype=types.FLOAT):
@@ -137,6 +137,7 @@ def __init__(self, batch_size=-1, num_threads=-1, device_id=-1, seed=1,
self._name = None
self._anchors = None
self._BoxEncoder = None
+ self._BoxIOUMatcher = None
Collaborator: Please remove BoxIOUMatcher as part of this PR

self._encode_tensor = None
self._numOfClasses = None
self._oneHotEncoding = False
@@ -155,7 +156,7 @@ def build(self):
exit(0)
return self

- def run(self):
+ def rocalRun(self):
""" Run the pipeline using rocalRun call
"""
status = b.rocalRun(self._handle)
@@ -229,7 +230,7 @@ def GetOneHotEncodedLabels(self, array, device):

def set_outputs(self, *output_list):
self._output_list_length = len(output_list)
- b.setOutputImages(self._handle,len(output_list),output_list)
+ b.setOutputImages(self._handle, len(output_list), output_list)

def __enter__(self):
Pipeline._current_pipeline = self
@@ -329,6 +330,44 @@ def isEmpty(self):

def Timing_Info(self):
return b.getTimingInfo(self._handle)

def rocalGetImageLabels(self):
return b.rocalGetImageLabels(self._handle)

def rocalGetBoundingBoxLabel(self):
return b.rocalGetBoundingBoxLabel(self._handle)

def rocalGetBoundingBoxCords(self):
return b.rocalGetBoundingBoxCords(self._handle)

def rocalGetBoundingBoxCount(self):
return b.rocalGetBoundingBoxCount(self._handle)

def rocalGetMatchedIndices(self):
Collaborator: Please remove this as part of this PR

return b.rocalGetMatchedIndices(self._handle)

def copy_out_data_ptr(self, data_ptr):
return b.copy_data_ptr(self._handle, data_ptr)

def rocalGetOutputTensors(self):
return b.rocalGetOutputTensors(self._handle)

def run(self):
"""
It rises StopIteration if data set reached its end.
return:
:return:
A list of `rocalTensorList` objects for respective pipeline outputs.
"""
try:
print("getRemainingImages :", self.getRemainingImages())
if self.getRemainingImages() > 0:
self.rocalRun()
Collaborator: Please give a better name than rocalRun(); it is slightly misleading here

return b.rocalGetOutputTensors(self._handle)
except:
print("Raise stop iter")
Collaborator: Is print required here?

raise StopIteration


def _discriminate_args(func, **func_kwargs):
"""Split args on those applicable to Pipeline constructor and the decorated function."""
@@ -459,4 +498,4 @@ def create_pipeline(*args, **kwargs):
create_pipeline._is_pipeline_def = True
return create_pipeline

- return actual_decorator(fn) if fn else actual_decorator
+ return actual_decorator(fn) if fn else actual_decorator
\ No newline at end of file
Collaborator: Add new line at EOF

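Taken together, the review notes on this file point at one reshaped entry point: a thin, clearly named wrapper over b.rocalRun, no debug prints, and StopIteration raised from an explicit check rather than a bare except. A minimal sketch of that shape, assuming the method names below are free to choose (they are suggestions, not the merged API):

def process_run(self):
    # Hypothetical rename of rocalRun(): advances the pipeline one
    # iteration and returns the backend status code.
    return b.rocalRun(self._handle)

def run(self):
    # Returns the list of rocalTensorList objects for the pipeline
    # outputs; raises StopIteration once the dataset is exhausted.
    if self.getRemainingImages() <= 0 or self.process_run() != 0:
        raise StopIteration
    return b.rocalGetOutputTensors(self._handle)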
120 changes: 63 additions & 57 deletions rocAL/rocAL_pybind/amd/rocal/plugin/generic.py
@@ -22,33 +22,36 @@
import numpy as np
import rocal_pybind as b
import amd.rocal.types as types
import ctypes

class ROCALGenericImageIterator(object):
def __init__(self, pipeline):
self.loader = pipeline
self.w = b.getOutputWidth(self.loader._handle)
self.h = b.getOutputHeight(self.loader._handle)
self.n = b.getOutputImageCount(self.loader._handle)
color_format = b.getOutputColorFormat(self.loader._handle)
self.p = (1 if (color_format == int(types.GRAY)) else 3)
height = self.h*self.n
self.out_tensor = None
self.out_bbox = None
self.out_image = np.zeros((height, self.w, self.p), dtype = "uint8")
- self.bs = pipeline._batch_size
+ self.batch_size = pipeline._batch_size
self.out = None
Collaborator: Rename it to self.out_tensor, like before


def next(self):
return self.__next__()

def __next__(self):

Collaborator: Remove empty line here

if(self.loader.isEmpty()):
raise StopIteration

if self.loader.run() != 0:
raise StopIteration

- self.loader.copyImage(self.out_image)
- return self.out_image , self.out_tensor
+ else:
+ self.output_tensor_list = self.loader.rocalGetOutputTensors()
Collaborator: self.loader.run() is supposed to run and give the output tensors, right? Why are we calling that and then calling getOutputTensors again in the else part? I am not entirely following this if/else.

self.augmentation_count = len(self.output_tensor_list)
Collaborator: Why do we need augmentation count if it is not used?

Collaborator: Also, may I know why we are defining the other variables here?

self.w = self.output_tensor_list[0].batch_width() if self.w is None else self.w
Collaborator: I see that these variables are checked against None, but don't we need to initialize them to None before checking?

self.h = self.output_tensor_list[0].batch_height() if self.h is None else self.h
self.batch_size = self.output_tensor_list[0].batch_size() if self.batch_size is None else self.batch_size
self.color_format = self.output_tensor_list[0].color_format() if self.color_format is None else self.color_format
self.output_tensor_list[0].copy_data(ctypes.c_void_p(self.out.data_ptr()))

return self.out

def reset(self):
b.rocalResetLoaders(self.loader._handle)
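One way to resolve the None-check concern above is to initialize the lazily filled fields in __init__, so the first __next__ call can populate them from the output tensor list; a sketch under that assumption (editor's illustration, not part of the diff):

def __init__(self, pipeline):
    self.loader = pipeline
    self.batch_size = pipeline._batch_size
    # Fields below are filled on the first __next__ call, so the
    # 'is None' checks there are well defined from the start.
    self.w = None
    self.h = None
    self.color_format = None
    self.out = None
    self.output_tensor_list = None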
@@ -68,48 +71,13 @@ def __init__(self, pipeline, tensor_layout = types.NCHW, reverse_channels = Fals
self.reverse_channels = reverse_channels
self.tensor_dtype = tensor_dtype
self.display = display
self.w = b.getOutputWidth(self.loader._handle)
self.h = b.getOutputHeight(self.loader._handle)
self.n = b.getOutputImageCount(self.loader._handle)
self.bs = pipeline._batch_size
if self.loader._name is None:
self.loader._name= self.loader._reader
color_format = b.getOutputColorFormat(self.loader._handle)
self.p = (1 if (color_format == int(types.GRAY)) else 3)
self.labels_size = ((self.bs*self.loader._numOfClasses) if (self.loader._oneHotEncoding == True) else self.bs)
if tensor_layout == types.NCHW:
if self.device == "cpu":
if self.tensor_dtype == types.FLOAT:
self.out = np.empty((self.bs*self.n, self.p, int(self.h/self.bs), self.w,), dtype=np.float32)
elif self.tensor_dtype == types.FLOAT16:
self.out = np.empty((self.bs*self.n, self.p, int(self.h/self.bs), self.w,), dtype=np.float16)
self.labels = np.empty(self.labels_size, dtype = np.int32)

else:
with cp.cuda.Device(device=self.device_id):
if self.tensor_dtype == types.FLOAT:
self.out = cp.empty((self.bs*self.n, self.p, int(self.h/self.bs), self.w,), dtype=cp.float32)
elif self.tensor_dtype == types.FLOAT16:
self.out = cp.empty((self.bs*self.n, self.p, int(self.h/self.bs), self.w,), dtype=cp.float16)
self.labels = cp.empty(self.labels_size, dtype = cp.int32)

else: #NHWC
if self.device == "cpu":
if self.tensor_dtype == types.FLOAT:
self.out = np.empty((self.bs*self.n, int(self.h/self.bs), self.w, self.p), dtype=np.float32)
elif self.tensor_dtype == types.FLOAT16:
self.out = np.empty((self.bs*self.n, int(self.h/self.bs), self.w, self.p), dtype=np.float16)
self.labels = np.empty(self.labels_size, dtype = np.int32)

else:
with cp.cuda.Device(device=self.device_id):
if self.tensor_dtype == types.FLOAT:
self.out = cp.empty((self.bs*self.n, int(self.h/self.bs), self.w, self.p), dtype=cp.float32)
elif self.tensor_dtype == types.FLOAT16:
self.out = cp.empty((self.bs*self.n, int(self.h/self.bs), self.w, self.p), dtype=cp.float16)
self.labels = cp.empty(self.labels_size, dtype = cp.int32)


self.out = None
if self.bs != 0:
self.len = b.getRemainingImages(self.loader._handle)//self.bs
Collaborator: May I know why we are dividing by bs here but not in the else branch?

else:
@@ -119,27 +87,65 @@ def next(self):
return self.__next__()

def __next__(self):

Collaborator: Please remove additional empty line

if(b.isEmpty(self.loader._handle)):
raise StopIteration

if self.loader.run() != 0:
raise StopIteration

- if(types.NCHW == self.tensor_format):
- self.loader.copyToTensorNCHW(self.out, self.multiplier, self.offset, self.reverse_channels, int(self.tensor_dtype))
- else:
- self.loader.copyToTensorNHWC(self.out, self.multiplier, self.offset, self.reverse_channels, int(self.tensor_dtype))

+ self.output_tensor_list =
Collaborator: This statement seems incomplete


self.augmentation_count = len(self.output_tensor_list)
Collaborator: Augmentation count not used

self.w = self.output_tensor_list[0].batch_width() if self.w is None else self.w
self.h = self.output_tensor_list[0].batch_height() if self.h is None else self.h
self.batch_size = self.output_tensor_list[0].batch_size() if self.batch_size is None else self.batch_size
Collaborator: self.batch_size - Not used by this iterator

self.color_format = self.output_tensor_list[0].color_format() if self.color_format is None else self.color_format

if self.out is None:
if self.tensor_layout == types.NCHW:
if self.device == "cpu":
if self.tensor_dtype == types.FLOAT:
self.out = np.empty((self.bs*self.n, self.p, int(self.h/self.bs), self.w,), dtype=np.float32)
sampath1117 (Collaborator, Apr 17, 2023): Please remove the additional , after self.w. You can have the if statement for the datatype outside the NCHW and NHWC if condition, like below:

tensor_dtype = None
if self.tensor_dtype == types.FLOAT:
    tensor_dtype = np.float32
elif self.tensor_dtype == types.FLOAT16:
    tensor_dtype = np.float16

and use it as below:

self.out = np.empty((self.bs*self.n, self.p, int(self.h/self.bs), self.w), dtype=tensor_dtype)

This will reduce multiple branch statements.

elif self.tensor_dtype == types.FLOAT16:
self.out = np.empty((self.bs*self.n, self.p, int(self.h/self.bs), self.w,), dtype=np.float16)
self.labels = np.empty(self.labels_size, dtype = np.int32)

Collaborator: Please remove additional space

else:
with cp.cuda.Device(device=self.device_id):
if self.tensor_dtype == types.FLOAT:
self.out = cp.empty((self.bs*self.n, self.p, int(self.h/self.bs), self.w,), dtype=cp.float32)
elif self.tensor_dtype == types.FLOAT16:
self.out = cp.empty((self.bs*self.n, self.p, int(self.h/self.bs), self.w,), dtype=cp.float16)
self.labels = cp.empty(self.labels_size, dtype = cp.int32)

else: #NHWC
if self.device == "cpu":
if self.tensor_dtype == types.FLOAT:
self.out = np.empty((self.bs*self.n, int(self.h/self.bs), self.w, self.p), dtype=np.float32)
elif self.tensor_dtype == types.FLOAT16:
self.out = np.empty((self.bs*self.n, int(self.h/self.bs), self.w, self.p), dtype=np.float16)
self.labels = np.empty(self.labels_size, dtype = np.int32)

else:
with cp.cuda.Device(device=self.device_id):
if self.tensor_dtype == types.FLOAT:
self.out = cp.empty((self.bs*self.n, int(self.h/self.bs), self.w, self.p), dtype=cp.float32)
Collaborator: Please rename self.p to self.channels

elif self.tensor_dtype == types.FLOAT16:
self.out = cp.empty((self.bs*self.n, int(self.h/self.bs), self.w, self.p), dtype=cp.float16)
Collaborator: In all these instances you have created one unified tensor for all the outputs together, i.e. if outputs are set for 2 augmentations you create a single N*2, H, W, C tensor and use it. But in the tensor pipeline every output can have a different data type and different W and H values, so we need to accommodate that. self.output_tensor_list[0].copy_data only copies the first output set; self.output_tensor_list[1].copy_data would copy the second. So we may need to add a for loop based on the number of outputs.

self.labels = cp.empty(self.labels_size, dtype = cp.int32)

self.output_tensor_list[0].copy_data(ctypes.c_void_p(self.out.data_ptr()))
Collaborator: There is already a copy_data call at L52 (self.output_tensor_list[0].copy_data(ctypes.c_void_p(self.out.data_ptr()))); why is one more copy_data call present here?


if(self.loader._name == "labelReader"):
if(self.loader._oneHotEncoding == True):
self.loader.GetOneHotEncodedLabels(self.labels, self.device)
self.labels_tensor = self.labels.reshape(-1, self.bs, self.loader._numOfClasses)
print("Support for OneHotLabels not given yet")
Collaborator: In the external PR we can retain this API call and resolve the issue with OneHotImageLabels in the backend. @shobana-mcw, your thoughts please.

exit(0)
else:
if self.display:
for i in range(self.bs):
img = (self.out)
draw_patches(img[i], i, 0)
- self.loader.getImageLabels(self.labels)
+ self.labels = self.loader.rocalGetImageLabels()
if self.device == "cpu":
self.labels_tensor = self.labels.astype(dtype=np.int_)
else:
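Finally, a sketch of the per-output copy loop suggested in the review above, assuming each entry of output_tensor_list exposes the batch_width(), batch_height(), batch_size(), and copy_data() methods seen in this diff. Host numpy buffers are used here (hence buf.ctypes.data rather than the data_ptr() used for device arrays), and the 3 channels and float32 dtype are illustrative assumptions:

# Hypothetical helper: one host buffer per pipeline output, since each
# output may have its own width, height, and data type.
import ctypes
import numpy as np

def copy_all_outputs(output_tensor_list):
    outputs = []
    for tensor in output_tensor_list:
        buf = np.empty((tensor.batch_size(), tensor.batch_height(),
                        tensor.batch_width(), 3), dtype=np.float32)
        tensor.copy_data(ctypes.c_void_p(buf.ctypes.data))
        outputs.append(buf)
    return outputs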