Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Batch list improvement #62

Merged
merged 2 commits into from
Sep 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 41 additions & 48 deletions alodataset/coco_detection_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,47 +37,47 @@ def __init__(
**kwargs,
):
"""
Attributes
----------
CATEGORIES : set
List of all unique tags read from the database
labels_names : list
List of labels according to their corresponding positions
prepare : :mod:`BaseDataset <base_dataset>`

Parameters
----------
img_folder : str
Path to the image folder relative at `dataset_dir` (stored into the aloception config file)
ann_file : str
Path to the annotation file relative at `dataset_dir` (stored into the aloception config file)
name : str, optional
Key of database name in `alodataset_config.json` file, by default *coco*
return_masks : bool, optional
Include masks labels in the output, by default False
classes : list, optional
List of classes to be filtered in the annotation reading process, by default None
stuff_ann_file: str, optional
Additional annotations with new classes, by default None
**kwargs : dict
:mod:`BaseDataset <base_dataset>` optional parameters

Raises
------
Exception
If a classes list is decided, each label must be inside of :attr:`CATEGORIES` list attribute
Attributes
----------
CATEGORIES : set
List of all unique tags read from the database
labels_names : list
List of labels according to their corresponding positions
prepare : :mod:`BaseDataset <base_dataset>`

Examples
--------
>>> coco_ds = CocoDetectionDataset(
... img_folder = "val2017",
... ann_file = "annotations/instances_val2017.json",
... mode = "validation"
)
>>> frames = next(iter(coco_ds.train_loader()))
>>> frames = frames[0].batch_list(frames)
>>> frames.get_view(frames.boxes2d,).render()
"""
Parameters
----------
img_folder : str
Path to the image folder relative to `dataset_dir` (stored in the aloception config file)
ann_file : str
Path to the annotation file relative to `dataset_dir` (stored in the aloception config file)
name : str, optional
Key of database name in `alodataset_config.json` file, by default *coco*
return_masks : bool, optional
Include masks labels in the output, by default False
classes : list, optional
List of classes to be filtered in the annotation reading process, by default None
stuff_ann_file: str, optional
Additional annotations with new classes, by default None
**kwargs : dict
:mod:`BaseDataset <base_dataset>` optional parameters

Raises
------
Exception
If a classes list is decided, each label must be inside of :attr:`CATEGORIES` list attribute

Examples
--------
>>> coco_ds = CocoDetectionDataset(
... img_folder = "val2017",
... ann_file = "annotations/instances_val2017.json",
... mode = "validation"
)
>>> frames = next(iter(coco_ds.train_loader()))
>>> frames = frames[0].batch_list(frames)
>>> frames.get_view(frames.boxes2d,).render()
"""

if "sample" not in kwargs:
kwargs["sample"] = False
Expand Down Expand Up @@ -140,7 +140,7 @@ def __init__(
self.items = self.ids

def getitem(self, idx):
""" Get the :mod:`Frame <aloscene.frame>` corresponds to *idx* index
"""Get the :mod:`Frame <aloscene.frame>` corresponding to the *idx* index

Parameters
----------
Expand Down Expand Up @@ -298,14 +298,7 @@ def show_random_frame(coco_loader):

def main():
"""Main"""
# coco_dataset = CocoDetectionDataset(
# img_folder="val2017",
# stuff_ann_file="annotations/stuff_val2017.json",
# ann_file="annotations/instances_val2017.json",
# return_masks=True,
# )
coco_dataset = CocoDetectionDataset(sample=True)

for f, frames in enumerate(coco_dataset.train_loader(batch_size=2)):
frames = Frame.batch_list(frames)
frames.get_view().render()
Expand Down
9 changes: 5 additions & 4 deletions alodataset/sintel_disparity_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def _get_camera_frames(self, sequence_data, camera):

if __name__ == "__main__":
dataset = SintelDisparityDataset(sample=True)
# show some frames at various indices
for idx in [1, 2, 5]:
frames = dataset.getitem(idx)["left"]
frames.get_view().render()

for f, frames in enumerate(dataset.train_loader(batch_size=2)):
frames = Frame.batch_list(frames)
frames["left"].get_view().render()
break
9 changes: 5 additions & 4 deletions alodataset/sintel_flow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def _get_camera_frames(self, sequence_data, camera):

if __name__ == "__main__":
dataset = SintelFlowDataset(sample=True)
# show some frames at various indices
for idx in [1, 2, 5]:
frames = dataset.getitem(idx)["left"]
frames.get_view().render()

for f, frames in enumerate(dataset.train_loader(batch_size=2)):
frames = Frame.batch_list(frames)
frames["left"].get_view().render()
break
15 changes: 0 additions & 15 deletions aloscene/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def __new__(
boxes2d: Union[dict, BoundingBoxes2D] = None,
boxes3d: Union[dict, BoundingBoxes3D] = None,
flow: Flow = None,
mask: Mask = None,
segmentation: Mask = None,
disparity: Disparity = None,
normalization="255",
Expand All @@ -45,7 +44,6 @@ def __new__(
tensor.add_label("boxes2d", boxes2d, align_dim=["B", "T"], mergeable=False)
tensor.add_label("boxes3d", boxes3d, align_dim=["B", "T"], mergeable=False)
tensor.add_label("flow", flow, align_dim=["B", "T"], mergeable=False)
tensor.add_label("mask", mask, align_dim=["B", "T"], mergeable=True)
tensor.add_label("disparity", disparity, align_dim=["B", "T"], mergeable=True)
tensor.add_label("segmentation", segmentation, align_dim=["B", "T"], mergeable=False)

Expand Down Expand Up @@ -99,19 +97,6 @@ def append_boxes3d(self, boxes_3d: BoundingBoxes3D, name: str = None):
"""
self._append_label("boxes3d", boxes_3d, name)

def append_mask(self, mask: Mask, name: str = None):
"""Attach a mask to the frame.

Parameters
----------
mask: aloscene.Mask
Mask to attached to the Frame
name: str
If none, the mask will be attached without name (if possible). Otherwise if no other unnamed
mask are attached to the frame, the mask will be added to the set of mask.
"""
self._append_label("mask", mask, name)

def append_flow(self, flow, name=None):
"""Attach a flow to the frame.

Expand Down
2 changes: 1 addition & 1 deletion aloscene/mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __new__(cls, x, labels: Union[dict, Labels] = None, *args, **kwargs):
x = load_mask(x)
kwargs["names"] = ("N", "H", "W")
tensor = super().__new__(cls, x, *args, **kwargs)
tensor.add_label("labels", labels, align_dim=["N"], mergeable=True)
tensor.add_label("labels", labels, align_dim=["N"], mergeable=False)
return tensor

def __init__(self, x, *args, **kwargs):
Expand Down
15 changes: 10 additions & 5 deletions aloscene/tensors/augmented_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,17 +859,22 @@ def pad(self, offset_y: tuple, offset_x: tuple, **kwargs):

Parameters
----------
offset_y: tuple
(percentage top_offset, percentage bottom_offset) Percentage based on the previous size
offset_x: tuple
(percentage left_offset, percentage right_offset) Percentage based on the previous size

offset_y: tuple of float or tuple of int
(percentage top_offset, percentage bottom_offset) percentages based on the previous size. If a tuple of int
is given, the absolute values will be converted to float (percentage) before being applied.
offset_x: tuple of float or tuple of int
(percentage left_offset, percentage right_offset) percentages based on the previous size. If a tuple of int
is given, the absolute values will be converted to float (percentage) before being applied.

Returns
-------
croped : aloscene AugmentedTensor
croped tensor
"""
if isinstance(offset_y[0], int) and isinstance(offset_y[1], int):
offset_y = (offset_y[0] / self.H, offset_y[1] / self.H)
if isinstance(offset_x[0], int) and isinstance(offset_x[1], int):
offset_x = (offset_x[0] / self.W, offset_x[1] / self.W)

padded = self._pad(offset_y, offset_x, **kwargs)
padded.recursive_apply_on_labels_(lambda label: self._pad_label(label, offset_y, offset_x, **kwargs))
Expand Down
104 changes: 68 additions & 36 deletions aloscene/tensors/spatial_augmented_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,20 @@

class SpatialAugmentedTensor(AugmentedTensor):
@staticmethod
def __new__(cls, x, *args, cam_intrinsic: CameraIntrinsic = None, cam_extrinsic: CameraExtrinsic = None, **kwargs):
def __new__(
cls,
x,
*args,
cam_intrinsic: CameraIntrinsic = None,
cam_extrinsic: CameraExtrinsic = None,
mask=None,
**kwargs,
):
tensor = super().__new__(cls, x, *args, **kwargs)
# Add camera parameters as labels
tensor.add_label("cam_intrinsic", cam_intrinsic, align_dim=["B", "T"], mergeable=True)
tensor.add_label("cam_extrinsic", cam_extrinsic, align_dim=["B", "T"], mergeable=True)
tensor.add_label("mask", mask, align_dim=["B", "T"], mergeable=True)
return tensor

def __init__(self, x, *args, **kwargs):
Expand All @@ -35,6 +44,19 @@ def W(self):
def H(self):
return self.shape[self.names.index("H")]

def append_mask(self, mask, name: str = None):
"""Attach a mask to the frame.

Parameters
----------
mask: aloscene.Mask
Mask to attach to the Frame
name: str
If None, the mask will be attached without a name (if possible). Otherwise, the mask will be added
under the given name to the set of masks attached to the frame.
"""
self._append_label("mask", mask, name)

def append_cam_intrinsic(self, cam_intrinsic: CameraIntrinsic):
self._append_label("cam_intrinsic", cam_intrinsic)

Expand Down Expand Up @@ -218,15 +240,17 @@ def batch_list(sa_tensors: list, pad_boxes: bool = False):

Parameters
----------
sa_tensors: list
List of any aloscene.tensors.SpatialAugmentedTensor (or list of dicts or SpatialAugmentedTensor)
sa_tensors: list or dict
List of any aloscene.tensors.SpatialAugmentedTensor. If a dict is given, this method will be applied to each
list of spatial augmented tensors within the dict
pad_boxes: bool
By default, do not rescale the boxes attached to the spatial augmented Tensor (see explanation in boxes2d.pad)

Returns
-----------
batch_frame: a child of aloscene.tensors.SpatialAugmentedTensor (or dict of SpatialAugmentedTensor)
Multiple sa_tensor with mask label
aloscene.tensors.SpatialAugmentedTensor
A child of aloscene.tensors.SpatialAugmentedTensor (or dict of SpatialAugmentedTensor)
with `mask` label to keep track of the padded areas.
"""
assert len(sa_tensors) >= 1 and isinstance(sa_tensors, list)
frame0 = sa_tensors[0]
Expand All @@ -240,28 +264,33 @@ def batch_list(sa_tensors: list, pad_boxes: bool = False):
max_h, max_w = 0, 0
dtype = sa_tensors[0].dtype
device = sa_tensors[0].device
normalization = sa_tensors[0].normalization
mean_std = sa_tensors[0].mean_std
instance = type(sa_tensors[0])

if (
"N" in sa_tensors[0].names
or "C" not in sa_tensors[0].names
or "H" not in sa_tensors[0].names
or "W" not in sa_tensors[0].names
):
raise Exception(
"{} (with names: {}) as it is, does not seem to be mergeable using batch_list.".format(
type(sa_tensors[0]), sa_tensors[0].names
)
)

# Retrieve the target size
for i, frame in enumerate(sa_tensors):
max_h, max_w = max(frame.H, max_h), max(frame.W, max_w)
if frame is not None:
max_h, max_w = max(frame.H, max_h), max(frame.W, max_w)

saved_frame_labels = {}
n_sa_tensors = []
for i, frame in enumerate(sa_tensors):
for i, n_frame in enumerate(sa_tensors):
if n_frame is None:
continue
# Add the batch dimension and drop the labels
n_sa_tensors.append(frame.batch())

n_sa_tensors.append(n_frame.batch())
frame = n_frame
labels = n_sa_tensors[i].get_labels()
# Pad labels
labels = AugmentedTensor.apply_on_label(
labels,
lambda l: l.pad(
(0, max_h / frame.H - 1), (0, max_w / frame.W - 1), frame_size=frame.HW, pad_boxes=pad_boxes
),
)

# Merge labels on the first dim (TODO, move on an appropriate method)
# The following can be merge into an other method in the augmented_tensor class that do roughly the same thing
Expand Down Expand Up @@ -303,36 +332,40 @@ def batch_list(sa_tensors: list, pad_boxes: bool = False):
batch_size = len(n_sa_tensors)
# Retrieve the new shapes and dim names
n_tensor_shape, n_mask_shape = list(n_sa_tensors[0].shape), list(n_sa_tensors[0].shape)
# New target frame size
n_tensor_shape[0], n_mask_shape[0] = batch_size, batch_size
n_tensor_shape[n_sa_tensors[0].names.index("H")] = max_h
n_tensor_shape[n_sa_tensors[0].names.index("W")] = max_w
# New target mask shape
n_mask_shape[n_sa_tensors[0].names.index("H")] = max_h
n_mask_shape[n_sa_tensors[0].names.index("W")] = max_w
n_mask_shape[n_sa_tensors[0].names.index("C")] = 1
n_names = n_sa_tensors[0].names

n_padded_list = []
for spatial_tensor in n_sa_tensors:
h_pad = (0, max_h - spatial_tensor.H)
w_pad = (0, max_w - spatial_tensor.W)
padded_spatial_tensor = spatial_tensor.pad(h_pad, w_pad, pad_boxes=pad_boxes)
n_padded_list.append(padded_spatial_tensor)

n_augmented_tensors = torch.cat(n_padded_list, dim=0)

# Set the new mask and tensor buffer filled up with zeros and ones
# Also, for normalized frames, the zero value is actually different based on the mean/std
n_tensor = torch.zeros(tuple(n_tensor_shape), dtype=dtype, device=device)
if mean_std is not None:
mean_tensor, std_tensor = instance._get_mean_std_tensor(
tuple(n_mask_shape), n_names, mean_std, device=device
)
n_tensor = n_tensor - mean_tensor
n_tensor = n_tensor / std_tensor
n_mask = torch.ones(tuple(n_mask_shape), dtype=torch.float, device=device)

# Create the batch list mask
n_mask = torch.ones(tuple(n_mask_shape), dtype=torch.float, device=device)
for b, frame in enumerate(n_sa_tensors):
n_slice = frame.get_slices({"B": b, "H": slice(None, frame.H), "W": slice(None, frame.W)})
n_tensor[n_slice].copy_(frame[0])
n_mask[n_slice] = 0

n_frame = instance(n_tensor, names=n_names, normalization=normalization, device=device, mean_std=mean_std)
n_frame.append_mask(aloscene.Mask(n_mask, names=n_names))
# Put back the merged dropped labels
n_frame.set_labels(saved_frame_labels)
# n_frame = instance(n_tensor, names=n_names, normalization=normalization, device=device, mean_std=mean_std)
n_augmented_tensors.append_mask(aloscene.Mask(n_mask, names=n_names))

return n_frame
return n_augmented_tensors

def _relative_to_absolute_hs_ws(self, hs=None, ws=None, assert_integer=True):
"""
Expand Down Expand Up @@ -487,11 +520,10 @@ def _pad(self, offset_y: tuple, offset_x: tuple, value=0, **kwargs):
-------
padded
"""

pad_top = int(offset_y[0] * self.H)
pad_bottom = int(offset_y[1] * self.H)
pad_left = int(offset_x[0] * self.W)
pad_right = int(offset_x[1] * self.W)
pad_top = int(round(offset_y[0] * self.H))
pad_bottom = int(round(offset_y[1] * self.H))
pad_left = int(round(offset_x[0] * self.W))
pad_right = int(round(offset_x[1] * self.W))

padding = [pad_left, pad_top, pad_right, pad_bottom]
tensor_padded = F.pad(self.rename(None), padding, padding_mode="constant", fill=value).reset_names()
Expand Down