diff --git a/alodataset/coco_detection_dataset.py b/alodataset/coco_detection_dataset.py
index 06c9e427..d8d03365 100644
--- a/alodataset/coco_detection_dataset.py
+++ b/alodataset/coco_detection_dataset.py
@@ -37,47 +37,47 @@ def __init__(
         **kwargs,
     ):
         """
-    Attributes
-    ----------
-    CATEGORIES : set
-        List of all unique tags read from the database
-    labels_names : list
-        List of labels according to their corresponding positions
-    prepare : :mod:`BaseDataset <base_dataset>`
-
-    Parameters
-    ----------
-    img_folder : str
-        Path to the image folder relative at `dataset_dir` (stored into the aloception config file)
-    ann_file : str
-        Path to the annotation file relative at `dataset_dir` (stored into the aloception config file)
-    name : str, optional
-        Key of database name in `alodataset_config.json` file, by default *coco*
-    return_masks : bool, optional
-        Include masks labels in the output, by default False
-    classes : list, optional
-        List of classes to be filtered in the annotation reading process, by default None
-    stuff_ann_file: str, optional
-        Additional annotations with new classes, by default None
-    **kwargs : dict
-        :mod:`BaseDataset <base_dataset>` optional parameters
-
-    Raises
-    ------
-    Exception
-        If a classes list is decided, each label must be inside of :attr:`CATEGORIES` list attribute
+        Attributes
+        ----------
+        CATEGORIES : set
+            List of all unique tags read from the database
+        labels_names : list
+            List of labels according to their corresponding positions
+        prepare : :mod:`BaseDataset <base_dataset>`
 
-    Examples
-    --------
-    >>> coco_ds = CocoDetectionDataset(
-    ...     img_folder = "val2017",
-    ...     ann_file = "annotations/instances_val2017.json",
-    ...     mode = "validation"
-    )
-    >>> frames = next(iter(coco_ds.train_loader()))
-    >>> frames = frames[0].batch_list(frames)
-    >>> frames.get_view(frames.boxes2d,).render()
-    """
+        Parameters
+        ----------
+        img_folder : str
+            Path to the image folder relative to `dataset_dir` (stored in the aloception config file)
+        ann_file : str
+            Path to the annotation file relative to `dataset_dir` (stored in the aloception config file)
+        name : str, optional
+            Key of the database name in the `alodataset_config.json` file, by default *coco*
+        return_masks : bool, optional
+            Include masks labels in the output, by default False
+        classes : list, optional
+            List of classes to be filtered in the annotation reading process, by default None
+        stuff_ann_file : str, optional
+            Additional annotations with new classes, by default None
+        **kwargs : dict
+            :mod:`BaseDataset <base_dataset>` optional parameters
+
+        Raises
+        ------
+        Exception
+            If a classes list is given, each label must be inside the :attr:`CATEGORIES` list attribute
+
+        Examples
+        --------
+        >>> coco_ds = CocoDetectionDataset(
+        ...     img_folder = "val2017",
+        ...     ann_file = "annotations/instances_val2017.json",
+        ...     mode = "validation"
+        ... )
+        >>> frames = next(iter(coco_ds.train_loader()))
+        >>> frames = frames[0].batch_list(frames)
+        >>> frames.get_view(frames.boxes2d).render()
+        """
         if "sample" not in kwargs:
             kwargs["sample"] = False
@@ -140,7 +140,7 @@ def __init__(
         self.items = self.ids
 
     def getitem(self, idx):
-        """ Get the :mod:`Frame <aloscene.frame>` corresponds to *idx* index
+        """Get the :mod:`Frame <aloscene.frame>` corresponding to the *idx* index
 
         Parameters
         ----------
@@ -298,14 +298,7 @@ def show_random_frame(coco_loader):
 
 def main():
     """Main"""
-    # coco_dataset = CocoDetectionDataset(
-    #     img_folder="val2017",
-    #     stuff_ann_file="annotations/stuff_val2017.json",
-    #     ann_file="annotations/instances_val2017.json",
-    #     return_masks=True,
-    # )
     coco_dataset = CocoDetectionDataset(sample=True)
-
     for f, frames in enumerate(coco_dataset.train_loader(batch_size=2)):
         frames = Frame.batch_list(frames)
         frames.get_view().render()
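For reference, the `classes` filter documented in the docstring above can be exercised as follows. This is a minimal sketch, assuming the COCO 2017 validation split is configured in `alodataset_config.json`; the class names are illustrative, and each must appear in `CocoDetectionDataset.CATEGORIES` or `__init__` raises:

    from alodataset import CocoDetectionDataset

    # Keep only the annotations of two categories (illustrative names).
    coco_ds = CocoDetectionDataset(
        img_folder="val2017",
        ann_file="annotations/instances_val2017.json",
        classes=["cat", "dog"],
    )
    frame = coco_ds.getitem(0)  # Frame whose boxes2d only cover the two classes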
mode = "validation" + ) + >>> frames = next(iter(coco_ds.train_loader())) + >>> frames = frames[0].batch_list(frames) + >>> frames.get_view(frames.boxes2d,).render() + """ if "sample" not in kwargs: kwargs["sample"] = False @@ -140,7 +140,7 @@ def __init__( self.items = self.ids def getitem(self, idx): - """ Get the :mod:`Frame ` corresponds to *idx* index + """Get the :mod:`Frame ` corresponds to *idx* index Parameters ---------- @@ -298,14 +298,7 @@ def show_random_frame(coco_loader): def main(): """Main""" - # coco_dataset = CocoDetectionDataset( - # img_folder="val2017", - # stuff_ann_file="annotations/stuff_val2017.json", - # ann_file="annotations/instances_val2017.json", - # return_masks=True, - # ) coco_dataset = CocoDetectionDataset(sample=True) - for f, frames in enumerate(coco_dataset.train_loader(batch_size=2)): frames = Frame.batch_list(frames) frames.get_view().render() diff --git a/alodataset/sintel_disparity_dataset.py b/alodataset/sintel_disparity_dataset.py index 49a057a0..3763e1fe 100644 --- a/alodataset/sintel_disparity_dataset.py +++ b/alodataset/sintel_disparity_dataset.py @@ -106,7 +106,8 @@ def _get_camera_frames(self, sequence_data, camera): if __name__ == "__main__": dataset = SintelDisparityDataset(sample=True) - # show some frames at various indices - for idx in [1, 2, 5]: - frames = dataset.getitem(idx)["left"] - frames.get_view().render() + + for f, frames in enumerate(dataset.train_loader(batch_size=2)): + frames = Frame.batch_list(frames) + frames["left"].get_view().render() + break diff --git a/alodataset/sintel_flow_dataset.py b/alodataset/sintel_flow_dataset.py index e47548c4..26305961 100644 --- a/alodataset/sintel_flow_dataset.py +++ b/alodataset/sintel_flow_dataset.py @@ -96,7 +96,8 @@ def _get_camera_frames(self, sequence_data, camera): if __name__ == "__main__": dataset = SintelFlowDataset(sample=True) - # show some frames at various indices - for idx in [1, 2, 5]: - frames = dataset.getitem(idx)["left"] - frames.get_view().render() + + for f, frames in enumerate(dataset.train_loader(batch_size=2)): + frames = Frame.batch_list(frames) + frames["left"].get_view().render() + break diff --git a/aloscene/frame.py b/aloscene/frame.py index 8be45529..e603676b 100644 --- a/aloscene/frame.py +++ b/aloscene/frame.py @@ -25,7 +25,6 @@ def __new__( boxes2d: Union[dict, BoundingBoxes2D] = None, boxes3d: Union[dict, BoundingBoxes3D] = None, flow: Flow = None, - mask: Mask = None, segmentation: Mask = None, disparity: Disparity = None, normalization="255", @@ -45,7 +44,6 @@ def __new__( tensor.add_label("boxes2d", boxes2d, align_dim=["B", "T"], mergeable=False) tensor.add_label("boxes3d", boxes3d, align_dim=["B", "T"], mergeable=False) tensor.add_label("flow", flow, align_dim=["B", "T"], mergeable=False) - tensor.add_label("mask", mask, align_dim=["B", "T"], mergeable=True) tensor.add_label("disparity", disparity, align_dim=["B", "T"], mergeable=True) tensor.add_label("segmentation", segmentation, align_dim=["B", "T"], mergeable=False) @@ -99,19 +97,6 @@ def append_boxes3d(self, boxes_3d: BoundingBoxes3D, name: str = None): """ self._append_label("boxes3d", boxes_3d, name) - def append_mask(self, mask: Mask, name: str = None): - """Attach a mask to the frame. - - Parameters - ---------- - mask: aloscene.Mask - Mask to attached to the Frame - name: str - If none, the mask will be attached without name (if possible). Otherwise if no other unnamed - mask are attached to the frame, the mask will be added to the set of mask. 
- """ - self._append_label("mask", mask, name) - def append_flow(self, flow, name=None): """Attach a flow to the frame. diff --git a/aloscene/mask.py b/aloscene/mask.py index ca91a776..3ff6bed7 100644 --- a/aloscene/mask.py +++ b/aloscene/mask.py @@ -28,7 +28,7 @@ def __new__(cls, x, labels: Union[dict, Labels] = None, *args, **kwargs): x = load_mask(x) kwargs["names"] = ("N", "H", "W") tensor = super().__new__(cls, x, *args, **kwargs) - tensor.add_label("labels", labels, align_dim=["N"], mergeable=True) + tensor.add_label("labels", labels, align_dim=["N"], mergeable=False) return tensor def __init__(self, x, *args, **kwargs): diff --git a/aloscene/tensors/augmented_tensor.py b/aloscene/tensors/augmented_tensor.py index b251069d..3f41cafc 100644 --- a/aloscene/tensors/augmented_tensor.py +++ b/aloscene/tensors/augmented_tensor.py @@ -859,17 +859,22 @@ def pad(self, offset_y: tuple, offset_x: tuple, **kwargs): Parameters ---------- - offset_y: tuple - (percentage top_offset, percentage bottom_offset) Percentage based on the previous size - offset_x: tuple - (percentage left_offset, percentage right_offset) Percentage based on the previous size - + offset_y: tuple of float or tuple of int + (percentage top_offset, percentage bottom_offset) Percentage based on the previous size If tuple of int + the absolute value will be converted to float (percentahe) before to be applied. + offset_x: tuple of float or tuple of int + (percentage left_offset, percentage right_offset) Percentage based on the previous size. If tuple of int + the absolute value will be converted to float (percentage) before to be applied. Returns ------- croped : aloscene AugmentedTensor croped tensor """ + if isinstance(offset_y[0], int) and isinstance(offset_y[1], int): + offset_y = (offset_y[0] / self.H, offset_y[1] / self.H) + if isinstance(offset_x[0], int) and isinstance(offset_x[1], int): + offset_x = (offset_x[0] / self.W, offset_x[1] / self.W) padded = self._pad(offset_y, offset_x, **kwargs) padded.recursive_apply_on_labels_(lambda label: self._pad_label(label, offset_y, offset_x, **kwargs)) diff --git a/aloscene/tensors/spatial_augmented_tensor.py b/aloscene/tensors/spatial_augmented_tensor.py index e9e5107a..cdca1a77 100644 --- a/aloscene/tensors/spatial_augmented_tensor.py +++ b/aloscene/tensors/spatial_augmented_tensor.py @@ -13,11 +13,20 @@ class SpatialAugmentedTensor(AugmentedTensor): @staticmethod - def __new__(cls, x, *args, cam_intrinsic: CameraIntrinsic = None, cam_extrinsic: CameraExtrinsic = None, **kwargs): + def __new__( + cls, + x, + *args, + cam_intrinsic: CameraIntrinsic = None, + cam_extrinsic: CameraExtrinsic = None, + mask=None, + **kwargs, + ): tensor = super().__new__(cls, x, *args, **kwargs) # Add camera parameters as labels tensor.add_label("cam_intrinsic", cam_intrinsic, align_dim=["B", "T"], mergeable=True) tensor.add_label("cam_extrinsic", cam_extrinsic, align_dim=["B", "T"], mergeable=True) + tensor.add_label("mask", mask, align_dim=["B", "T"], mergeable=True) return tensor def __init__(self, x, *args, **kwargs): @@ -35,6 +44,19 @@ def W(self): def H(self): return self.shape[self.names.index("H")] + def append_mask(self, mask, name: str = None): + """Attach a mask to the frame. + + Parameters + ---------- + mask: aloscene.Mask + Mask to attached to the Frame + name: str + If none, the mask will be attached without name (if possible). Otherwise if no other unnamed + mask are attached to the frame, the mask will be added to the set of mask. 
+ """ + self._append_label("mask", mask, name) + def append_cam_intrinsic(self, cam_intrinsic: CameraIntrinsic): self._append_label("cam_intrinsic", cam_intrinsic) @@ -218,15 +240,17 @@ def batch_list(sa_tensors: list, pad_boxes: bool = False): Parameters ---------- - sa_tensors: list - List of any aloscene.tensors.SpatialAugmentedTensor (or list of dicts or SpatialAugmentedTensor) + sa_tensors: list or dict + List of any aloscene.tensors.SpatialAugmentedTensor. If dict is given, this method will be applied on each + list of spatial augmented tensors within the list pad_boxes: bool By default, do not rescale the boxes attached to the sptial augmented Tensor (see explanation in boxes2d.pad) Returns ----------- - batch_frame: a child of aloscene.tensors.SpatialAugmentedTensor (or dict of SpatialAugmentedTensor) - Multiple sa_tensor with mask label + aloscene.tensors.SpatialAugmentedTensor + A child of aloscene.tensors.SpatialAugmentedTensor (or dict of SpatialAugmentedTensor) + with `mask` label to keep track of the padded areas. """ assert len(sa_tensors) >= 1 and isinstance(sa_tensors, list) frame0 = sa_tensors[0] @@ -240,28 +264,33 @@ def batch_list(sa_tensors: list, pad_boxes: bool = False): max_h, max_w = 0, 0 dtype = sa_tensors[0].dtype device = sa_tensors[0].device - normalization = sa_tensors[0].normalization - mean_std = sa_tensors[0].mean_std - instance = type(sa_tensors[0]) + + if ( + "N" in sa_tensors[0].names + or "C" not in sa_tensors[0].names + or "H" not in sa_tensors[0].names + or "W" not in sa_tensors[0].names + ): + raise Exception( + "{} (with names: {}) as it is, does not seem to be mergeable using batch_list.".format( + type(sa_tensors[0]), sa_tensors[0].names + ) + ) # Retrieve the target size for i, frame in enumerate(sa_tensors): - max_h, max_w = max(frame.H, max_h), max(frame.W, max_w) + if frame is not None: + max_h, max_w = max(frame.H, max_h), max(frame.W, max_w) saved_frame_labels = {} n_sa_tensors = [] - for i, frame in enumerate(sa_tensors): + for i, n_frame in enumerate(sa_tensors): + if n_frame is None: + continue # Add the batch dimension and drop the labels - n_sa_tensors.append(frame.batch()) - + n_sa_tensors.append(n_frame.batch()) + frame = n_frame labels = n_sa_tensors[i].get_labels() - # Pad labels - labels = AugmentedTensor.apply_on_label( - labels, - lambda l: l.pad( - (0, max_h / frame.H - 1), (0, max_w / frame.W - 1), frame_size=frame.HW, pad_boxes=pad_boxes - ), - ) # Merge labels on the first dim (TODO, move on an appropriate method) # The following can be merge into an other method in the augmented_tensor class that do roughly the same thing @@ -303,36 +332,40 @@ def batch_list(sa_tensors: list, pad_boxes: bool = False): batch_size = len(n_sa_tensors) # Retrieve the new shapes and dim names n_tensor_shape, n_mask_shape = list(n_sa_tensors[0].shape), list(n_sa_tensors[0].shape) + # New target frame size n_tensor_shape[0], n_mask_shape[0] = batch_size, batch_size n_tensor_shape[n_sa_tensors[0].names.index("H")] = max_h n_tensor_shape[n_sa_tensors[0].names.index("W")] = max_w + # New target mask shape n_mask_shape[n_sa_tensors[0].names.index("H")] = max_h n_mask_shape[n_sa_tensors[0].names.index("W")] = max_w n_mask_shape[n_sa_tensors[0].names.index("C")] = 1 n_names = n_sa_tensors[0].names + n_padded_list = [] + for spatial_tensor in n_sa_tensors: + h_pad = (0, max_h - spatial_tensor.H) + w_pad = (0, max_w - spatial_tensor.W) + padded_spatial_tensor = spatial_tensor.pad(h_pad, w_pad, pad_boxes=pad_boxes) + 
+            n_padded_list.append(padded_spatial_tensor)
+
+        n_augmented_tensors = torch.cat(n_padded_list, dim=0)
+
         # Set the new mask and tensor buffer filled up with zeros and ones
         # Also, for normalized frames, the zero value is actually different based on the mean/std
         n_tensor = torch.zeros(tuple(n_tensor_shape), dtype=dtype, device=device)
-        if mean_std is not None:
-            mean_tensor, std_tensor = instance._get_mean_std_tensor(
-                tuple(n_mask_shape), n_names, mean_std, device=device
-            )
-            n_tensor = n_tensor - mean_tensor
-            n_tensor = n_tensor / std_tensor
-        n_mask = torch.ones(tuple(n_mask_shape), dtype=torch.float, device=device)
 
+        # Create the batch list mask
+        n_mask = torch.ones(tuple(n_mask_shape), dtype=torch.float, device=device)
         for b, frame in enumerate(n_sa_tensors):
             n_slice = frame.get_slices({"B": b, "H": slice(None, frame.H), "W": slice(None, frame.W)})
             n_tensor[n_slice].copy_(frame[0])
             n_mask[n_slice] = 0
 
-        n_frame = instance(n_tensor, names=n_names, normalization=normalization, device=device, mean_std=mean_std)
-        n_frame.append_mask(aloscene.Mask(n_mask, names=n_names))
-        # Put back the merged dropped labels
-        n_frame.set_labels(saved_frame_labels)
+        # n_frame = instance(n_tensor, names=n_names, normalization=normalization, device=device, mean_std=mean_std)
+        n_augmented_tensors.append_mask(aloscene.Mask(n_mask, names=n_names))
 
-        return n_frame
+        return n_augmented_tensors
 
     def _relative_to_absolute_hs_ws(self, hs=None, ws=None, assert_integer=True):
         """
@@ -487,11 +520,10 @@ def _pad(self, offset_y: tuple, offset_x: tuple, value=0, **kwargs):
         -------
         padded
         """
-
-        pad_top = int(offset_y[0] * self.H)
-        pad_bottom = int(offset_y[1] * self.H)
-        pad_left = int(offset_x[0] * self.W)
-        pad_right = int(offset_x[1] * self.W)
+        pad_top = int(round(offset_y[0] * self.H))
+        pad_bottom = int(round(offset_y[1] * self.H))
+        pad_left = int(round(offset_x[0] * self.W))
+        pad_right = int(round(offset_x[1] * self.W))
 
         padding = [pad_left, pad_top, pad_right, pad_bottom]
         tensor_padded = F.pad(self.rename(None), padding, padding_mode="constant", fill=value).reset_names()
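Taken together, the `batch_list` rework now pads each tensor in absolute pixels (through the new int branch of `pad`, so the attached labels are padded consistently), concatenates the results with `torch.cat`, and attaches a `mask` label flagging the padded pixels. A minimal sketch with two dummy frames of different sizes:

    import torch
    import aloscene

    frame_a = aloscene.Frame(torch.rand(3, 400, 500), names=("C", "H", "W"))
    frame_b = aloscene.Frame(torch.rand(3, 300, 600), names=("C", "H", "W"))

    # Both frames are padded to (400, 600) and stacked on a new batch dim.
    batch = aloscene.Frame.batch_list([frame_a, frame_b])
    print(batch.shape)       # (2, 3, 400, 600)
    print(batch.mask.shape)  # (2, 1, 400, 600), 1.0 where pixels are padding

The switch from truncation to `int(round(...))` in `_pad` is what makes this round trip exact: frame_b needs 100 extra rows, and 100 / 300 * 300 evaluates to 99.999... in floating point, which `int()` would truncate to 99 while `round()` correctly recovers 100.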