diff --git a/configs/_base_/datasets/scannet-3d-18class.py b/configs/_base_/datasets/scannet-3d-18class.py
index c34a575961..b97e858ba5 100644
--- a/configs/_base_/datasets/scannet-3d-18class.py
+++ b/configs/_base_/datasets/scannet-3d-18class.py
@@ -16,18 +16,16 @@
         use_dim=[0, 1, 2]),
     dict(
         type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
+        with_bbox_3d=True,
+        with_label_3d=True,
         with_mask_3d=True,
         with_seg_3d=True),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='PointSegClassMapping',
         valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
                        34, 36, 39),
         max_cat_id=40),
-    dict(
-        type='GlobalAlignment', rotation_axis=2,
-        ignore_index=len(class_names)),
     dict(type='IndoorPointSample', num_points=40000),
     dict(
         type='RandomFlip3D',
@@ -54,9 +52,7 @@
         shift_height=True,
         load_dim=6,
         use_dim=[0, 1, 2]),
-    dict(
-        type='GlobalAlignment', rotation_axis=2,
-        ignore_index=len(class_names)),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='MultiScaleFlipAug3D',
         img_scale=(1333, 800),
@@ -93,16 +89,11 @@
         use_dim=[0, 1, 2]),
     dict(
         type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
-        with_mask_3d=True,
-        with_seg_3d=True),
-    dict(type='PointSegClassMapping', valid_cat_ids=valid_class_ids),
-    dict(
-        type='GlobalAlignment',
-        rotation_axis=2,
-        ignore_index=len(class_names),
-        extract_bbox=True),
+        with_bbox_3d=True,
+        with_label_3d=True,
+        with_mask_3d=False,
+        with_seg_3d=False),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='DefaultFormatBundle3D',
         class_names=class_names,
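Since ground-truth boxes are now loaded from the info file rather than re-extracted from masks, the only remaining ordering constraint is that GlobalAlignment runs before sampling and augmentation; it no longer has to follow PointSegClassMapping. A hedged sketch of how the reworked pipeline head composes (Compose is mmdet's; `coord_type='DEPTH'` is assumed here, as it is not visible in the hunk context above):

    import mmdet3d.datasets  # noqa: F401, registers the 3D pipeline transforms
    from mmdet.datasets.pipelines import Compose

    train_pipeline_head = Compose([
        dict(
            type='LoadPointsFromFile',
            coord_type='DEPTH',  # assumed; not shown in the hunks above
            shift_height=True,
            load_dim=6,
            use_dim=[0, 1, 2]),
        dict(
            type='LoadAnnotations3D',
            with_bbox_3d=True,  # boxes now come from the info file
            with_label_3d=True,
            with_mask_3d=True,
            with_seg_3d=True),
        dict(type='GlobalAlignment', rotation_axis=2),  # before class mapping
    ])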
- """ - xmin = np.min(points[:, 0]) - ymin = np.min(points[:, 1]) - zmin = np.min(points[:, 2]) - xmax = np.max(points[:, 0]) - ymax = np.max(points[:, 1]) - zmax = np.max(points[:, 2]) - bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2, - (zmin + zmax) / 2, xmax - xmin, ymax - ymin, - zmax - zmin]) - return bbox - - def _extract_bboxes(self, input_dict): - """Extract bounding boxes from points, semantic mask and instance mask. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after extracting bboxes, keys in \ - input_dict['bbox3d_fields'] are updated in the dict. - """ - # TODO: this function is only used in ScanNet-Det pipeline currently - # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes - from mmdet3d.core.bbox import DepthInstance3DBoxes - - assert 'pts_instance_mask' in input_dict.keys(), \ - 'instance mask is not provided in GlobalAlignment' - assert 'pts_semantic_mask' in input_dict.keys(), \ - 'semantic mask is not provided in GlobalAlignment' - - coords = input_dict['points'].coord.numpy() - inst_mask = input_dict['pts_instance_mask'] - sem_mask = input_dict['pts_semantic_mask'] - - # select points from valid categories where we want to extract bboxes - valid_cat_mask = (sem_mask != self.ignore_index) - inst_ids = np.unique(inst_mask[valid_cat_mask]) # ids of valid insts - instance_bboxes = np.zeros((inst_ids.shape[0], 7)) - inst_id2cat_id = { - inst_id: sem_mask[inst_mask == inst_id][0] - for inst_id in inst_ids - } - for bbox_idx, inst_id in enumerate(inst_ids): - cat_id = inst_id2cat_id[inst_id] - inst_coords = coords[inst_mask == inst_id] - bbox = self._bbox_from_points(inst_coords) - instance_bboxes[bbox_idx, :6] = bbox - instance_bboxes[bbox_idx, 6] = cat_id - - if 'gt_bboxes_3d' not in input_dict['bbox3d_fields']: - input_dict['bbox3d_fields'].append('gt_bboxes_3d') - input_dict['gt_bboxes_3d'] = DepthInstance3DBoxes( - instance_bboxes[:, :6], - box_dim=6, - with_yaw=False, - origin=(0.5, 0.5, 0.5)) - input_dict['gt_labels_3d'] = instance_bboxes[:, 6].astype(np.long) - def __call__(self, input_dict): """Call function to shuffle points. @@ -447,16 +371,12 @@ def __call__(self, input_dict): self._check_rot_mat(rot_mat) self._rot_points(input_dict, rot_mat) self._trans_points(input_dict, trans_vec) - if self.extract_bbox: - self._extract_bboxes(input_dict) return input_dict def __repr__(self): repr_str = self.__class__.__name__ - repr_str += f'(rotation_axis={self.rotation_axis},' - repr_str += f' ignore_index={self.ignore_index},' - repr_str += f' extract_bbox={self.extract_bbox})' + repr_str += f'(rotation_axis={self.rotation_axis})' return repr_str diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py index dc7e34cae1..1dfff7d7f7 100644 --- a/mmdet3d/datasets/scannet_dataset.py +++ b/mmdet3d/datasets/scannet_dataset.py @@ -154,89 +154,6 @@ def _get_axis_align_matrix(info): 'use new pre-process scripts to re-generate ScanNet data') return np.eye(4).astype(np.float32) - def evaluate(self, - results, - metric=None, - iou_thr=(0.25, 0.5), - logger=None, - show=False, - out_dir=None, - pipeline=None): - """Evaluate. - - Evaluation in indoor protocol. - Since ScanNet detection data pipeline re-computes ground-truth boxes, - we can't directly use gt_bboxes from self.data_infos. - - Args: - results (list[dict]): List of results. - metric (str | list[str]): Metrics to be evaluated. - iou_thr (list[float]): AP IoU thresholds. - show (bool): Whether to visualize. - Default: False. 
diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py
index dc7e34cae1..1dfff7d7f7 100644
--- a/mmdet3d/datasets/scannet_dataset.py
+++ b/mmdet3d/datasets/scannet_dataset.py
@@ -154,89 +154,6 @@ def _get_axis_align_matrix(info):
             'use new pre-process scripts to re-generate ScanNet data')
         return np.eye(4).astype(np.float32)
 
-    def evaluate(self,
-                 results,
-                 metric=None,
-                 iou_thr=(0.25, 0.5),
-                 logger=None,
-                 show=False,
-                 out_dir=None,
-                 pipeline=None):
-        """Evaluate.
-
-        Evaluation in indoor protocol.
-        Since ScanNet detection data pipeline re-computes ground-truth boxes,
-        we can't directly use gt_bboxes from self.data_infos.
-
-        Args:
-            results (list[dict]): List of results.
-            metric (str | list[str]): Metrics to be evaluated.
-            iou_thr (list[float]): AP IoU thresholds.
-            show (bool): Whether to visualize.
-                Default: False.
-            out_dir (str): Path to save the visualization results.
-                Default: None.
-            pipeline (list[dict], optional): raw data loading for showing.
-                Default: None.
-
-        Returns:
-            dict: Evaluation results.
-        """
-        from mmdet3d.core.evaluation import indoor_eval
-        assert isinstance(
-            results, list), f'Expect results to be list, got {type(results)}.'
-        assert len(results) > 0, 'Expect length of results > 0.'
-        assert len(results) == len(self.data_infos)
-        assert isinstance(
-            results[0], dict
-        ), f'Expect elements in results to be dict, got {type(results[0])}.'
-        # load gt_bboxes via pipeline
-        pipeline = self._get_pipeline(pipeline)
-        gt_bboxes = [
-            self._extract_data(
-                i, pipeline, ['gt_bboxes_3d', 'gt_labels_3d'], load_annos=True)
-            for i in range(len(self.data_infos))
-        ]
-        gt_annos = [self._build_annos(*gt_bbox) for gt_bbox in gt_bboxes]
-        label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
-        ret_dict = indoor_eval(
-            gt_annos,
-            results,
-            iou_thr,
-            label2cat,
-            logger=logger,
-            box_type_3d=self.box_type_3d,
-            box_mode_3d=self.box_mode_3d)
-        if show:
-            self.show(results, out_dir, pipeline=pipeline)
-
-        return ret_dict
-
-    @staticmethod
-    def _build_annos(gt_bboxes, gt_labels):
-        """Transform gt bboxes and labels into self.data_infos['annos'] format.
-
-        Args:
-            gt_bboxes (:obj:`BaseInstance3DBoxes`): \
-                3D bounding boxes in Depth coordinate
-            gt_labels (torch.Tensor): Labels of boxes.
-
-        Returns:
-            dict: annotations including the following keys
-
-                - gt_boxes_upright_depth (np.ndarray): 3D bounding boxes.
-                - class (np.ndarray): Labels of boxes.
-                - gt_num (int): Number of boxes.
-        """
-        bbox = gt_bboxes.tensor.numpy()[:, :6].copy()  # drop yaw dimension
-        bbox[..., 2] += bbox[..., 5] / 2  # bottom center to gravity center
-        anno = {
-            'gt_boxes_upright_depth': bbox,
-            'class': gt_labels.numpy(),
-            'gt_num': gt_labels.shape[0]
-        }
-        return anno
-
     def _build_default_pipeline(self):
         """Build the default pipeline for this dataset."""
         pipeline = [
@@ -248,19 +165,11 @@
                 use_dim=[0, 1, 2]),
             dict(
                 type='LoadAnnotations3D',
-                with_bbox_3d=False,
-                with_label_3d=False,
-                with_mask_3d=True,
-                with_seg_3d=True),
-            dict(
-                type='PointSegClassMapping',
-                valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                               33, 34, 36, 39)),
-            dict(
-                type='GlobalAlignment',
-                rotation_axis=2,
-                ignore_index=len(self.CLASSES),
-                extract_bbox=True),
+                with_bbox_3d=True,
+                with_label_3d=True,
+                with_mask_3d=False,
+                with_seg_3d=False),
+            dict(type='GlobalAlignment', rotation_axis=2),
             dict(
                 type='DefaultFormatBundle3D',
                 class_names=self.CLASSES,
@@ -287,10 +196,9 @@ def show(self, results, out_dir, show=True, pipeline=None):
             data_info = self.data_infos[i]
             pts_path = data_info['pts_path']
             file_name = osp.split(pts_path)[-1].split('.')[0]
-            points, gt_bboxes = self._extract_data(
-                i, pipeline, ['points', 'gt_bboxes_3d'], load_annos=True)
-            points = points.numpy()
-            gt_bboxes = gt_bboxes.tensor.numpy()
+            points = self._extract_data(
+                i, pipeline, 'points', load_annos=True).numpy()
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
             pred_bboxes = result['boxes_3d'].tensor.numpy()
             show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,
                         show)
diff --git a/tests/data/scannet/scannet_infos.pkl b/tests/data/scannet/scannet_infos.pkl
index d0fe1b839d..20595af6a5 100644
Binary files a/tests/data/scannet/scannet_infos.pkl and b/tests/data/scannet/scannet_infos.pkl differ
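With the override deleted and `scannet_infos.pkl` regenerated to hold axis-aligned ground truth, the dataset presumably falls back to the generic indoor `evaluate()` of its base class, which reads boxes via `get_ann_info` instead of re-extracting them from masks. A hedged usage sketch (paths point at the repo's test data; the constructor defaults are assumed sufficient):

    from mmdet3d.datasets import ScanNetDataset

    dataset = ScanNetDataset(
        data_root='tests/data/scannet',
        ann_file='tests/data/scannet/scannet_infos.pkl')
    # Axis-aligned DepthInstance3DBoxes straight from the info file:
    gt_boxes = dataset.get_ann_info(0)['gt_bboxes_3d']
    # results = [dict(boxes_3d=..., labels_3d=..., scores_3d=...)]
    # ret_dict = dataset.evaluate(results, metric=[0.25, 0.5])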
diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py
index 37ea8ccba4..1974553447 100644
--- a/tests/test_data/test_datasets/test_scannet_dataset.py
+++ b/tests/test_data/test_datasets/test_scannet_dataset.py
@@ -27,15 +27,11 @@ def test_getitem():
             with_label_3d=True,
             with_mask_3d=True,
             with_seg_3d=True),
+        dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='PointSegClassMapping',
             valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
                            33, 34, 36, 39)),
-        dict(
-            type='GlobalAlignment',
-            rotation_axis=2,
-            ignore_index=len(class_names),
-            extract_bbox=True),
         dict(type='IndoorPointSample', num_points=5),
         dict(
             type='RandomFlip3D',
@@ -79,13 +75,15 @@
         [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
         [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
     expected_gt_bboxes_3d = torch.tensor(
-        [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000],
-         [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000],
-         [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000],
-         [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000],
-         [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]])
-    expected_gt_labels = np.array(
-        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
+        [[-1.1835, -3.6317, 1.5704, 1.7577, 0.3761, 0.5724, 0.0000],
+         [-3.1832, 3.2269, 1.1911, 0.6727, 0.2251, 0.6715, 0.0000],
+         [-0.9598, -2.2864, 0.0093, 0.7506, 2.5709, 1.2145, 0.0000],
+         [-2.6988, -2.7354, 0.8288, 0.7680, 1.8877, 0.2870, 0.0000],
+         [3.2989, 0.2885, -0.0090, 0.7600, 3.8814, 2.1603, 0.0000]])
+    expected_gt_labels = np.array([
+        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
+        0, 0, 0, 5, 5, 5
+    ])
     expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
     expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
     original_classes = scannet_dataset.CLASSES
@@ -131,23 +129,47 @@ def test_evaluate():
     results = []
     pred_boxes = dict()
     pred_boxes['boxes_3d'] = DepthInstance3DBoxes(
-        torch.tensor(
-            [[-3.7146, -1.0654, 0.6052, 0.6298, 1.9906, 0.4429, 0.0000],
-             [-8.5576, -1.8178, 0.2046, 1.1263, 2.7851, 1.8632, 0.0000],
-             [-8.8859, -5.3550, 0.9772, 0.9093, 0.3098, 0.5662, 0.0000],
-             [-8.0989, -5.0358, 0.0372, 0.2746, 0.2057, 0.5532, 0.0000],
-             [-6.9733, 0.3352, -0.0296, 1.2265, 0.7187, 2.2613, 0.0000],
-             [-5.3636, -1.6047, 0.3701, 2.8043, 1.1057, 0.3171, 0.0000]]))
-    pred_boxes['labels_3d'] = torch.tensor([4, 11, 11, 10, 0, 3])
+        torch.tensor([[
+            1.4813e+00, 3.5207e+00, 1.5704e+00, 1.7445e+00, 2.3196e-01,
+            5.7235e-01, 0.0000e+00
+        ],
+                      [
+                          2.9040e+00, -3.4803e+00, 1.1911e+00, 6.6078e-01,
+                          1.7072e-01, 6.7154e-01, 0.0000e+00
+                      ],
+                      [
+                          1.1466e+00, 2.1987e+00, 9.2576e-03, 5.4184e-01,
+                          2.5346e+00, 1.2145e+00, 0.0000e+00
+                      ],
+                      [
+                          2.9168e+00, 2.5016e+00, 8.2875e-01, 6.1697e-01,
+                          1.8428e+00, 2.8697e-01, 0.0000e+00
+                      ],
+                      [
+                          -3.3114e+00, -1.3351e-02, -8.9524e-03, 4.4082e-01,
+                          3.8582e+00, 2.1603e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.0135e+00, -3.4857e+00, 9.3848e-01, 1.9911e+00,
+                          2.1603e-01, 1.2767e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.1945e+00, -3.1402e+00, -3.8165e-02, 1.4801e+00,
+                          6.8676e-01, 1.0586e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.7553e+00, 2.4055e+00, -2.9972e-02, 1.4764e+00,
+                          1.4927e+00, 2.3380e+00, 0.0000e+00
+                      ]]))
+    pred_boxes['labels_3d'] = torch.tensor([6, 6, 4, 9, 11, 11])
     pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0, 1.0, 1.0, 0.5])
     results.append(pred_boxes)
     metric = [0.25, 0.5]
     ret_dict = scannet_dataset.evaluate(results, metric)
-    assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01
+    assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01
+    assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01
     assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01
-    assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01
 
     # test evaluate with pipeline
     class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
@@ -163,19 +185,11 @@
             use_dim=[0, 1, 2]),
         dict(
             type='LoadAnnotations3D',
-            with_bbox_3d=False,
-            with_label_3d=False,
-            with_mask_3d=True,
-            with_seg_3d=True),
-        dict(
-            type='PointSegClassMapping',
-            valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
-                           34, 36, 39)),
-        dict(
-            type='GlobalAlignment',
-            rotation_axis=2,
-            ignore_index=len(class_names),
-            extract_bbox=True),
+            with_bbox_3d=True,
+            with_label_3d=True,
+            with_mask_3d=False,
+            with_seg_3d=False),
+        dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='DefaultFormatBundle3D',
             class_names=class_names,
@@ -185,11 +199,10 @@
     ]
     ret_dict = scannet_dataset.evaluate(
         results, metric, pipeline=eval_pipeline)
-    assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01
+    assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01
+    assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01
     assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01
-    assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01
 
 
 def test_show():
@@ -254,19 +267,11 @@
         use_dim=[0, 1, 2]),
     dict(
         type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
-        with_mask_3d=True,
-        with_seg_3d=True),
-    dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
-                       34, 36, 39)),
-    dict(
-        type='GlobalAlignment',
-        rotation_axis=2,
-        ignore_index=len(class_names),
-        extract_bbox=True),
+        with_bbox_3d=True,
+        with_label_3d=True,
+        with_mask_3d=False,
+        with_seg_3d=False),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='DefaultFormatBundle3D',
         class_names=class_names,
diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
index 8f672a2f65..5e64d7e6c4 100644
--- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
+++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
@@ -3,8 +3,7 @@
 import pytest
 import torch
 
-from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes,
-                          DepthInstance3DBoxes, LiDARInstance3DBoxes)
+from mmdet3d.core import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
 from mmdet3d.core.points import DepthPoints, LiDARPoints
 from mmdet3d.datasets import (BackgroundPointsFilter, GlobalAlignment,
                               ObjectNoise, ObjectSample, PointShuffle,
@@ -225,49 +224,22 @@ def test_points_range_filter():
 
 def test_global_alignment():
     np.random.seed(0)
-    valid_cat_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
-                     36, 39)
-    ignore_index = len(valid_cat_ids)
-    cat_ids2class = np.ones((41, ), dtype=np.int) * ignore_index
-    for class_id, cat_id in enumerate(valid_cat_ids):
-        cat_ids2class[cat_id] = class_id
-
-    global_alignment = GlobalAlignment(
-        rotation_axis=2, ignore_index=ignore_index, extract_bbox=True)
+    global_alignment = GlobalAlignment(rotation_axis=2)
 
     points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
                          np.float32).reshape(-1, 6)
-    sem_mask = np.fromfile('tests/data/scannet/semantic_mask/scene0000_00.bin',
-                           np.long)
-    ins_mask = np.fromfile('tests/data/scannet/instance_mask/scene0000_00.bin',
-                           np.long)
     annos = mmcv.load('tests/data/scannet/scannet_infos.pkl')
     info = annos[0]
-    gt_bboxes_3d = info['annos']['gt_boxes_upright_depth']
     axis_align_matrix = info['annos']['axis_align_matrix']
-    gt_labels_3d = info['annos']['class']
 
     depth_points = DepthPoints(points.copy(), points_dim=6)
-    depth_bboxes = DepthInstance3DBoxes(
-        gt_bboxes_3d,
-        box_dim=gt_bboxes_3d.shape[-1],
-        with_yaw=False,
-        origin=(0.5, 0.5, 0.5))
-    sem_mask = cat_ids2class[sem_mask]
 
     input_dict = dict(
         points=depth_points.clone(),
-        gt_bboxes_3d=depth_bboxes,
-        bbox3d_fields=['gt_bboxes_3d'],
-        gt_labels_3d=gt_labels_3d,
-        ann_info=dict(axis_align_matrix=axis_align_matrix),
-        pts_instance_mask=ins_mask,
-        pts_semantic_mask=sem_mask)
+        ann_info=dict(axis_align_matrix=axis_align_matrix))
 
     input_dict = global_alignment(input_dict)
     trans_depth_points = input_dict['points']
-    trans_depth_bboxes = input_dict['gt_bboxes_3d']
-    trans_bbox_labels = input_dict['gt_labels_3d']
 
     # construct expected transformed points by affine transformation
     pts = np.ones((points.shape[0], 4))
@@ -275,60 +247,11 @@
     trans_pts = np.dot(pts, axis_align_matrix.T)
     expected_points = np.concatenate([trans_pts[:, :3], points[:, 3:]],
                                      axis=1)
 
-    expected_bbox_labels = np.array(
-        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
-    expected_depth_bboxes = np.array(
-        [[
-            -3.714606, -1.0654305, 0.6051854, 0.6297655, 1.9905674, 0.44288868,
-            0.
-        ],
-         [
-             -8.557551, -1.8178326, 0.20456636, 1.1263373, 2.7851129,
-             1.8631845, 0.
-         ],
-         [
-             -8.885854, -5.354957, 0.97720087, 0.9093195, 0.30981588, 0.566175,
-             0.
-         ],
-         [
-             -8.098918, -5.0357704, 0.03724962, 0.27458152, 0.20566699,
-             0.5532104, 0.
-         ],
-         [
-             -6.9733434, 0.33523083, -0.02958763, 1.2264912, 0.7187278,
-             2.2613325, 0.
-         ],
-         [
-             -5.36362, -1.6046655, 0.37014085, 2.8042943, 1.1057366,
-             0.31707314, 0.
-         ], [-2.6299255, -2.3314357, 1.4469249, 0., 0., 0., 0.],
-         [-5.201888, -1.014641, 0.11020403, 0., 0., 0., 0.],
-         [
-             -3.5216672, -6.8292904, 0.26571387, 0.13945593, 0.12182455,
-             0.02463818, 0.
-         ],
-         [
-             -6.4834313, -5.4506774, 0.13558027, 1.4790803, 0.6031074,
-             0.60305846, 0.
-         ],
-         [
-             -9.338867, -4.616579, 0.6112565, 0.17650154, 0.988079, 0.16838372,
-             0.
-         ], [-2.0639155, -1.245964, 0.30754995, 0., 0., 0., 0.],
-         [-2.002855, -1.9495802, 2.2899528, 0., 0., 0., 0.],
-         [-2.1240144, -3.751592, 0.92695427, 0., 0., 0., 0.],
-         [-3.6406162, -5.1366153, 0.25374442, 0., 0., 0., 0.]])
-
     assert np.allclose(
         trans_depth_points.tensor.numpy(), expected_points, atol=1e-6)
-    assert np.all(trans_bbox_labels == expected_bbox_labels)
-    assert np.allclose(
-        trans_depth_bboxes.tensor.numpy(), expected_depth_bboxes, atol=1e-6)
 
     repr_str = repr(global_alignment)
-    expected_repr_str = 'GlobalAlignment(rotation_axis=2,' \
-                        f' ignore_index={ignore_index},' \
-                        f' extract_bbox=True)'
+    expected_repr_str = 'GlobalAlignment(rotation_axis=2)'
     assert repr_str == expected_repr_str
diff --git a/tests/test_data/test_pipelines/test_indoor_pipeline.py b/tests/test_data/test_pipelines/test_indoor_pipeline.py
index 5563dcd073..6e705e85f9 100644
--- a/tests/test_data/test_pipelines/test_indoor_pipeline.py
+++ b/tests/test_data/test_pipelines/test_indoor_pipeline.py
@@ -23,19 +23,15 @@ def test_scannet_pipeline():
             use_dim=[0, 1, 2]),
         dict(
             type='LoadAnnotations3D',
-            with_bbox_3d=False,
-            with_label_3d=False,
+            with_bbox_3d=True,
+            with_label_3d=True,
             with_mask_3d=True,
             with_seg_3d=True),
+        dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='PointSegClassMapping',
             valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
                            33, 34, 36, 39)),
-        dict(
-            type='GlobalAlignment',
-            rotation_axis=2,
-            ignore_index=len(class_names),
-            extract_bbox=True),
         dict(type='IndoorPointSample', num_points=5),
         dict(
             type='RandomFlip3D',
@@ -97,13 +93,15 @@
         [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
         [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
     expected_gt_bboxes_3d = torch.tensor(
-        [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000],
-         [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000],
-         [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000],
-         [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000],
-         [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]])
-    expected_gt_labels_3d = np.array(
-        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
+        [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
+         [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
+         [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
+         [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
+         [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
+    expected_gt_labels_3d = np.array([
+        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
+        0, 0, 0, 5, 5, 5
+    ])
     expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
     expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
     assert torch.allclose(points, expected_points, 1e-2)