From 29f8d895607789ac17c064126a659d54ff4f930d Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Mon, 12 Apr 2021 22:27:43 +0800 Subject: [PATCH 01/12] extract axis aligned matrix to info file --- data/scannet/batch_load_scannet_data.py | 7 +++++-- data/scannet/load_scannet_data.py | 12 ++++-------- tools/data_converter/scannet_data_utils.py | 9 +++++++++ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/data/scannet/batch_load_scannet_data.py b/data/scannet/batch_load_scannet_data.py index c6bb3a7b34..b55b363309 100644 --- a/data/scannet/batch_load_scannet_data.py +++ b/data/scannet/batch_load_scannet_data.py @@ -35,8 +35,9 @@ def export_one_scan(scan_name, # includes axisAlignment info for the train set scans. meta_file = osp.join(scannet_dir, scan_name, f'{scan_name}.txt') mesh_vertices, semantic_labels, instance_labels, instance_bboxes, \ - instance2semantic = export(mesh_file, agg_file, seg_file, - meta_file, label_map_file, None, test_mode) + instance2semantic, axis_align_matrix = export( + mesh_file, agg_file, seg_file, meta_file, label_map_file, None, + test_mode) if not test_mode: mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS)) @@ -66,6 +67,8 @@ def export_one_scan(scan_name, np.save(f'{output_filename_prefix}_sem_label.npy', semantic_labels) np.save(f'{output_filename_prefix}_ins_label.npy', instance_labels) np.save(f'{output_filename_prefix}_bbox.npy', instance_bboxes) + np.save(f'{output_filename_prefix}_axis_align_matrix.npy', + axis_align_matrix) def batch_export(max_num_point, diff --git a/data/scannet/load_scannet_data.py b/data/scannet/load_scannet_data.py index d545ae48cd..0cc20312a0 100644 --- a/data/scannet/load_scannet_data.py +++ b/data/scannet/load_scannet_data.py @@ -69,7 +69,7 @@ def export(mesh_file, label_map_file (str): Path of the label_map_file. output_file (str): Path of the output folder. Default: None. - test_mode (bool): Whether is generating training data without labels. + test_mode (bool): Whether is generating test data without labels. Default: False. It returns a tuple, which containts the the following things: @@ -86,8 +86,7 @@ def export(mesh_file, # Load scene axis alignment matrix lines = open(meta_file).readlines() - # TODO: test set data doesn't have align_matrix! - # TODO: save align_matrix and move align step to pipeline in the future + # test set data doesn't have align_matrix axis_align_matrix = np.eye(4) for line in lines: if 'axisAlignment' in line: @@ -97,10 +96,6 @@ def export(mesh_file, ] break axis_align_matrix = np.array(axis_align_matrix).reshape((4, 4)) - pts = np.ones((mesh_vertices.shape[0], 4)) - pts[:, 0:3] = mesh_vertices[:, 0:3] - pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4 - mesh_vertices[:, 0:3] = pts[:, 0:3] # Load semantic and instance labels if not test_mode: @@ -151,9 +146,10 @@ def export(mesh_file, np.save(output_file + '_sem_label.npy', label_ids) np.save(output_file + '_ins_label.npy', instance_ids) np.save(output_file + '_bbox.npy', instance_bboxes) + np.save(output_file + '_axis_align_matrix.npy', axis_align_matrix) return mesh_vertices, label_ids, instance_ids, \ - instance_bboxes, object_id_to_label_id + instance_bboxes, object_id_to_label_id, axis_align_matrix def main(): diff --git a/tools/data_converter/scannet_data_utils.py b/tools/data_converter/scannet_data_utils.py index fa48f5a45b..2f4ed60374 100644 --- a/tools/data_converter/scannet_data_utils.py +++ b/tools/data_converter/scannet_data_utils.py @@ -48,6 +48,12 @@ def get_box_label(self, idx): mmcv.check_file_exist(box_file) return np.load(box_file) + def get_axis_align_matrix(self, idx): + matrix_file = osp.join(self.root_dir, 'scannet_instance_data', + f'{idx}_axis_align_matrix.npy') + mmcv.check_file_exist(matrix_file) + return np.load(matrix_file) + def get_infos(self, num_workers=4, has_label=True, sample_id_list=None): """Get data infos. @@ -125,6 +131,9 @@ def process_single_scene(sample_idx): self.cat_ids2class[classes[i]] for i in range(annotations['gt_num']) ]) + axis_align_matrix = self.get_axis_align_matrix( + sample_idx) # [4, 4] + annotations['axis_align_matrix'] = axis_align_matrix info['annos'] = annotations return info From f82a60543507aeb2903e78b960da33b8f8cfd2a1 Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Tue, 13 Apr 2021 17:37:58 +0800 Subject: [PATCH 02/12] support rotation matrix input for BaseInstance3DBoxes.rotate function --- mmdet3d/core/bbox/structures/base_box3d.py | 13 ++-- mmdet3d/core/bbox/structures/cam_box3d.py | 23 +++++-- mmdet3d/core/bbox/structures/depth_box3d.py | 25 +++++-- mmdet3d/core/bbox/structures/lidar_box3d.py | 23 +++++-- tests/test_utils/test_box3d.py | 74 +++++++++++++++++++-- 5 files changed, 129 insertions(+), 29 deletions(-) diff --git a/mmdet3d/core/bbox/structures/base_box3d.py b/mmdet3d/core/bbox/structures/base_box3d.py index fa674ef53f..c03cd12e49 100644 --- a/mmdet3d/core/bbox/structures/base_box3d.py +++ b/mmdet3d/core/bbox/structures/base_box3d.py @@ -129,12 +129,15 @@ def corners(self): pass @abstractmethod - def rotate(self, angles, axis=0): - """Calculate whether the points are in any of the boxes. + def rotate(self, angle, points=None): + """Rotate boxes with points (optional) with the given angle or \ + rotation matrix. Args: - angles (float): Rotation angles. - axis (int): The axis to rotate the boxes. + angle (float | torch.Tensor | np.ndarray): + Rotation angle or rotation matrix. + points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional): + Points to rotate. Defaults to None. """ pass @@ -144,7 +147,7 @@ def flip(self, bev_direction='horizontal'): pass def translate(self, trans_vector): - """Calculate whether the points are in any of the boxes. + """Translate boxes with the given translation vector. Args: trans_vector (torch.Tensor): Translation vector of size 1x3. diff --git a/mmdet3d/core/bbox/structures/cam_box3d.py b/mmdet3d/core/bbox/structures/cam_box3d.py index 4eab77bcc3..d72391b160 100644 --- a/mmdet3d/core/bbox/structures/cam_box3d.py +++ b/mmdet3d/core/bbox/structures/cam_box3d.py @@ -169,10 +169,12 @@ def nearest_bev(self): return bev_boxes def rotate(self, angle, points=None): - """Rotate boxes with points (optional) with the given angle. + """Rotate boxes with points (optional) with the given angle or \ + rotation matrix. Args: - angle (float, torch.Tensor): Rotation angle. + angle (float | torch.Tensor | np.ndarray): + Rotation angle or rotation matrix. points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional): Points to rotate. Defaults to None. @@ -183,10 +185,19 @@ def rotate(self, angle, points=None): """ if not isinstance(angle, torch.Tensor): angle = self.tensor.new_tensor(angle) - rot_sin = torch.sin(angle) - rot_cos = torch.cos(angle) - rot_mat_T = self.tensor.new_tensor([[rot_cos, 0, -rot_sin], [0, 1, 0], - [rot_sin, 0, rot_cos]]) + assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1 + + if angle.numel() == 1: + rot_sin = torch.sin(angle) + rot_cos = torch.cos(angle) + rot_mat_T = self.tensor.new_tensor([[rot_cos, 0, -rot_sin], + [0, 1, 0], + [rot_sin, 0, rot_cos]]) + else: + rot_mat_T = angle + rot_sin = rot_mat_T[2, 0] + rot_cos = rot_mat_T[0, 0] + angle = np.arctan2(rot_sin, rot_cos) self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T self.tensor[:, 6] += angle diff --git a/mmdet3d/core/bbox/structures/depth_box3d.py b/mmdet3d/core/bbox/structures/depth_box3d.py index a5eb6ed609..8e619114f9 100644 --- a/mmdet3d/core/bbox/structures/depth_box3d.py +++ b/mmdet3d/core/bbox/structures/depth_box3d.py @@ -116,10 +116,12 @@ def nearest_bev(self): return bev_boxes def rotate(self, angle, points=None): - """Rotate boxes with points (optional) with the given angle. + """Rotate boxes with points (optional) with the given angle or \ + rotation matrix. Args: - angle (float, torch.Tensor): Rotation angle. + angle (float | torch.Tensor | np.ndarray): + Rotation angle or rotation matrix. points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional): Points to rotate. Defaults to None. @@ -130,11 +132,20 @@ def rotate(self, angle, points=None): """ if not isinstance(angle, torch.Tensor): angle = self.tensor.new_tensor(angle) - rot_sin = torch.sin(angle) - rot_cos = torch.cos(angle) - rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0], - [rot_sin, rot_cos, 0], [0, 0, - 1]]).T + assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1 + + if angle.numel() == 1: + rot_sin = torch.sin(angle) + rot_cos = torch.cos(angle) + rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0], + [rot_sin, rot_cos, 0], + [0, 0, 1]]).T + else: + rot_mat_T = angle.T + rot_sin = rot_mat_T[0, 1] + rot_cos = rot_mat_T[0, 0] + angle = np.arctan2(rot_sin, rot_cos) + self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T if self.with_yaw: self.tensor[:, 6] -= angle diff --git a/mmdet3d/core/bbox/structures/lidar_box3d.py b/mmdet3d/core/bbox/structures/lidar_box3d.py index a17c3bf49b..2acd8cf60e 100644 --- a/mmdet3d/core/bbox/structures/lidar_box3d.py +++ b/mmdet3d/core/bbox/structures/lidar_box3d.py @@ -114,10 +114,12 @@ def nearest_bev(self): return bev_boxes def rotate(self, angle, points=None): - """Rotate boxes with points (optional) with the given angle. + """Rotate boxes with points (optional) with the given angle or \ + rotation matrix. Args: - angle (float | torch.Tensor): Rotation angle. + angles (float | torch.Tensor | np.ndarray): + Rotation angle or rotation matrix. points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional): Points to rotate. Defaults to None. @@ -128,10 +130,19 @@ def rotate(self, angle, points=None): """ if not isinstance(angle, torch.Tensor): angle = self.tensor.new_tensor(angle) - rot_sin = torch.sin(angle) - rot_cos = torch.cos(angle) - rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0], - [rot_sin, rot_cos, 0], [0, 0, 1]]) + assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1 + + if angle.numel() == 1: + rot_sin = torch.sin(angle) + rot_cos = torch.cos(angle) + rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0], + [rot_sin, rot_cos, 0], + [0, 0, 1]]) + else: + rot_mat_T = angle + rot_sin = rot_mat_T[1, 0] + rot_cos = rot_mat_T[0, 0] + angle = np.arctan2(rot_sin, rot_cos) self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T self.tensor[:, 6] += angle diff --git a/tests/test_utils/test_box3d.py b/tests/test_utils/test_box3d.py index 3536d88122..8bcf12b46b 100644 --- a/tests/test_utils/test_box3d.py +++ b/tests/test_utils/test_box3d.py @@ -11,6 +11,7 @@ points_cam2img, rotation_3d_in_axis, xywhr2xyxyr) +from mmdet3d.core.points import CameraPoints, DepthPoints, LiDARPoints def test_bbox3d_mapping_back(): @@ -225,6 +226,7 @@ def test_lidar_boxes3d(): assert torch.allclose(points, expected_points) # test box rotation + # with input torch.Tensor points and angle expected_tensor = torch.tensor( [[1.4225, -2.7344, -1.7501, 1.7500, 3.3900, 1.6500, 1.7976], [8.5435, -3.6491, -1.6357, 1.5400, 4.0100, 1.5700, 1.6576], @@ -244,6 +246,16 @@ def test_lidar_boxes3d(): assert torch.allclose(points, expected_points, 1e-3) assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) + # with input torch.Tensor points and rotation matrix + points, rot_mat_T = boxes.rotate(-0.13603681398218053, points) # back + rot_mat = np.array([[0.99076125, -0.13561762, 0.], + [0.13561762, 0.99076125, 0.], [0., 0., 1.]]) + points, rot_mat_T = boxes.rotate(rot_mat, points) + assert torch.allclose(boxes.tensor, expected_tensor, 1e-3) + assert torch.allclose(points, expected_points, 1e-3) + assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) + + # with input np.ndarray points and angle points_np = np.array([[-1.0280, 0.9888, -1.4658], [-4.3695, 2.1310, -1.3857], [-6.5263, 1.5595, @@ -262,6 +274,15 @@ def test_lidar_boxes3d(): assert np.allclose(points_np, expected_points_np, 1e-3) assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3) + # with input LiDARPoints and rotation matrix + points_np, rot_mat_T_np = boxes.rotate(-0.13603681398218053, points_np) + lidar_points = LiDARPoints(points_np) + lidar_points, rot_mat_T_np = boxes.rotate(rot_mat, lidar_points) + points_np = lidar_points.tensor.numpy() + + assert np.allclose(points_np, expected_points_np, 1e-3) + assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3) + # test box scaling expected_tensor = torch.tensor([[ 1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797, 1.6592377, @@ -701,6 +722,7 @@ def test_camera_boxes3d(): assert torch.allclose(points, expected_points) # test box rotation + # with input torch.Tensor points and angle expected_tensor = Box3DMode.convert( torch.tensor( [[1.4225, -2.7344, -1.7501, 1.7500, 3.3900, 1.6500, 1.7976], @@ -722,6 +744,17 @@ def test_camera_boxes3d(): assert torch.allclose(points, expected_points, 1e-3) assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) + # with input torch.Tensor points and rotation matrix + points, rot_mat_T = boxes.rotate( + torch.tensor(-0.13603681398218053), points) # back + rot_mat = np.array([[0.99076125, 0., -0.13561762], [0., 1., 0.], + [0.13561762, 0., 0.99076125]]) + points, rot_mat_T = boxes.rotate(rot_mat, points) + assert torch.allclose(boxes.tensor, expected_tensor, 1e-3) + assert torch.allclose(points, expected_points, 1e-3) + assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) + + # with input np.ndarray points and angle points_np = np.array([[0.6762, 1.2559, -1.4658, 2.5359], [0.8784, 4.7814, -1.3857, 0.7167], [-0.2517, 6.7053, -0.9697, 0.5599], @@ -741,6 +774,15 @@ def test_camera_boxes3d(): assert np.allclose(points_np, expected_points_np, 1e-3) assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3) + # with input CameraPoints and rotation matrix + points_np, rot_mat_T_np = boxes.rotate( + torch.tensor(-0.13603681398218053), points_np) + camera_points = CameraPoints(points_np, points_dim=4) + camera_points, rot_mat_T_np = boxes.rotate(rot_mat, camera_points) + points_np = camera_points.tensor.numpy() + assert np.allclose(points_np, expected_points_np, 1e-3) + assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3) + # test box scaling expected_tensor = Box3DMode.convert( torch.tensor([[ @@ -1007,7 +1049,7 @@ def test_depth_boxes3d(): # test box concatenation expected_tensor = torch.tensor( [[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601], - [2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971], + [2.3262, 3.3065, 0.44255, 0.8234, 0.5325, 1.0099, 2.9971], [2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693], [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]]) boxes = DepthInstance3DBoxes.cat([boxes_1, boxes_2]) @@ -1049,14 +1091,16 @@ def test_depth_boxes3d(): [0.5358, -4.5870, -1.4741, 0.0556]]) assert torch.allclose(boxes.tensor, expected_tensor, 1e-3) assert torch.allclose(points, expected_points) + # test box rotation + # with input torch.Tensor points and angle boxes_rot = boxes.clone() expected_tensor = torch.tensor( [[-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585], [-2.4016, -3.2521, 0.4426, 0.8234, 0.5325, 1.0099, -0.1215], [-2.5181, -2.5298, -0.4321, 0.8597, 0.6193, 1.0204, -0.0493], [-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585]]) - points, rot_mar_T = boxes_rot.rotate(-0.022998953275003075, points) + points, rot_mat_T = boxes_rot.rotate(-0.022998953275003075, points) expected_points = torch.tensor([[-0.7049, -1.2400, -1.4658, 2.5359], [-0.9881, -4.7599, -1.3857, 0.7167], [0.0974, -6.7093, -0.9697, 0.5599], @@ -1067,14 +1111,24 @@ def test_depth_boxes3d(): [0.0000, 0.0000, 1.0000]]) assert torch.allclose(boxes_rot.tensor, expected_tensor, 1e-3) assert torch.allclose(points, expected_points, 1e-3) - assert torch.allclose(rot_mar_T, expected_rot_mat_T, 1e-3) + assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) + + # with input torch.Tensor points and rotation matrix + points, rot_mat_T = boxes.rotate(0.022998953275003075, points) # back + rot_mat = np.array([[0.99973554, 0.02299693, 0.], + [-0.02299693, 0.99973554, 0.], [0., 0., 1.]]) + points, rot_mat_T = boxes.rotate(rot_mat, points) + assert torch.allclose(boxes_rot.tensor, expected_tensor, 1e-3) + assert torch.allclose(points, expected_points, 1e-3) + assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3) + # with input np.ndarray points and angle points_np = np.array([[0.6762, 1.2559, -1.4658, 2.5359], [0.8784, 4.7814, -1.3857, 0.7167], [-0.2517, 6.7053, -0.9697, 0.5599], [0.5520, 0.6533, -0.5265, 1.0032], [-0.5358, 4.5870, -1.4741, 0.0556]]) - points_np, rot_mar_T_np = boxes.rotate(-0.022998953275003075, points_np) + points_np, rot_mat_T_np = boxes.rotate(-0.022998953275003075, points_np) expected_points_np = np.array([[0.7049, 1.2400, -1.4658, 2.5359], [0.9881, 4.7599, -1.3857, 0.7167], [-0.0974, 6.7093, -0.9697, 0.5599], @@ -1090,7 +1144,17 @@ def test_depth_boxes3d(): [-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585]]) assert torch.allclose(boxes.tensor, expected_tensor, 1e-3) assert np.allclose(points_np, expected_points_np, 1e-3) - assert np.allclose(rot_mar_T_np, expected_rot_mat_T_np, 1e-3) + assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3) + + # with input DepthPoints and rotation matrix + points_np, rot_mat_T_np = boxes.rotate(0.022998953275003075, points_np) + depth_points = DepthPoints(points_np, points_dim=4) + depth_points, rot_mat_T_np = boxes.rotate(rot_mat, depth_points) + points_np = depth_points.tensor.numpy() + assert torch.allclose(boxes.tensor, expected_tensor, 1e-3) + assert np.allclose(points_np, expected_points_np, 1e-3) + assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3) + th_boxes = torch.tensor( [[0.61211395, 0.8129094, 0.10563634, 1.497534, 0.16927195, 0.27956772], [1.430009, 0.49797538, 0.9382923, 0.07694054, 0.9312509, 1.8919173]], From fec675137b136dc70baec6cab3d72c37ccf7ef36 Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Wed, 14 Apr 2021 14:00:37 +0800 Subject: [PATCH 03/12] add GlobalAlignment function to pipeline --- mmdet3d/datasets/__init__.py | 14 +- mmdet3d/datasets/pipelines/__init__.py | 12 +- mmdet3d/datasets/pipelines/transforms_3d.py | 166 ++++++++++++++++++ tests/data/scannet/scannet_infos.pkl | Bin 5920 -> 6188 bytes .../test_augmentations/test_transforms_3d.py | 118 ++++++++++++- 5 files changed, 294 insertions(+), 16 deletions(-) diff --git a/mmdet3d/datasets/__init__.py b/mmdet3d/datasets/__init__.py index df07e91831..f98b22858d 100644 --- a/mmdet3d/datasets/__init__.py +++ b/mmdet3d/datasets/__init__.py @@ -7,7 +7,8 @@ from .lyft_dataset import LyftDataset from .nuscenes_dataset import NuScenesDataset from .nuscenes_mono_dataset import NuScenesMonoDataset -from .pipelines import (BackgroundPointsFilter, GlobalRotScaleTrans, +from .pipelines import (BackgroundPointsFilter, GlobalAlignment, + GlobalRotScaleTrans, IndoorPatchPointSample, IndoorPointSample, LoadAnnotations3D, LoadPointsFromFile, LoadPointsFromMultiSweeps, NormalizePointsColor, ObjectNoise, ObjectRangeFilter, @@ -27,9 +28,10 @@ 'NuScenesMonoDataset', 'LyftDataset', 'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', 'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D', 'LoadPointsFromFile', - 'NormalizePointsColor', 'IndoorPointSample', 'LoadAnnotations3D', - 'SUNRGBDDataset', 'ScanNetDataset', 'ScanNetSegDataset', 'S3DISSegDataset', - 'SemanticKITTIDataset', 'Custom3DDataset', 'Custom3DSegDataset', - 'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter', - 'VoxelBasedPointSampler', 'get_loading_pipeline' + 'S3DISSegDataset', + 'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample', + 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset', 'ScanNetDataset', + 'ScanNetSegDataset', 'SemanticKITTIDataset', 'Custom3DDataset', + 'Custom3DSegDataset', 'LoadPointsFromMultiSweeps', 'WaymoDataset', + 'BackgroundPointsFilter', 'VoxelBasedPointSampler', 'get_loading_pipeline' ] diff --git a/mmdet3d/datasets/pipelines/__init__.py b/mmdet3d/datasets/pipelines/__init__.py index 67488b9141..4e0ce2c24d 100644 --- a/mmdet3d/datasets/pipelines/__init__.py +++ b/mmdet3d/datasets/pipelines/__init__.py @@ -6,11 +6,11 @@ LoadPointsFromMultiSweeps, NormalizePointsColor, PointSegClassMapping) from .test_time_aug import MultiScaleFlipAug3D -from .transforms_3d import (BackgroundPointsFilter, GlobalRotScaleTrans, - IndoorPatchPointSample, IndoorPointSample, - ObjectNoise, ObjectRangeFilter, ObjectSample, - PointShuffle, PointsRangeFilter, RandomFlip3D, - VoxelBasedPointSampler) +from .transforms_3d import (BackgroundPointsFilter, GlobalAlignment, + GlobalRotScaleTrans, IndoorPatchPointSample, + IndoorPointSample, ObjectNoise, ObjectRangeFilter, + ObjectSample, PointShuffle, PointsRangeFilter, + RandomFlip3D, VoxelBasedPointSampler) __all__ = [ 'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', @@ -19,6 +19,6 @@ 'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D', 'LoadPointsFromMultiSweeps', - 'BackgroundPointsFilter', 'VoxelBasedPointSampler', + 'BackgroundPointsFilter', 'VoxelBasedPointSampler', 'GlobalAlignment', 'IndoorPatchPointSample', 'LoadImageFromFileMono3D' ] diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py b/mmdet3d/datasets/pipelines/transforms_3d.py index 838ab73d7f..d5f46c47df 100644 --- a/mmdet3d/datasets/pipelines/transforms_3d.py +++ b/mmdet3d/datasets/pipelines/transforms_3d.py @@ -293,6 +293,172 @@ def __repr__(self): return repr_str +@PIPELINES.register_module() +class GlobalAlignment(object): + """Apply global alignment to 3D scene points by rotation and translation. + Extract 3D bboxes from the aligned points and instance mask if provided. + + Args: + rotation_axis (int): Rotation axis for points and bboxes rotation. + ignore_index (int): Label index for which we won't extract bboxes. + + Note: + This function should be called after PointSegClassMapping in pipeline. + We do not record the applied rotation and translation as in \ + GlobalRotScaleTrans. Because usually, we do not need to reverse \ + the alignment step. + For example, ScanNet 3D detection task uses aligned ground-truth \ + bounding boxes for evaluation. + """ + + def __init__(self, rotation_axis, ignore_index): + self.rotation_axis = rotation_axis + self.ignore_index = ignore_index + + def _trans_points(self, input_dict, trans_factor): + """Private function to translate points. + + Args: + input_dict (dict): Result dict from loading pipeline. + trans_factor (np.ndarray): Translation vector to be applied. + + Returns: + dict: Results after translation, 'points' is updated in the dict. + """ + input_dict['points'].translate(trans_factor) + + def _rot_points(self, input_dict, rot_mat): + """Private function to rotate bounding boxes and points. + + Args: + input_dict (dict): Result dict from loading pipeline. + rot_mat (np.ndarray): Rotation matrix to be applied. + + Returns: + dict: Results after rotation, 'points' is updated in the dict. + """ + # input should be rot_mat_T so I transpose it here + input_dict['points'].rotate(rot_mat.T) + + def _check_rot_mat(self, rot_mat): + """Check if rotation matrix is valid for self.rotation_axis. + + Args: + rot_mat (np.ndarray): Rotation matrix to be applied. + """ + is_valid = np.allclose(np.linalg.det(rot_mat), 1.0) + valid_array = np.zeros(3) + valid_array[self.rotation_axis] = 1.0 + is_valid &= (rot_mat[self.rotation_axis, :] == valid_array).all() + is_valid &= (rot_mat[:, self.rotation_axis] == valid_array).all() + assert is_valid, f'invalid rotation matrix {rot_mat}' + + def _bbox_from_points(self, points): + """Get the bounding box of a set of points. + + Args: + points (np.ndarray): A set of points belonging to one instance. + + Returns: + np.ndarray: A bounding box of input points. We use origin as \ + (0.5, 0.5, 0.5) without yaw. + """ + xmin = np.min(points[:, 0]) + ymin = np.min(points[:, 1]) + zmin = np.min(points[:, 2]) + xmax = np.max(points[:, 0]) + ymax = np.max(points[:, 1]) + zmax = np.max(points[:, 2]) + bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2, + (zmin + zmax) / 2, xmax - xmin, ymax - ymin, + zmax - zmin]) + return bbox + + def _extract_bboxes(self, input_dict): + """Extract bounding boxes from points, semantic mask and instance mask. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after extracting bboxes, keys in \ + input_dict['bbox3d_fields'] are updated in the dict. + """ + assert 'pts_instance_mask' in input_dict.keys(), \ + 'instance mask is not provided in GlobalAlignment' + assert 'pts_semantic_mask' in input_dict.keys(), \ + 'semantic mask is not provided in GlobalAlignment' + + # TODO: this function is only used in ScanNet-Det currently + # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes + for key in input_dict['bbox3d_fields']: + if key != 'gt_bboxes_3d': + raise NotImplementedError( + f'GlobalAlignment does not support 3d bbox {key}') + + coords = input_dict['points'].coord.numpy() + inst_mask = input_dict['pts_instance_mask'] + sem_mask = input_dict['pts_semantic_mask'] + + # select points from valid categories where we want to extract bboxes + valid_cat_mask = (sem_mask != self.ignore_index) + inst_ids = np.unique(inst_mask[valid_cat_mask]) # ids of valid insts + instance_bboxes = np.zeros((inst_ids.shape[0], 7)) + inst_id2cat_id = { + inst_id: sem_mask[inst_mask == inst_id][0] + for inst_id in inst_ids + } + for bbox_idx, inst_id in enumerate(inst_ids): + cat_id = inst_id2cat_id[inst_id] + inst_coords = coords[inst_mask == inst_id] + bbox = self._bbox_from_points(inst_coords) + instance_bboxes[bbox_idx, :6] = bbox + instance_bboxes[bbox_idx, 6] = cat_id + + # TODO: currently only DepthInstance3DBoxes is supported! + # TODO: may support yaw in the future + original_type = type(input_dict['gt_bboxes_3d']) + input_dict['gt_bboxes_3d'] = original_type( + instance_bboxes[:, :6], + box_dim=6, + with_yaw=False, + origin=(0.5, 0.5, 0.5)) + if 'gt_labels_3d' in input_dict.keys(): + input_dict['gt_labels_3d'] = instance_bboxes[:, 6].astype(np.long) + + def __call__(self, input_dict): + """Call function to shuffle points. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after global alignment, 'points' and keys in \ + input_dict['bbox3d_fields'] are updated in the result dict. + """ + assert 'axis_align_matrix' in input_dict['annos'].keys(), \ + 'axis_align_matrix is not provided in GlobalAlignment' + + axis_align_matrix = input_dict['annos']['axis_align_matrix'] + assert axis_align_matrix.shape == (4, 4), \ + f'invalid shape {axis_align_matrix.shape} for axis_align_matrix' + rot_mat = axis_align_matrix[:3, :3] + trans_vec = axis_align_matrix[:3, -1] + + self._check_rot_mat(rot_mat) + self._rot_points(input_dict, rot_mat) + self._trans_points(input_dict, trans_vec) + self._extract_bboxes(input_dict) + + return input_dict + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(rotation_axis={self.rotation_axis},' + repr_str += f' ignore_index={self.ignore_index})' + return repr_str + + @PIPELINES.register_module() class GlobalRotScaleTrans(object): """Apply global rotation, scaling and translation to a 3D scene. diff --git a/tests/data/scannet/scannet_infos.pkl b/tests/data/scannet/scannet_infos.pkl index 7ceba9efd5f4907da05985bb9103322d709a991a..d0fe1b839d248d140a595a9a20e5711ce1eeed77 100644 GIT binary patch delta 278 zcmZ3Wx5i+@VMeCTj?KpzD_KFb3;P#dAU)Ye^a6-0AjZR3A0f!Vz>rvxSsb63lbN0u zpPN`xlvz<2pCOnbMwL!F|B8<12S+K;RiqLzW7uH^8l!(Hizcd{LBn4HKY#(#JN q(AcEwIZ>ckVMc~nTVZBUVOB|Dc2a3+q8>FT7kJvGzjmphBO?d}3mggG9GLBss)*7+Fe7 G6ZHTrv=FNR diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py index e2a94cb868..0370722654 100644 --- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py +++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py @@ -3,11 +3,13 @@ import pytest import torch -from mmdet3d.core import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes +from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes, + DepthInstance3DBoxes, LiDARInstance3DBoxes) from mmdet3d.core.points import DepthPoints, LiDARPoints -from mmdet3d.datasets import (BackgroundPointsFilter, ObjectNoise, - ObjectSample, PointShuffle, PointsRangeFilter, - RandomFlip3D, VoxelBasedPointSampler) +from mmdet3d.datasets import (BackgroundPointsFilter, GlobalAlignment, + ObjectNoise, ObjectSample, RandomFlip3D, + PointShuffle, PointsRangeFilter, + VoxelBasedPointSampler) def test_remove_points_in_boxes(): @@ -221,6 +223,114 @@ def test_points_range_filter(): assert repr_str == expected_repr_str +def test_global_alignment(): + np.random.seed(0) + valid_cat_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, + 36, 39) + ignore_index = len(valid_cat_ids) + cat_ids2class = np.ones((41, ), dtype=np.int) * ignore_index + for class_id, cat_id in enumerate(valid_cat_ids): + cat_ids2class[cat_id] = class_id + + global_alignment = GlobalAlignment( + rotation_axis=2, ignore_index=ignore_index) + + points = np.fromfile('tests/data/scannet/points/scene0000_00.bin', + np.float32).reshape(-1, 6) + sem_mask = np.fromfile('tests/data/scannet/semantic_mask/scene0000_00.bin', + np.long) + ins_mask = np.fromfile('tests/data/scannet/instance_mask/scene0000_00.bin', + np.long) + annos = mmcv.load('tests/data/scannet/scannet_infos.pkl') + info = annos[0] + gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'] + axis_align_matrix = info['annos']['axis_align_matrix'] + gt_labels_3d = info['annos']['class'] + + depth_points = DepthPoints(points.copy(), points_dim=6) + depth_bboxes = DepthInstance3DBoxes( + gt_bboxes_3d, + box_dim=gt_bboxes_3d.shape[-1], + with_yaw=False, + origin=(0.5, 0.5, 0.5)) + sem_mask = cat_ids2class[sem_mask] + + input_dict = dict( + points=depth_points.clone(), + gt_bboxes_3d=depth_bboxes, + bbox3d_fields=['gt_bboxes_3d'], + gt_labels_3d=gt_labels_3d, + annos=dict(axis_align_matrix=axis_align_matrix), + pts_instance_mask=ins_mask, + pts_semantic_mask=sem_mask) + + input_dict = global_alignment(input_dict) + trans_depth_points = input_dict['points'] + trans_depth_bboxes = input_dict['gt_bboxes_3d'] + trans_bbox_labels = input_dict['gt_labels_3d'] + + # construct expected transformed points by affine transformation + pts = np.ones((points.shape[0], 4)) + pts[:, :3] = points[:, :3] + trans_pts = np.dot(pts, axis_align_matrix.T) + expected_points = np.concatenate([trans_pts[:, :3], points[:, 3:]], axis=1) + + expected_bbox_labels = np.array( + [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long) + expected_depth_bboxes = np.array( + [[ + -3.714606, -1.0654305, 0.6051854, 0.6297655, 1.9905674, 0.44288868, + 0. + ], + [ + -8.557551, -1.8178326, 0.20456636, 1.1263373, 2.7851129, + 1.8631845, 0. + ], + [ + -8.885854, -5.354957, 0.97720087, 0.9093195, 0.30981588, 0.566175, + 0. + ], + [ + -8.098918, -5.0357704, 0.03724962, 0.27458152, 0.20566699, + 0.5532104, 0. + ], + [ + -6.9733434, 0.33523083, -0.02958763, 1.2264912, 0.7187278, + 2.2613325, 0. + ], + [ + -5.36362, -1.6046655, 0.37014085, 2.8042943, 1.1057366, + 0.31707314, 0. + ], [-2.6299255, -2.3314357, 1.4469249, 0., 0., 0., 0.], + [-5.201888, -1.014641, 0.11020403, 0., 0., 0., 0.], + [ + -3.5216672, -6.8292904, 0.26571387, 0.13945593, 0.12182455, + 0.02463818, 0. + ], + [ + -6.4834313, -5.4506774, 0.13558027, 1.4790803, 0.6031074, + 0.60305846, 0. + ], + [ + -9.338867, -4.616579, 0.6112565, 0.17650154, 0.988079, 0.16838372, + 0. + ], [-2.0639155, -1.245964, 0.30754995, 0., 0., 0., 0.], + [-2.002855, -1.9495802, 2.2899528, 0., 0., 0., 0.], + [-2.1240144, -3.751592, 0.92695427, 0., 0., 0., 0.], + [-3.6406162, -5.1366153, 0.25374442, 0., 0., 0., 0.]]) + + assert np.allclose( + trans_depth_points.tensor.numpy(), expected_points, atol=1e-6) + assert np.all(trans_bbox_labels == expected_bbox_labels) + assert np.allclose( + trans_depth_bboxes.tensor.numpy(), expected_depth_bboxes, atol=1e-6) + + repr_str = repr(global_alignment) + expected_repr_str = 'GlobalAlignment(rotation_axis=2,' \ + f' ignore_index={ignore_index})' + assert repr_str == expected_repr_str + + def test_random_flip_3d(): random_flip_3d = RandomFlip3D( flip_ratio_bev_horizontal=1.0, flip_ratio_bev_vertical=1.0) From 6af6107863261b93a336d47df1014b29ffb7eb80 Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Wed, 14 Apr 2021 15:37:41 +0800 Subject: [PATCH 04/12] fix small bugs in GlobalAlignment --- mmdet3d/datasets/pipelines/transforms_3d.py | 18 +++++++++--------- .../test_augmentations/test_transforms_3d.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py b/mmdet3d/datasets/pipelines/transforms_3d.py index d5f46c47df..5bd3d88056 100644 --- a/mmdet3d/datasets/pipelines/transforms_3d.py +++ b/mmdet3d/datasets/pipelines/transforms_3d.py @@ -384,18 +384,18 @@ def _extract_bboxes(self, input_dict): dict: Results after extracting bboxes, keys in \ input_dict['bbox3d_fields'] are updated in the dict. """ + # TODO: this function is only used in ScanNet-Det pipeline currently + # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes + if 'gt_bboxes_3d' not in input_dict['bbox3d_fields']: + return + assert len(input_dict['bbox3d_fields']) == 1, \ + 'GlobalAlignment only support gt_bboxes_3d' + assert 'pts_instance_mask' in input_dict.keys(), \ 'instance mask is not provided in GlobalAlignment' assert 'pts_semantic_mask' in input_dict.keys(), \ 'semantic mask is not provided in GlobalAlignment' - # TODO: this function is only used in ScanNet-Det currently - # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes - for key in input_dict['bbox3d_fields']: - if key != 'gt_bboxes_3d': - raise NotImplementedError( - f'GlobalAlignment does not support 3d bbox {key}') - coords = input_dict['points'].coord.numpy() inst_mask = input_dict['pts_instance_mask'] sem_mask = input_dict['pts_semantic_mask'] @@ -436,10 +436,10 @@ def __call__(self, input_dict): dict: Results after global alignment, 'points' and keys in \ input_dict['bbox3d_fields'] are updated in the result dict. """ - assert 'axis_align_matrix' in input_dict['annos'].keys(), \ + assert 'axis_align_matrix' in input_dict['ann_info'].keys(), \ 'axis_align_matrix is not provided in GlobalAlignment' - axis_align_matrix = input_dict['annos']['axis_align_matrix'] + axis_align_matrix = input_dict['ann_info']['axis_align_matrix'] assert axis_align_matrix.shape == (4, 4), \ f'invalid shape {axis_align_matrix.shape} for axis_align_matrix' rot_mat = axis_align_matrix[:3, :3] diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py index 0370722654..bd820abf64 100644 --- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py +++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py @@ -260,7 +260,7 @@ def test_global_alignment(): gt_bboxes_3d=depth_bboxes, bbox3d_fields=['gt_bboxes_3d'], gt_labels_3d=gt_labels_3d, - annos=dict(axis_align_matrix=axis_align_matrix), + ann_info=dict(axis_align_matrix=axis_align_matrix), pts_instance_mask=ins_mask, pts_semantic_mask=sem_mask) From db025c1997a8edce632deccbf9873e96a59b6472 Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Wed, 14 Apr 2021 16:07:09 +0800 Subject: [PATCH 05/12] modify ScanNetDataset class to support axis_align_matrix loading & modify ScanNet config file --- configs/_base_/datasets/scannet-3d-18class.py | 3 ++ mmdet3d/datasets/scannet_dataset.py | 15 ++++++- .../test_datasets/test_scannet_dataset.py | 37 +++++++++++------- .../test_pipelines/test_indoor_pipeline.py | 39 ++++++++++++------- 4 files changed, 63 insertions(+), 31 deletions(-) diff --git a/configs/_base_/datasets/scannet-3d-18class.py b/configs/_base_/datasets/scannet-3d-18class.py index 3d3dc1e62f..8c8821d581 100644 --- a/configs/_base_/datasets/scannet-3d-18class.py +++ b/configs/_base_/datasets/scannet-3d-18class.py @@ -23,6 +23,9 @@ valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39), max_cat_id=40), + dict( + type='GlobalAlignment', rotation_axis=2, + ignore_index=len(class_names)), dict(type='IndoorPointSample', num_points=40000), dict( type='RandomFlip3D', diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py index 51336c5dd4..870c9003fb 100644 --- a/mmdet3d/datasets/scannet_dataset.py +++ b/mmdet3d/datasets/scannet_dataset.py @@ -1,5 +1,6 @@ import numpy as np import tempfile +import warnings from os import path as osp from mmdet3d.core import show_result, show_seg_result @@ -79,6 +80,8 @@ def get_ann_info(self, index): - gt_labels_3d (np.ndarray): Labels of ground truths. - pts_instance_mask_path (str): Path of instance masks. - pts_semantic_mask_path (str): Path of semantic masks. + - axis_align_matrix (np.ndarray): Transformation matrix for \ + global scene alignment. """ # Use index to get the annos, thus the evalhook could also use this api info = self.data_infos[index] @@ -102,11 +105,21 @@ def get_ann_info(self, index): pts_semantic_mask_path = osp.join(self.data_root, info['pts_semantic_mask_path']) + if 'axis_align_matrix' in info['annos'].keys(): + axis_align_matrix = info['annos']['axis_align_matrix'].astype( + np.float32) + else: + axis_align_matrix = np.eye(4).astype(np.float32) + warnings.warn( + 'axis_align_matrix is not found in ScanNet data info, please ' + 'use new pre-process scripts to re-generate ScanNet data') + anns_results = dict( gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d, pts_instance_mask_path=pts_instance_mask_path, - pts_semantic_mask_path=pts_semantic_mask_path) + pts_semantic_mask_path=pts_semantic_mask_path, + axis_align_matrix=axis_align_matrix) return anns_results def _build_default_pipeline(self): diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py index 69791a4fae..31ded87e36 100644 --- a/tests/test_data/test_datasets/test_scannet_dataset.py +++ b/tests/test_data/test_datasets/test_scannet_dataset.py @@ -27,6 +27,14 @@ def test_getitem(): with_label_3d=True, with_mask_3d=True, with_seg_3d=True), + dict( + type='PointSegClassMapping', + valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, + 34, 36, 39)), + dict( + type='GlobalAlignment', + rotation_axis=2, + ignore_index=len(class_names)), dict(type='IndoorPointSample', num_points=5), dict( type='RandomFlip3D', @@ -63,22 +71,21 @@ def test_getitem(): assert file_name == './tests/data/scannet/points/scene0000_00.bin' assert np.allclose(pcd_rotation, expected_rotation, 1e-3) assert sample_idx == 'scene0000_00' - expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895], - [-0.4065, -3.4857, 2.1330, 2.1682], - [-1.4578, 1.3510, -0.0441, -0.0089], - [2.2428, -1.1323, -0.0288, 0.0064], - [0.7052, -2.9752, 1.5560, 1.5912]]) + expected_points = torch.tensor( + [[1.8339e+00, 2.1093e+00, 2.2900e+00, 2.3895e+00], + [3.6079e+00, 1.4592e-01, 2.0687e+00, 2.1682e+00], + [4.1886e+00, 5.0614e+00, -1.0841e-01, -8.8736e-03], + [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03], + [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]]) expected_gt_bboxes_3d = torch.tensor( - [[-1.1835, -3.6317, 1.5704, 1.7577, 0.3761, 0.5724, 0.0000], - [-3.1832, 3.2269, 1.1911, 0.6727, 0.2251, 0.6715, 0.0000], - [-0.9598, -2.2864, 0.0093, 0.7506, 2.5709, 1.2145, 0.0000], - [-2.6988, -2.7354, 0.8288, 0.7680, 1.8877, 0.2870, 0.0000], - [3.2989, 0.2885, -0.0090, 0.7600, 3.8814, 2.1603, 0.0000]]) - expected_gt_labels = np.array([ - 6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0, - 0, 0, 0, 5, 5, 5 - ]) - expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15]) + [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000], + [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000], + [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000], + [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000], + [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]]) + expected_gt_labels = np.array( + [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long) + expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18]) expected_pts_instance_mask = np.array([44, 22, 10, 10, 57]) original_classes = scannet_dataset.CLASSES diff --git a/tests/test_data/test_pipelines/test_indoor_pipeline.py b/tests/test_data/test_pipelines/test_indoor_pipeline.py index 31b60ef69f..9d24467bae 100644 --- a/tests/test_data/test_pipelines/test_indoor_pipeline.py +++ b/tests/test_data/test_pipelines/test_indoor_pipeline.py @@ -27,6 +27,14 @@ def test_scannet_pipeline(): with_label_3d=True, with_mask_3d=True, with_seg_3d=True), + dict( + type='PointSegClassMapping', + valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, + 34, 36, 39)), + dict( + type='GlobalAlignment', + rotation_axis=2, + ignore_index=len(class_names)), dict(type='IndoorPointSample', num_points=5), dict( type='RandomFlip3D', @@ -66,6 +74,8 @@ def test_scannet_pipeline(): results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes( scannet_gt_bboxes_3d, box_dim=6, with_yaw=False) results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d + results['ann_info']['axis_align_matrix'] = \ + info['annos']['axis_align_matrix'] results['img_fields'] = [] results['bbox3d_fields'] = [] @@ -79,22 +89,21 @@ def test_scannet_pipeline(): gt_labels_3d = results['gt_labels_3d']._data pts_semantic_mask = results['pts_semantic_mask']._data pts_instance_mask = results['pts_instance_mask']._data - expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895], - [-0.4065, -3.4857, 2.1330, 2.1682], - [-1.4578, 1.3510, -0.0441, -0.0089], - [2.2428, -1.1323, -0.0288, 0.0064], - [0.7052, -2.9752, 1.5560, 1.5912]]) + expected_points = torch.tensor( + [[1.8339e+00, 2.1093e+00, 2.2900e+00, 2.3895e+00], + [3.6079e+00, 1.4592e-01, 2.0687e+00, 2.1682e+00], + [4.1886e+00, 5.0614e+00, -1.0841e-01, -8.8736e-03], + [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03], + [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]]) expected_gt_bboxes_3d = torch.tensor( - [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000], - [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000], - [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000], - [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000], - [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]]) - expected_gt_labels_3d = np.array([ - 6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0, - 0, 0, 0, 5, 5, 5 - ]) - expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15]) + [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000], + [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000], + [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000], + [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000], + [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]]) + expected_gt_labels_3d = np.array( + [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long) + expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18]) expected_pts_instance_mask = np.array([44, 22, 10, 10, 57]) assert torch.allclose(points, expected_points, 1e-2) assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d, From da9b64996bd1f5df872560d8ebbeece5e1811ed1 Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Fri, 16 Apr 2021 16:01:15 +0800 Subject: [PATCH 06/12] add assertion message for rotate --- mmdet3d/core/bbox/structures/cam_box3d.py | 3 ++- mmdet3d/core/bbox/structures/depth_box3d.py | 3 ++- mmdet3d/core/bbox/structures/lidar_box3d.py | 3 ++- mmdet3d/core/points/base_points.py | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/mmdet3d/core/bbox/structures/cam_box3d.py b/mmdet3d/core/bbox/structures/cam_box3d.py index d72391b160..0d804c1601 100644 --- a/mmdet3d/core/bbox/structures/cam_box3d.py +++ b/mmdet3d/core/bbox/structures/cam_box3d.py @@ -185,7 +185,8 @@ def rotate(self, angle, points=None): """ if not isinstance(angle, torch.Tensor): angle = self.tensor.new_tensor(angle) - assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1 + assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \ + f'invalid rotation angle shape {angle.shape}' if angle.numel() == 1: rot_sin = torch.sin(angle) diff --git a/mmdet3d/core/bbox/structures/depth_box3d.py b/mmdet3d/core/bbox/structures/depth_box3d.py index 8e619114f9..c5aeceaebf 100644 --- a/mmdet3d/core/bbox/structures/depth_box3d.py +++ b/mmdet3d/core/bbox/structures/depth_box3d.py @@ -132,7 +132,8 @@ def rotate(self, angle, points=None): """ if not isinstance(angle, torch.Tensor): angle = self.tensor.new_tensor(angle) - assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1 + assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \ + f'invalid rotation angle shape {angle.shape}' if angle.numel() == 1: rot_sin = torch.sin(angle) diff --git a/mmdet3d/core/bbox/structures/lidar_box3d.py b/mmdet3d/core/bbox/structures/lidar_box3d.py index 2acd8cf60e..f7f1721f1e 100644 --- a/mmdet3d/core/bbox/structures/lidar_box3d.py +++ b/mmdet3d/core/bbox/structures/lidar_box3d.py @@ -130,7 +130,8 @@ def rotate(self, angle, points=None): """ if not isinstance(angle, torch.Tensor): angle = self.tensor.new_tensor(angle) - assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1 + assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \ + f'invalid rotation angle shape {angle.shape}' if angle.numel() == 1: rot_sin = torch.sin(angle) diff --git a/mmdet3d/core/points/base_points.py b/mmdet3d/core/points/base_points.py index 467b3bb4de..aa13fc023c 100644 --- a/mmdet3d/core/points/base_points.py +++ b/mmdet3d/core/points/base_points.py @@ -147,7 +147,7 @@ def rotate(self, rotation, axis=None): if not isinstance(rotation, torch.Tensor): rotation = self.tensor.new_tensor(rotation) assert rotation.shape == torch.Size([3, 3]) or \ - rotation.numel() == 1 + rotation.numel() == 1, f'invalid rotation shape {rotation.shape}' if axis is None: axis = self.rotation_axis From 56f59b6ca0c1534c24314c1cdf6a5fc0e82c2b05 Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Tue, 20 Apr 2021 15:42:33 +0800 Subject: [PATCH 07/12] add exception in show_result when bbox shape is invalid --- mmdet3d/core/visualizer/show_result.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mmdet3d/core/visualizer/show_result.py b/mmdet3d/core/visualizer/show_result.py index c4f414ad3a..e7b4789e0a 100644 --- a/mmdet3d/core/visualizer/show_result.py +++ b/mmdet3d/core/visualizer/show_result.py @@ -61,7 +61,10 @@ def convert_oriented_box_to_trimesh_fmt(box): scene_bbox = np.zeros((1, 7)) scene = trimesh.scene.Scene() for box in scene_bbox: - scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box)) + try: + scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box)) + except ValueError: # invalid box shape, e.g. width==0 + continue mesh_list = trimesh.util.concatenate(scene.dump()) # save to obj file From 8d0205e08d312d53710274a581d2483490579cd0 Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Tue, 20 Apr 2021 15:43:10 +0800 Subject: [PATCH 08/12] use pipeline to load bbox --- configs/_base_/datasets/scannet-3d-18class.py | 24 ++- mmdet3d/datasets/pipelines/transforms_3d.py | 27 +-- mmdet3d/datasets/scannet_dataset.py | 156 ++++++++++++++++-- .../test_datasets/test_scannet_dataset.py | 111 ++++++++----- .../test_augmentations/test_transforms_3d.py | 9 +- .../test_pipelines/test_indoor_pipeline.py | 7 +- 6 files changed, 262 insertions(+), 72 deletions(-) diff --git a/configs/_base_/datasets/scannet-3d-18class.py b/configs/_base_/datasets/scannet-3d-18class.py index 8c8821d581..c34a575961 100644 --- a/configs/_base_/datasets/scannet-3d-18class.py +++ b/configs/_base_/datasets/scannet-3d-18class.py @@ -5,6 +5,8 @@ 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin') +valid_class_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, + 39) train_pipeline = [ dict( type='LoadPointsFromFile', @@ -14,8 +16,8 @@ use_dim=[0, 1, 2]), dict( type='LoadAnnotations3D', - with_bbox_3d=True, - with_label_3d=True, + with_bbox_3d=False, + with_label_3d=False, with_mask_3d=True, with_seg_3d=True), dict( @@ -52,6 +54,9 @@ shift_height=True, load_dim=6, use_dim=[0, 1, 2]), + dict( + type='GlobalAlignment', rotation_axis=2, + ignore_index=len(class_names)), dict( type='MultiScaleFlipAug3D', img_scale=(1333, 800), @@ -78,6 +83,7 @@ ] # construct a pipeline for data and gt loading in show function # please keep its loading function consistent with test_pipeline (e.g. client) +# we need to load gt masks for aligned gt bbox extracting eval_pipeline = [ dict( type='LoadPointsFromFile', @@ -85,11 +91,23 @@ shift_height=False, load_dim=6, use_dim=[0, 1, 2]), + dict( + type='LoadAnnotations3D', + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=True, + with_seg_3d=True), + dict(type='PointSegClassMapping', valid_cat_ids=valid_class_ids), + dict( + type='GlobalAlignment', + rotation_axis=2, + ignore_index=len(class_names), + extract_bbox=True), dict( type='DefaultFormatBundle3D', class_names=class_names, with_label=False), - dict(type='Collect3D', keys=['points']) + dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) ] data = dict( diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py b/mmdet3d/datasets/pipelines/transforms_3d.py index 5bd3d88056..1aff3924e1 100644 --- a/mmdet3d/datasets/pipelines/transforms_3d.py +++ b/mmdet3d/datasets/pipelines/transforms_3d.py @@ -301,6 +301,9 @@ class GlobalAlignment(object): Args: rotation_axis (int): Rotation axis for points and bboxes rotation. ignore_index (int): Label index for which we won't extract bboxes. + extract_bbox (bool): Whether extract new ground-truth bboxes after \ + alignment. This requires instance and semantic mask inputs. + Defaults to False. Note: This function should be called after PointSegClassMapping in pipeline. @@ -311,9 +314,10 @@ class GlobalAlignment(object): bounding boxes for evaluation. """ - def __init__(self, rotation_axis, ignore_index): + def __init__(self, rotation_axis, ignore_index, extract_bbox=False): self.rotation_axis = rotation_axis self.ignore_index = ignore_index + self.extract_bbox = extract_bbox def _trans_points(self, input_dict, trans_factor): """Private function to translate points. @@ -386,10 +390,7 @@ def _extract_bboxes(self, input_dict): """ # TODO: this function is only used in ScanNet-Det pipeline currently # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes - if 'gt_bboxes_3d' not in input_dict['bbox3d_fields']: - return - assert len(input_dict['bbox3d_fields']) == 1, \ - 'GlobalAlignment only support gt_bboxes_3d' + from mmdet3d.core.bbox import DepthInstance3DBoxes assert 'pts_instance_mask' in input_dict.keys(), \ 'instance mask is not provided in GlobalAlignment' @@ -415,16 +416,14 @@ def _extract_bboxes(self, input_dict): instance_bboxes[bbox_idx, :6] = bbox instance_bboxes[bbox_idx, 6] = cat_id - # TODO: currently only DepthInstance3DBoxes is supported! - # TODO: may support yaw in the future - original_type = type(input_dict['gt_bboxes_3d']) - input_dict['gt_bboxes_3d'] = original_type( + if 'gt_bboxes_3d' not in input_dict['bbox3d_fields']: + input_dict['bbox3d_fields'].append('gt_bboxes_3d') + input_dict['gt_bboxes_3d'] = DepthInstance3DBoxes( instance_bboxes[:, :6], box_dim=6, with_yaw=False, origin=(0.5, 0.5, 0.5)) - if 'gt_labels_3d' in input_dict.keys(): - input_dict['gt_labels_3d'] = instance_bboxes[:, 6].astype(np.long) + input_dict['gt_labels_3d'] = instance_bboxes[:, 6].astype(np.long) def __call__(self, input_dict): """Call function to shuffle points. @@ -448,14 +447,16 @@ def __call__(self, input_dict): self._check_rot_mat(rot_mat) self._rot_points(input_dict, rot_mat) self._trans_points(input_dict, trans_vec) - self._extract_bboxes(input_dict) + if self.extract_bbox: + self._extract_bboxes(input_dict) return input_dict def __repr__(self): repr_str = self.__class__.__name__ repr_str += f'(rotation_axis={self.rotation_axis},' - repr_str += f' ignore_index={self.ignore_index})' + repr_str += f' ignore_index={self.ignore_index},' + repr_str += f' extract_bbox={self.extract_bbox})' return repr_str diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py index 870c9003fb..dc7e34cae1 100644 --- a/mmdet3d/datasets/scannet_dataset.py +++ b/mmdet3d/datasets/scannet_dataset.py @@ -105,14 +105,7 @@ def get_ann_info(self, index): pts_semantic_mask_path = osp.join(self.data_root, info['pts_semantic_mask_path']) - if 'axis_align_matrix' in info['annos'].keys(): - axis_align_matrix = info['annos']['axis_align_matrix'].astype( - np.float32) - else: - axis_align_matrix = np.eye(4).astype(np.float32) - warnings.warn( - 'axis_align_matrix is not found in ScanNet data info, please ' - 'use new pre-process scripts to re-generate ScanNet data') + axis_align_matrix = self._get_axis_align_matrix(info) anns_results = dict( gt_bboxes_3d=gt_bboxes_3d, @@ -122,6 +115,128 @@ def get_ann_info(self, index): axis_align_matrix=axis_align_matrix) return anns_results + def prepare_test_data(self, index): + """Prepare data for testing. + + We should take axis_align_matrix from self.data_infos since we need \ + to align point clouds. + + Args: + index (int): Index for accessing the target data. + + Returns: + dict: Testing data dict of the corresponding index. + """ + input_dict = self.get_data_info(index) + # take the axis_align_matrix from data_infos + input_dict['ann_info'] = dict( + axis_align_matrix=self._get_axis_align_matrix( + self.data_infos[index])) + self.pre_pipeline(input_dict) + example = self.pipeline(input_dict) + return example + + @staticmethod + def _get_axis_align_matrix(info): + """Get axis_align_matrix from info. If not exist, return identity mat. + + Args: + info (dict): one data info term. + + Returns: + np.ndarray: 4x4 transformation matrix. + """ + if 'axis_align_matrix' in info['annos'].keys(): + return info['annos']['axis_align_matrix'].astype(np.float32) + else: + warnings.warn( + 'axis_align_matrix is not found in ScanNet data info, please ' + 'use new pre-process scripts to re-generate ScanNet data') + return np.eye(4).astype(np.float32) + + def evaluate(self, + results, + metric=None, + iou_thr=(0.25, 0.5), + logger=None, + show=False, + out_dir=None, + pipeline=None): + """Evaluate. + + Evaluation in indoor protocol. + Since ScanNet detection data pipeline re-computes ground-truth boxes, + we can't directly use gt_bboxes from self.data_infos. + + Args: + results (list[dict]): List of results. + metric (str | list[str]): Metrics to be evaluated. + iou_thr (list[float]): AP IoU thresholds. + show (bool): Whether to visualize. + Default: False. + out_dir (str): Path to save the visualization results. + Default: None. + pipeline (list[dict], optional): raw data loading for showing. + Default: None. + + Returns: + dict: Evaluation results. + """ + from mmdet3d.core.evaluation import indoor_eval + assert isinstance( + results, list), f'Expect results to be list, got {type(results)}.' + assert len(results) > 0, 'Expect length of results > 0.' + assert len(results) == len(self.data_infos) + assert isinstance( + results[0], dict + ), f'Expect elements in results to be dict, got {type(results[0])}.' + # load gt_bboxes via pipeline + pipeline = self._get_pipeline(pipeline) + gt_bboxes = [ + self._extract_data( + i, pipeline, ['gt_bboxes_3d', 'gt_labels_3d'], load_annos=True) + for i in range(len(self.data_infos)) + ] + gt_annos = [self._build_annos(*gt_bbox) for gt_bbox in gt_bboxes] + label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)} + ret_dict = indoor_eval( + gt_annos, + results, + iou_thr, + label2cat, + logger=logger, + box_type_3d=self.box_type_3d, + box_mode_3d=self.box_mode_3d) + if show: + self.show(results, out_dir, pipeline=pipeline) + + return ret_dict + + @staticmethod + def _build_annos(gt_bboxes, gt_labels): + """Transform gt bboxes and labels into self.data_infos['annos'] format. + + Args: + gt_bboxes (:obj:`BaseInstance3DBoxes`): \ + 3D bounding boxes in Depth coordinate + gt_labels (torch.Tensor): Labels of boxes. + + Returns: + dict: annotations including the following keys + + - gt_boxes_upright_depth (np.ndarray): 3D bounding boxes. + - class (np.ndarray): Labels of boxes. + - gt_num (int): Number of boxes. + """ + bbox = gt_bboxes.tensor.numpy()[:, :6].copy() # drop yaw dimension + bbox[..., 2] += bbox[..., 5] / 2 # bottom center to gravity center + anno = { + 'gt_boxes_upright_depth': bbox, + 'class': gt_labels.numpy(), + 'gt_num': gt_labels.shape[0] + } + return anno + def _build_default_pipeline(self): """Build the default pipeline for this dataset.""" pipeline = [ @@ -131,11 +246,28 @@ def _build_default_pipeline(self): shift_height=False, load_dim=6, use_dim=[0, 1, 2]), + dict( + type='LoadAnnotations3D', + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=True, + with_seg_3d=True), + dict( + type='PointSegClassMapping', + valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, + 33, 34, 36, 39)), + dict( + type='GlobalAlignment', + rotation_axis=2, + ignore_index=len(self.CLASSES), + extract_bbox=True), dict( type='DefaultFormatBundle3D', class_names=self.CLASSES, with_label=False), - dict(type='Collect3D', keys=['points']) + dict( + type='Collect3D', + keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) ] return Compose(pipeline) @@ -155,8 +287,10 @@ def show(self, results, out_dir, show=True, pipeline=None): data_info = self.data_infos[i] pts_path = data_info['pts_path'] file_name = osp.split(pts_path)[-1].split('.')[0] - points = self._extract_data(i, pipeline, 'points').numpy() - gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy() + points, gt_bboxes = self._extract_data( + i, pipeline, ['points', 'gt_bboxes_3d'], load_annos=True) + points = points.numpy() + gt_bboxes = gt_bboxes.tensor.numpy() pred_bboxes = result['boxes_3d'].tensor.numpy() show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name, show) diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py index 31ded87e36..37ea8ccba4 100644 --- a/tests/test_data/test_datasets/test_scannet_dataset.py +++ b/tests/test_data/test_datasets/test_scannet_dataset.py @@ -34,7 +34,8 @@ def test_getitem(): dict( type='GlobalAlignment', rotation_axis=2, - ignore_index=len(class_names)), + ignore_index=len(class_names), + extract_bbox=True), dict(type='IndoorPointSample', num_points=5), dict( type='RandomFlip3D', @@ -130,47 +131,65 @@ def test_evaluate(): results = [] pred_boxes = dict() pred_boxes['boxes_3d'] = DepthInstance3DBoxes( - torch.tensor([[ - 1.4813e+00, 3.5207e+00, 1.5704e+00, 1.7445e+00, 2.3196e-01, - 5.7235e-01, 0.0000e+00 - ], - [ - 2.9040e+00, -3.4803e+00, 1.1911e+00, 6.6078e-01, - 1.7072e-01, 6.7154e-01, 0.0000e+00 - ], - [ - 1.1466e+00, 2.1987e+00, 9.2576e-03, 5.4184e-01, - 2.5346e+00, 1.2145e+00, 0.0000e+00 - ], - [ - 2.9168e+00, 2.5016e+00, 8.2875e-01, 6.1697e-01, - 1.8428e+00, 2.8697e-01, 0.0000e+00 - ], - [ - -3.3114e+00, -1.3351e-02, -8.9524e-03, 4.4082e-01, - 3.8582e+00, 2.1603e+00, 0.0000e+00 - ], - [ - -2.0135e+00, -3.4857e+00, 9.3848e-01, 1.9911e+00, - 2.1603e-01, 1.2767e+00, 0.0000e+00 - ], - [ - -2.1945e+00, -3.1402e+00, -3.8165e-02, 1.4801e+00, - 6.8676e-01, 1.0586e+00, 0.0000e+00 - ], - [ - -2.7553e+00, 2.4055e+00, -2.9972e-02, 1.4764e+00, - 1.4927e+00, 2.3380e+00, 0.0000e+00 - ]])) - pred_boxes['labels_3d'] = torch.tensor([6, 6, 4, 9, 11, 11]) + torch.tensor( + [[-3.7146, -1.0654, 0.6052, 0.6298, 1.9906, 0.4429, 0.0000], + [-8.5576, -1.8178, 0.2046, 1.1263, 2.7851, 1.8632, 0.0000], + [-8.8859, -5.3550, 0.9772, 0.9093, 0.3098, 0.5662, 0.0000], + [-8.0989, -5.0358, 0.0372, 0.2746, 0.2057, 0.5532, 0.0000], + [-6.9733, 0.3352, -0.0296, 1.2265, 0.7187, 2.2613, 0.0000], + [-5.3636, -1.6047, 0.3701, 2.8043, 1.1057, 0.3171, 0.0000]])) + pred_boxes['labels_3d'] = torch.tensor([4, 11, 11, 10, 0, 3]) pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0, 1.0, 1.0, 0.5]) results.append(pred_boxes) metric = [0.25, 0.5] ret_dict = scannet_dataset.evaluate(results, metric) - assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01 - assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01 - assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01 + assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01 assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01 + assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01 + assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01 + assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01 + + # test evaluate with pipeline + class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', + 'window', 'bookshelf', 'picture', 'counter', 'desk', + 'curtain', 'refrigerator', 'showercurtrain', 'toilet', + 'sink', 'bathtub', 'garbagebin') + eval_pipeline = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=False, + load_dim=6, + use_dim=[0, 1, 2]), + dict( + type='LoadAnnotations3D', + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=True, + with_seg_3d=True), + dict( + type='PointSegClassMapping', + valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, + 34, 36, 39)), + dict( + type='GlobalAlignment', + rotation_axis=2, + ignore_index=len(class_names), + extract_bbox=True), + dict( + type='DefaultFormatBundle3D', + class_names=class_names, + with_label=False), + dict( + type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) + ] + ret_dict = scannet_dataset.evaluate( + results, metric, pipeline=eval_pipeline) + assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01 + assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01 + assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01 + assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01 + assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01 def test_show(): @@ -233,11 +252,27 @@ def test_show(): shift_height=False, load_dim=6, use_dim=[0, 1, 2]), + dict( + type='LoadAnnotations3D', + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=True, + with_seg_3d=True), + dict( + type='PointSegClassMapping', + valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, + 34, 36, 39)), + dict( + type='GlobalAlignment', + rotation_axis=2, + ignore_index=len(class_names), + extract_bbox=True), dict( type='DefaultFormatBundle3D', class_names=class_names, with_label=False), - dict(type='Collect3D', keys=['points']) + dict( + type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) ] tmp_dir = tempfile.TemporaryDirectory() temp_dir = tmp_dir.name diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py index bd820abf64..8f672a2f65 100644 --- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py +++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py @@ -7,8 +7,8 @@ DepthInstance3DBoxes, LiDARInstance3DBoxes) from mmdet3d.core.points import DepthPoints, LiDARPoints from mmdet3d.datasets import (BackgroundPointsFilter, GlobalAlignment, - ObjectNoise, ObjectSample, RandomFlip3D, - PointShuffle, PointsRangeFilter, + ObjectNoise, ObjectSample, PointShuffle, + PointsRangeFilter, RandomFlip3D, VoxelBasedPointSampler) @@ -233,7 +233,7 @@ def test_global_alignment(): cat_ids2class[cat_id] = class_id global_alignment = GlobalAlignment( - rotation_axis=2, ignore_index=ignore_index) + rotation_axis=2, ignore_index=ignore_index, extract_bbox=True) points = np.fromfile('tests/data/scannet/points/scene0000_00.bin', np.float32).reshape(-1, 6) @@ -327,7 +327,8 @@ def test_global_alignment(): repr_str = repr(global_alignment) expected_repr_str = 'GlobalAlignment(rotation_axis=2,' \ - f' ignore_index={ignore_index})' + f' ignore_index={ignore_index},' \ + f' extract_bbox=True)' assert repr_str == expected_repr_str diff --git a/tests/test_data/test_pipelines/test_indoor_pipeline.py b/tests/test_data/test_pipelines/test_indoor_pipeline.py index 9d24467bae..5563dcd073 100644 --- a/tests/test_data/test_pipelines/test_indoor_pipeline.py +++ b/tests/test_data/test_pipelines/test_indoor_pipeline.py @@ -23,8 +23,8 @@ def test_scannet_pipeline(): use_dim=[0, 1, 2]), dict( type='LoadAnnotations3D', - with_bbox_3d=True, - with_label_3d=True, + with_bbox_3d=False, + with_label_3d=False, with_mask_3d=True, with_seg_3d=True), dict( @@ -34,7 +34,8 @@ def test_scannet_pipeline(): dict( type='GlobalAlignment', rotation_axis=2, - ignore_index=len(class_names)), + ignore_index=len(class_names), + extract_bbox=True), dict(type='IndoorPointSample', num_points=5), dict( type='RandomFlip3D', From 0a7abe787cb8e32259e51a4eaf1adb876acb01a7 Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Tue, 20 Apr 2021 20:05:19 +0800 Subject: [PATCH 09/12] extract both aligned and unaligned bbox in pre-processing --- data/scannet/batch_load_scannet_data.py | 17 +++--- data/scannet/load_scannet_data.py | 62 ++++++++++++++-------- tools/data_converter/scannet_data_utils.py | 39 +++++++++----- 3 files changed, 77 insertions(+), 41 deletions(-) diff --git a/data/scannet/batch_load_scannet_data.py b/data/scannet/batch_load_scannet_data.py index b55b363309..60b53b3db4 100644 --- a/data/scannet/batch_load_scannet_data.py +++ b/data/scannet/batch_load_scannet_data.py @@ -34,8 +34,8 @@ def export_one_scan(scan_name, scan_name + '_vh_clean_2.0.010000.segs.json') # includes axisAlignment info for the train set scans. meta_file = osp.join(scannet_dir, scan_name, f'{scan_name}.txt') - mesh_vertices, semantic_labels, instance_labels, instance_bboxes, \ - instance2semantic, axis_align_matrix = export( + mesh_vertices, semantic_labels, instance_labels, unaligned_bboxes, \ + aligned_bboxes, instance2semantic, axis_align_matrix = export( mesh_file, agg_file, seg_file, meta_file, label_map_file, None, test_mode) @@ -48,9 +48,12 @@ def export_one_scan(scan_name, num_instances = len(np.unique(instance_labels)) print(f'Num of instances: {num_instances}') - bbox_mask = np.in1d(instance_bboxes[:, -1], OBJ_CLASS_IDS) - instance_bboxes = instance_bboxes[bbox_mask, :] - print(f'Num of care instances: {instance_bboxes.shape[0]}') + bbox_mask = np.in1d(unaligned_bboxes[:, -1], OBJ_CLASS_IDS) + unaligned_bboxes = unaligned_bboxes[bbox_mask, :] + bbox_mask = np.in1d(aligned_bboxes[:, -1], OBJ_CLASS_IDS) + aligned_bboxes = aligned_bboxes[bbox_mask, :] + assert unaligned_bboxes.shape[0] == aligned_bboxes.shape[0] + print(f'Num of care instances: {unaligned_bboxes.shape[0]}') if max_num_point is not None: max_num_point = int(max_num_point) @@ -66,7 +69,9 @@ def export_one_scan(scan_name, if not test_mode: np.save(f'{output_filename_prefix}_sem_label.npy', semantic_labels) np.save(f'{output_filename_prefix}_ins_label.npy', instance_labels) - np.save(f'{output_filename_prefix}_bbox.npy', instance_bboxes) + np.save(f'{output_filename_prefix}_unaligned_bbox.npy', + unaligned_bboxes) + np.save(f'{output_filename_prefix}_aligned_bbox.npy', aligned_bboxes) np.save(f'{output_filename_prefix}_axis_align_matrix.npy', axis_align_matrix) diff --git a/data/scannet/load_scannet_data.py b/data/scannet/load_scannet_data.py index 0cc20312a0..7e7cd55709 100644 --- a/data/scannet/load_scannet_data.py +++ b/data/scannet/load_scannet_data.py @@ -52,6 +52,29 @@ def read_segmentation(filename): return seg_to_verts, num_verts +def extract_bbox(mesh_vertices, object_id_to_segs, object_id_to_label_id, + instance_ids): + num_instances = len(np.unique(list(object_id_to_segs.keys()))) + instance_bboxes = np.zeros((num_instances, 7)) + for obj_id in object_id_to_segs: + label_id = object_id_to_label_id[obj_id] + obj_pc = mesh_vertices[instance_ids == obj_id, 0:3] + if len(obj_pc) == 0: + continue + xmin = np.min(obj_pc[:, 0]) + ymin = np.min(obj_pc[:, 1]) + zmin = np.min(obj_pc[:, 2]) + xmax = np.max(obj_pc[:, 0]) + ymax = np.max(obj_pc[:, 1]) + zmax = np.max(obj_pc[:, 2]) + bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2, + (zmin + zmax) / 2, xmax - xmin, ymax - ymin, + zmax - zmin, label_id]) + # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES + instance_bboxes[obj_id - 1, :] = bbox + return instance_bboxes + + def export(mesh_file, agg_file, seg_file, @@ -97,6 +120,13 @@ def export(mesh_file, break axis_align_matrix = np.array(axis_align_matrix).reshape((4, 4)) + # perform global alignment of mesh vertices + pts = np.ones((mesh_vertices.shape[0], 4)) + pts[:, 0:3] = mesh_vertices[:, 0:3] + pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4 + aligned_mesh_vertices = np.concatenate([pts[:, 0:3], mesh_vertices[:, 3:]], + axis=1) + # Load semantic and instance labels if not test_mode: object_id_to_segs, label_to_segs = read_aggregation(agg_file) @@ -110,34 +140,21 @@ def export(mesh_file, label_ids[verts] = label_id instance_ids = np.zeros( shape=(num_verts), dtype=np.uint32) # 0: unannotated - num_instances = len(np.unique(list(object_id_to_segs.keys()))) for object_id, segs in object_id_to_segs.items(): for seg in segs: verts = seg_to_verts[seg] instance_ids[verts] = object_id if object_id not in object_id_to_label_id: object_id_to_label_id[object_id] = label_ids[verts][0] - instance_bboxes = np.zeros((num_instances, 7)) - for obj_id in object_id_to_segs: - label_id = object_id_to_label_id[obj_id] - obj_pc = mesh_vertices[instance_ids == obj_id, 0:3] - if len(obj_pc) == 0: - continue - xmin = np.min(obj_pc[:, 0]) - ymin = np.min(obj_pc[:, 1]) - zmin = np.min(obj_pc[:, 2]) - xmax = np.max(obj_pc[:, 0]) - ymax = np.max(obj_pc[:, 1]) - zmax = np.max(obj_pc[:, 2]) - bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2, - (zmin + zmax) / 2, xmax - xmin, ymax - ymin, - zmax - zmin, label_id]) - # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES - instance_bboxes[obj_id - 1, :] = bbox + unaligned_bboxes = extract_bbox(mesh_vertices, object_id_to_segs, + object_id_to_label_id, instance_ids) + aligned_bboxes = extract_bbox(aligned_mesh_vertices, object_id_to_segs, + object_id_to_label_id, instance_ids) else: label_ids = None instance_ids = None - instance_bboxes = None + unaligned_bboxes = None + aligned_bboxes = None object_id_to_label_id = None if output_file is not None: @@ -145,11 +162,12 @@ def export(mesh_file, if not test_mode: np.save(output_file + '_sem_label.npy', label_ids) np.save(output_file + '_ins_label.npy', instance_ids) - np.save(output_file + '_bbox.npy', instance_bboxes) + np.save(output_file + '_unaligned_bbox.npy', unaligned_bboxes) + np.save(output_file + '_aligned_bbox.npy', aligned_bboxes) np.save(output_file + '_axis_align_matrix.npy', axis_align_matrix) - return mesh_vertices, label_ids, instance_ids, \ - instance_bboxes, object_id_to_label_id, axis_align_matrix + return mesh_vertices, label_ids, instance_ids, unaligned_bboxes, \ + aligned_bboxes, object_id_to_label_id, axis_align_matrix def main(): diff --git a/tools/data_converter/scannet_data_utils.py b/tools/data_converter/scannet_data_utils.py index 2f4ed60374..c94ab54156 100644 --- a/tools/data_converter/scannet_data_utils.py +++ b/tools/data_converter/scannet_data_utils.py @@ -42,9 +42,15 @@ def __init__(self, root_path, split='train'): def __len__(self): return len(self.sample_id_list) - def get_box_label(self, idx): + def get_aligned_box_label(self, idx): box_file = osp.join(self.root_dir, 'scannet_instance_data', - f'{idx}_bbox.npy') + f'{idx}_aligned_bbox.npy') + mmcv.check_file_exist(box_file) + return np.load(box_file) + + def get_unaligned_box_label(self, idx): + box_file = osp.join(self.root_dir, 'scannet_instance_data', + f'{idx}_unaligned_bbox.npy') mmcv.check_file_exist(box_file) return np.load(box_file) @@ -112,28 +118,35 @@ def process_single_scene(sample_idx): if has_label: annotations = {} - boxes_with_classes = self.get_box_label( - sample_idx) # k, 6 + class - annotations['gt_num'] = boxes_with_classes.shape[0] + # box is of shape [k, 6 + class] + aligned_box_label = self.get_aligned_box_label(sample_idx) + unaligned_box_label = self.get_unaligned_box_label(sample_idx) + annotations['gt_num'] = aligned_box_label.shape[0] if annotations['gt_num'] != 0: - minmax_boxes3d = boxes_with_classes[:, :-1] # k, 6 - classes = boxes_with_classes[:, -1] # k, 1 + aligned_box = aligned_box_label[:, :-1] # k, 6 + unaligned_box = unaligned_box_label[:, :-1] + classes = aligned_box_label[:, -1] # k annotations['name'] = np.array([ self.label2cat[self.cat_ids2class[classes[i]]] for i in range(annotations['gt_num']) ]) - annotations['location'] = minmax_boxes3d[:, :3] - annotations['dimensions'] = minmax_boxes3d[:, 3:6] - annotations['gt_boxes_upright_depth'] = minmax_boxes3d + # default names are given to aligned bbox for compatibility + # we also save unaligned bbox info with marked names + annotations['location'] = aligned_box[:, :3] + annotations['dimensions'] = aligned_box[:, 3:6] + annotations['gt_boxes_upright_depth'] = aligned_box + annotations['unaligned_location'] = unaligned_box[:, :3] + annotations['unaligned_dimensions'] = unaligned_box[:, 3:6] + annotations[ + 'unaligned_gt_boxes_upright_depth'] = unaligned_box annotations['index'] = np.arange( annotations['gt_num'], dtype=np.int32) annotations['class'] = np.array([ self.cat_ids2class[classes[i]] for i in range(annotations['gt_num']) ]) - axis_align_matrix = self.get_axis_align_matrix( - sample_idx) # [4, 4] - annotations['axis_align_matrix'] = axis_align_matrix + axis_align_matrix = self.get_axis_align_matrix(sample_idx) + annotations['axis_align_matrix'] = axis_align_matrix # 4x4 info['annos'] = annotations return info From bebbc2b53e5a2ba270052b1cb62e6ec67daf9ddc Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Tue, 20 Apr 2021 20:06:38 +0800 Subject: [PATCH 10/12] dataset read aligned bbox --- configs/_base_/datasets/scannet-3d-18class.py | 27 ++--- mmdet3d/datasets/pipelines/transforms_3d.py | 84 +------------ mmdet3d/datasets/scannet_dataset.py | 108 ++--------------- tests/data/scannet/scannet_infos.pkl | Bin 6188 -> 10105 bytes .../test_datasets/test_scannet_dataset.py | 113 +++++++++--------- .../test_augmentations/test_transforms_3d.py | 85 +------------ .../test_pipelines/test_indoor_pipeline.py | 26 ++-- 7 files changed, 94 insertions(+), 349 deletions(-) diff --git a/configs/_base_/datasets/scannet-3d-18class.py b/configs/_base_/datasets/scannet-3d-18class.py index c34a575961..b97e858ba5 100644 --- a/configs/_base_/datasets/scannet-3d-18class.py +++ b/configs/_base_/datasets/scannet-3d-18class.py @@ -16,18 +16,16 @@ use_dim=[0, 1, 2]), dict( type='LoadAnnotations3D', - with_bbox_3d=False, - with_label_3d=False, + with_bbox_3d=True, + with_label_3d=True, with_mask_3d=True, with_seg_3d=True), + dict(type='GlobalAlignment', rotation_axis=2), dict( type='PointSegClassMapping', valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39), max_cat_id=40), - dict( - type='GlobalAlignment', rotation_axis=2, - ignore_index=len(class_names)), dict(type='IndoorPointSample', num_points=40000), dict( type='RandomFlip3D', @@ -54,9 +52,7 @@ shift_height=True, load_dim=6, use_dim=[0, 1, 2]), - dict( - type='GlobalAlignment', rotation_axis=2, - ignore_index=len(class_names)), + dict(type='GlobalAlignment', rotation_axis=2), dict( type='MultiScaleFlipAug3D', img_scale=(1333, 800), @@ -93,16 +89,11 @@ use_dim=[0, 1, 2]), dict( type='LoadAnnotations3D', - with_bbox_3d=False, - with_label_3d=False, - with_mask_3d=True, - with_seg_3d=True), - dict(type='PointSegClassMapping', valid_cat_ids=valid_class_ids), - dict( - type='GlobalAlignment', - rotation_axis=2, - ignore_index=len(class_names), - extract_bbox=True), + with_bbox_3d=True, + with_label_3d=True, + with_mask_3d=False, + with_seg_3d=False), + dict(type='GlobalAlignment', rotation_axis=2), dict( type='DefaultFormatBundle3D', class_names=class_names, diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py b/mmdet3d/datasets/pipelines/transforms_3d.py index 1aff3924e1..fa8509cb94 100644 --- a/mmdet3d/datasets/pipelines/transforms_3d.py +++ b/mmdet3d/datasets/pipelines/transforms_3d.py @@ -296,17 +296,11 @@ def __repr__(self): @PIPELINES.register_module() class GlobalAlignment(object): """Apply global alignment to 3D scene points by rotation and translation. - Extract 3D bboxes from the aligned points and instance mask if provided. Args: rotation_axis (int): Rotation axis for points and bboxes rotation. - ignore_index (int): Label index for which we won't extract bboxes. - extract_bbox (bool): Whether extract new ground-truth bboxes after \ - alignment. This requires instance and semantic mask inputs. - Defaults to False. Note: - This function should be called after PointSegClassMapping in pipeline. We do not record the applied rotation and translation as in \ GlobalRotScaleTrans. Because usually, we do not need to reverse \ the alignment step. @@ -314,10 +308,8 @@ class GlobalAlignment(object): bounding boxes for evaluation. """ - def __init__(self, rotation_axis, ignore_index, extract_bbox=False): + def __init__(self, rotation_axis): self.rotation_axis = rotation_axis - self.ignore_index = ignore_index - self.extract_bbox = extract_bbox def _trans_points(self, input_dict, trans_factor): """Private function to translate points. @@ -357,74 +349,6 @@ def _check_rot_mat(self, rot_mat): is_valid &= (rot_mat[:, self.rotation_axis] == valid_array).all() assert is_valid, f'invalid rotation matrix {rot_mat}' - def _bbox_from_points(self, points): - """Get the bounding box of a set of points. - - Args: - points (np.ndarray): A set of points belonging to one instance. - - Returns: - np.ndarray: A bounding box of input points. We use origin as \ - (0.5, 0.5, 0.5) without yaw. - """ - xmin = np.min(points[:, 0]) - ymin = np.min(points[:, 1]) - zmin = np.min(points[:, 2]) - xmax = np.max(points[:, 0]) - ymax = np.max(points[:, 1]) - zmax = np.max(points[:, 2]) - bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2, - (zmin + zmax) / 2, xmax - xmin, ymax - ymin, - zmax - zmin]) - return bbox - - def _extract_bboxes(self, input_dict): - """Extract bounding boxes from points, semantic mask and instance mask. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after extracting bboxes, keys in \ - input_dict['bbox3d_fields'] are updated in the dict. - """ - # TODO: this function is only used in ScanNet-Det pipeline currently - # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes - from mmdet3d.core.bbox import DepthInstance3DBoxes - - assert 'pts_instance_mask' in input_dict.keys(), \ - 'instance mask is not provided in GlobalAlignment' - assert 'pts_semantic_mask' in input_dict.keys(), \ - 'semantic mask is not provided in GlobalAlignment' - - coords = input_dict['points'].coord.numpy() - inst_mask = input_dict['pts_instance_mask'] - sem_mask = input_dict['pts_semantic_mask'] - - # select points from valid categories where we want to extract bboxes - valid_cat_mask = (sem_mask != self.ignore_index) - inst_ids = np.unique(inst_mask[valid_cat_mask]) # ids of valid insts - instance_bboxes = np.zeros((inst_ids.shape[0], 7)) - inst_id2cat_id = { - inst_id: sem_mask[inst_mask == inst_id][0] - for inst_id in inst_ids - } - for bbox_idx, inst_id in enumerate(inst_ids): - cat_id = inst_id2cat_id[inst_id] - inst_coords = coords[inst_mask == inst_id] - bbox = self._bbox_from_points(inst_coords) - instance_bboxes[bbox_idx, :6] = bbox - instance_bboxes[bbox_idx, 6] = cat_id - - if 'gt_bboxes_3d' not in input_dict['bbox3d_fields']: - input_dict['bbox3d_fields'].append('gt_bboxes_3d') - input_dict['gt_bboxes_3d'] = DepthInstance3DBoxes( - instance_bboxes[:, :6], - box_dim=6, - with_yaw=False, - origin=(0.5, 0.5, 0.5)) - input_dict['gt_labels_3d'] = instance_bboxes[:, 6].astype(np.long) - def __call__(self, input_dict): """Call function to shuffle points. @@ -447,16 +371,12 @@ def __call__(self, input_dict): self._check_rot_mat(rot_mat) self._rot_points(input_dict, rot_mat) self._trans_points(input_dict, trans_vec) - if self.extract_bbox: - self._extract_bboxes(input_dict) return input_dict def __repr__(self): repr_str = self.__class__.__name__ - repr_str += f'(rotation_axis={self.rotation_axis},' - repr_str += f' ignore_index={self.ignore_index},' - repr_str += f' extract_bbox={self.extract_bbox})' + repr_str += f'(rotation_axis={self.rotation_axis})' return repr_str diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py index dc7e34cae1..1dfff7d7f7 100644 --- a/mmdet3d/datasets/scannet_dataset.py +++ b/mmdet3d/datasets/scannet_dataset.py @@ -154,89 +154,6 @@ def _get_axis_align_matrix(info): 'use new pre-process scripts to re-generate ScanNet data') return np.eye(4).astype(np.float32) - def evaluate(self, - results, - metric=None, - iou_thr=(0.25, 0.5), - logger=None, - show=False, - out_dir=None, - pipeline=None): - """Evaluate. - - Evaluation in indoor protocol. - Since ScanNet detection data pipeline re-computes ground-truth boxes, - we can't directly use gt_bboxes from self.data_infos. - - Args: - results (list[dict]): List of results. - metric (str | list[str]): Metrics to be evaluated. - iou_thr (list[float]): AP IoU thresholds. - show (bool): Whether to visualize. - Default: False. - out_dir (str): Path to save the visualization results. - Default: None. - pipeline (list[dict], optional): raw data loading for showing. - Default: None. - - Returns: - dict: Evaluation results. - """ - from mmdet3d.core.evaluation import indoor_eval - assert isinstance( - results, list), f'Expect results to be list, got {type(results)}.' - assert len(results) > 0, 'Expect length of results > 0.' - assert len(results) == len(self.data_infos) - assert isinstance( - results[0], dict - ), f'Expect elements in results to be dict, got {type(results[0])}.' - # load gt_bboxes via pipeline - pipeline = self._get_pipeline(pipeline) - gt_bboxes = [ - self._extract_data( - i, pipeline, ['gt_bboxes_3d', 'gt_labels_3d'], load_annos=True) - for i in range(len(self.data_infos)) - ] - gt_annos = [self._build_annos(*gt_bbox) for gt_bbox in gt_bboxes] - label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)} - ret_dict = indoor_eval( - gt_annos, - results, - iou_thr, - label2cat, - logger=logger, - box_type_3d=self.box_type_3d, - box_mode_3d=self.box_mode_3d) - if show: - self.show(results, out_dir, pipeline=pipeline) - - return ret_dict - - @staticmethod - def _build_annos(gt_bboxes, gt_labels): - """Transform gt bboxes and labels into self.data_infos['annos'] format. - - Args: - gt_bboxes (:obj:`BaseInstance3DBoxes`): \ - 3D bounding boxes in Depth coordinate - gt_labels (torch.Tensor): Labels of boxes. - - Returns: - dict: annotations including the following keys - - - gt_boxes_upright_depth (np.ndarray): 3D bounding boxes. - - class (np.ndarray): Labels of boxes. - - gt_num (int): Number of boxes. - """ - bbox = gt_bboxes.tensor.numpy()[:, :6].copy() # drop yaw dimension - bbox[..., 2] += bbox[..., 5] / 2 # bottom center to gravity center - anno = { - 'gt_boxes_upright_depth': bbox, - 'class': gt_labels.numpy(), - 'gt_num': gt_labels.shape[0] - } - return anno - def _build_default_pipeline(self): """Build the default pipeline for this dataset.""" pipeline = [ @@ -248,19 +165,11 @@ def _build_default_pipeline(self): use_dim=[0, 1, 2]), dict( type='LoadAnnotations3D', - with_bbox_3d=False, - with_label_3d=False, - with_mask_3d=True, - with_seg_3d=True), - dict( - type='PointSegClassMapping', - valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, - 33, 34, 36, 39)), - dict( - type='GlobalAlignment', - rotation_axis=2, - ignore_index=len(self.CLASSES), - extract_bbox=True), + with_bbox_3d=True, + with_label_3d=True, + with_mask_3d=False, + with_seg_3d=False), + dict(type='GlobalAlignment', rotation_axis=2), dict( type='DefaultFormatBundle3D', class_names=self.CLASSES, @@ -287,10 +196,9 @@ def show(self, results, out_dir, show=True, pipeline=None): data_info = self.data_infos[i] pts_path = data_info['pts_path'] file_name = osp.split(pts_path)[-1].split('.')[0] - points, gt_bboxes = self._extract_data( - i, pipeline, ['points', 'gt_bboxes_3d'], load_annos=True) - points = points.numpy() - gt_bboxes = gt_bboxes.tensor.numpy() + points = self._extract_data( + i, pipeline, 'points', load_annos=True).numpy() + gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy() pred_bboxes = result['boxes_3d'].tensor.numpy() show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name, show) diff --git a/tests/data/scannet/scannet_infos.pkl b/tests/data/scannet/scannet_infos.pkl index d0fe1b839d248d140a595a9a20e5711ce1eeed77..20595af6a5fece904a01fe9056a484a26e80eba0 100644 GIT binary patch literal 10105 zcmd^F_g7TO^1lQ@F|0W(hS4=(#<0P(_hMRian&6cch)p8Fu*7bGz@EahFMgC0TC4u z($O@iTB@pzn|(mh`H3GB#s#15MH!o0C;jL83*(t7Jm@M%X!u# zic@`_k)4p8enzE-2^Iv7lw;jgR}$9cEL*Oo)z{Uk=}t8xBRfeGTdN$~wr$JK%udc8 z8c;Jm^~K{|qaocPEbxgG=mqFy=rZ@I_V3~cKqwF0gI3ihY5%T0{C~rb_?*aj$I|J48(+;ThNJz$sb&Y%9FGoIC1v^Wz-4|l zw|Gpk_So}^;f6DuYQA>E*-15OLGe^qB6u(SMlBTAyU3{)!}S)6)BWLp(Da{dBcPUO z*ZVTbFu0l9XrRnPFS*C0dTOG^V@DT^u=vWb;^i*t=65FD8NJ%`KxoO{6ZrkMI7be~0R=UnlI7H0}7Cjlqjni|ekK4?w1LHQ+y_{5Q z`Vx9`V0Tio4c$mhQ~9|`eFgOPD7PF>;~)ih^4W#l^b4J%LE+iL4ZOgdjUXCvK6 zH-J*TcfFg>^F84N7<7+rt>!l-y(plD#ZGES08TsgQK)?GjA0zyIgdizcf!wiOm3ax z;pg0tmcl_FzoS36WB<2FT3)K%w@PH_UC%At#_iNN)H-^YFY=4rLm=n`5p{xU`G%QE zuc>kNBEB%hN~0TGM>ofBhkMtFZp5S((vu1N%%rm!)IOfO z52jk<=sCUE!F@QmZj+xRubfY(N(^guRU)_ZDfB-~YA@uApgWK)L}4`CG?!~(->Va; zmv3^zH$yCb@C7}lx_z)tt>DL#)`-bmp$9zZp__C&)#C1KK24`)P<664nz8(N8V{IM zL+_}5cL`rlvkZQa&pk8wJY9pfh2fNWhrsVay2lsS^E)0eEWYdHhRt+;qdt7W;VUi* z;wt<`&A(c7st9!G_pIe<)I$%$WWLHT0)kp9-XYVemIc&uk1Gu~zQ9-N^;g0@RH_vb z|FBa0!OW*+>2m-J9s*cihTnO!1Mcj$<-o2;YLeo!Z5BO}4$x`t*qm$uPVKmI zFHKk3Nxp5-0IWZA+Ujh_F(_U96rbJ4O(wN)&C;}-ulS}(&qo>52z$FVN|qORwj@|M zyC&}C4tfXE7&N(a;g&rHJ>+wmgD*>Q-i7mj-A@hl3?8hQTB#GLFZc~u0|!Ce*9W*A zohCKSTgANxaC>^pjT!WMZCksPO%cEK?BXyK{bgM;COhx z9oEcsv#5q2L)2ce?cjdy3Ru(yZ3D?MIYIn(H}zXk+G)N9@5zrcxWP;J_Syms%}El= z?u0FO!1sr8@kEyv&{>HDuc0oG*}_jpbI6>`gNLYlZyC3nTZc@@`BuzAIO*Gi>+{f@ z3e#=LsI_%v?v-90cB5{77&{aobK{sBh%b zuX3u@0ky`Xi%OJ$p0dN2BUL2`T44)WBDioL7 zj3Zx)!?VVbQpMwZ<4C(=x##%uvY1Xhj>IhvKOZx| zeB0h^*?R8NpdH*lRA7#MNXQ@RsUmE3SC8iLb>wA$LZIrCdiPxJvh0Kp96y<<89~u^ z&82sMWEWs=I#ITEeD=9ewoq@{jEA^76J@H*@tqu7#9(3OWWmA?Pb!dj3UJ!0kuv4r zRuvXt99~=0Y1r0`5dy>Jr)O!Zjy=gpfiY!q3k^;YAP1+p_10jge;pD~%!yX zHaF@mt-W|Z(j-6#@D_3Ztwx-UT)0<{w*1OhG&qFHyF-gg!9uFlr4tx_iIde;1J=Ld z)As36_X+?aiRa>opwKafkY++bqya#72F*(BZ`* ztqFr)?6*zo_#C??pkT(HFwgXG)TCB+m`KATLbDxk83HU3k+V^dhecvF! zIXYz7h8X{5Pf{=qwtQLOc2SI9Ebpdz+rl`wWV>Avhjc|Oc(dib zm&S9=3v1Lah%sl)TFS2k=%CKewYBC-EsO~cF8(QuT{_XeQHCyp0W3~G6OX>n*xbj?0hhpvAGK8E{kYX`U*Z=2eL0*CC{R}g+csn;Tt2HnfV zt0x(R?7CMbxpj5KC>tQZ_6A0{<`wuZc#*4`rkHeFm_SgMya4A?Gu=+HFrwg$g{d-t zK)T-5b12l_*mi(|d>6kKN_MS_ft8QKtV{DY-{Plwpo?6AQRg&USsa>#(npuKK$7wn zU}HJn+UaVGf=RG@ebB~UUET`#YiLc^vJMB;Ct8>RTXs>4vEkeOUTN*PDVn&$E@%Zc z$d~4z{KYVrE*!@D3XB|fIg61|R*C{%2AkdXz_Ke@nc~nATVE8+!W%HnB3>ry3Rd7F zmPYK4dhj~5U+8eSdd08`rLnF>4ZQ2QWJdyrUAf9?zMYKXRz}}%3gb=HC96y=OAl-M zAKYy_P6I(eidBIMUf2%Wt8BiEaG|2|Gb8uC|Q4rTuGN)$zPpn z_xqA{k9{Tgq{yzKw`d`dV%^xNF*4?Ljut_&b+p?KBZwAYqdEiJ#c0v@ldd94T#n{N ztY|Siri-zn2K87D2wIF9^kX`xXjwrF=M^<+h~?R$B^9x}p7rUZBf3M3o{~V9(cER! zj3t^cMF5PJ&BSoE94pz0;ft|*v|K2Lr;e5?#dPCQGN~9oK3bv`&Hv*FVOXL2I8=?GGroNB_f(&tKp@ta2lf zDNiDk$EoslWICe?!rxkbE0tHiwH7*6(Z|2F7VB@V@>yS60(v64q?pO*L(nC*;gzf* z|ClrkUB2kZ|2&OAABjE+{WJ8@=wr~OkQ|3DDeeSx`5&c8=#$Z>pns08d?L{Sr#cu= zCALKQC3%&Vsx%^zUnDAXs`9WzDk9N0szM~HbgIC|C92XA{S&){qw@P-kw}_IAx)%^ z7Woy)K5QbDG?7Z$-+%2y+}4D^BlIJ`q8VwT)&Hc4_ND#(SN$g1JLFV{1L}y!C2Zv# z_Eoxd@s5As3RL(GtD_N3{zEi*%&CrxCX=(1Yt#ulYYAZ;$_Bpn8FgjJ2g@GrnztYw z38*go05yz_)V7A|=oZyafN%Q~fd)TZsZsIgL;xQN&KB_-e7%m${q+}F8})cj^7VB` d@Fi^UufV@Q0a=|CQG-tPdq7ots;csq{4XU`36TH* literal 6188 zcmd@YYj+e?vhyH>0U;<6Feu{-B5DXCE)X6aMMY7c1Pn+B;6hjl=T!ZP{WrT+_fCg^p$9$dr@iOY%EP1CwE$+Q)WrZV}MDD4qtwdO3srt_V_ws_deXX80h9;h&95HgjFg|oqAtXoWz zOu1-09j~Vmtgjapd9#wmS-D_W*h+}$=6qLrZuw-+x=1oDD$U0W%*k}l3a6v-U}rej zp_a}tpCaNtgx^Z=O-RVaJHu%!8J#3yrj&3G;kOcInbT;Sa5|mIiAQ?G>{_#ep4+V; z887ApYRqzaOouz;qAE(LuAOz!Og3KEnNL~Ca5fv>=?iA#(G2OI%||Vtn5(#aY4Q?N z#iIdjuXrpPjAml-XwDZ;^Ta3SnI$ARBC6$nsj!tyuMm&-iTQ?Duum*B#1plFl0eN~ zQBx2YvvzjTx>Bxb^JhO_d z+d&JGYz0A#AVa{o2aVs$Z;_xdL4+Vh5GU{&Q6jPqc>>mn*Ylbz!Bm6!nV0GAUamd= z3x4(q@1G;+m`dP%IM&Ct*VTj9vu;xjmh198jQn=uXZdU|`;_O5<30G!m zEZb_A~Bjg9mG z_9x3`=`_(zxQ@3O6?(5>I%72W6_3qCn`UU^sZPAfRCL}n@v54k1N|jBA^pv83(qi8 zhoNSaHYbF#u2Zt*esfBka5usk#hXZok zm+-4;Oao_L6P{%hI(Y6m+%M(A0A5q>SoN`M;=qUa9TUNZJ1^jvv_q>wXS!5Cw}NOZ z@~Y){LQ+F-z$s+~I&CKyH#Bxbmljxa}L4x@Pm{M_G0*4UNJxZ z^dye+#-S9R@X1Y{!Gmy2IdAC>rIn7iaa^jP!`V%RV?)OzugUvK8n&y&KDYtr6_$AN$t2))Oh2Rie zc2fmj#jy!>e#8;H!n|m2r^_v~iM{}b)Xe`hTrbs$s=+YytL^oza9AxzN5@gTB=>}G z@gg4MDLCc3+}RqqfX5a6yLWIJauHqg%rqUDGFw{~wpXM?*>zod` z_wpM4Q~5*3i&f|-jaxE!PR%yBhulK<{5<1GqkdXb$f|_BpiY6*`W=+f_iK zvw~t%I@}L?@TRncj&Z#@b}jj7L3h|)s^oyZ)4PXps1xu=u0?ws4L!53cMAlz zXKuEOHAv)_;@R7~iJ}JEE4l05J(gGM*Ss1q@48U5Ju6-~D9|3;@#p+w|Kq9#p`WDY%Y9$ zd#CW$mwZ*c%Wr)FL;Q8>x4!q}TOWU5;88-rZ;RZGmJ>`P;PN+}fIpGWAmHAXzrM~Q zc!Xd!!5o4rg1H2b5zn1d%)CB|!37#MjO>EKohWNk|e^VAkQ{h}r7_LQI z*rE>&(Oj@-t839}@ex~O8shI$TeMAD^l#cl74EzLY!MH(f(Ki{!~JK=CJ(lX2V2GC z{(sf8D^D=pmGAzuXLztz|HxqP^SJ+C{DZyMVu)5td>k=Zt#EfT7YwJ8?P>ayY-N+( zBItTD#GVWrBEp_550v+b=-qDuG4^HL5N%WYvR(Spp$$wti^t}`Xd}HT!3`W;x~vAN z50pI@k+n^5rUCZDARMTscec9>aA)#L=H$_9(Hrg;?KoC=Ay?SRztsD|jmR!Mv3@69 bQEzFJ$@k8QStQseNkeS6L`Ni_57+${&rpJ> diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py index 37ea8ccba4..1974553447 100644 --- a/tests/test_data/test_datasets/test_scannet_dataset.py +++ b/tests/test_data/test_datasets/test_scannet_dataset.py @@ -27,15 +27,11 @@ def test_getitem(): with_label_3d=True, with_mask_3d=True, with_seg_3d=True), + dict(type='GlobalAlignment', rotation_axis=2), dict( type='PointSegClassMapping', valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39)), - dict( - type='GlobalAlignment', - rotation_axis=2, - ignore_index=len(class_names), - extract_bbox=True), dict(type='IndoorPointSample', num_points=5), dict( type='RandomFlip3D', @@ -79,13 +75,15 @@ def test_getitem(): [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03], [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]]) expected_gt_bboxes_3d = torch.tensor( - [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000], - [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000], - [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000], - [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000], - [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]]) - expected_gt_labels = np.array( - [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long) + [[-1.1835, -3.6317, 1.5704, 1.7577, 0.3761, 0.5724, 0.0000], + [-3.1832, 3.2269, 1.1911, 0.6727, 0.2251, 0.6715, 0.0000], + [-0.9598, -2.2864, 0.0093, 0.7506, 2.5709, 1.2145, 0.0000], + [-2.6988, -2.7354, 0.8288, 0.7680, 1.8877, 0.2870, 0.0000], + [3.2989, 0.2885, -0.0090, 0.7600, 3.8814, 2.1603, 0.0000]]) + expected_gt_labels = np.array([ + 6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0, + 0, 0, 0, 5, 5, 5 + ]) expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18]) expected_pts_instance_mask = np.array([44, 22, 10, 10, 57]) original_classes = scannet_dataset.CLASSES @@ -131,23 +129,47 @@ def test_evaluate(): results = [] pred_boxes = dict() pred_boxes['boxes_3d'] = DepthInstance3DBoxes( - torch.tensor( - [[-3.7146, -1.0654, 0.6052, 0.6298, 1.9906, 0.4429, 0.0000], - [-8.5576, -1.8178, 0.2046, 1.1263, 2.7851, 1.8632, 0.0000], - [-8.8859, -5.3550, 0.9772, 0.9093, 0.3098, 0.5662, 0.0000], - [-8.0989, -5.0358, 0.0372, 0.2746, 0.2057, 0.5532, 0.0000], - [-6.9733, 0.3352, -0.0296, 1.2265, 0.7187, 2.2613, 0.0000], - [-5.3636, -1.6047, 0.3701, 2.8043, 1.1057, 0.3171, 0.0000]])) - pred_boxes['labels_3d'] = torch.tensor([4, 11, 11, 10, 0, 3]) + torch.tensor([[ + 1.4813e+00, 3.5207e+00, 1.5704e+00, 1.7445e+00, 2.3196e-01, + 5.7235e-01, 0.0000e+00 + ], + [ + 2.9040e+00, -3.4803e+00, 1.1911e+00, 6.6078e-01, + 1.7072e-01, 6.7154e-01, 0.0000e+00 + ], + [ + 1.1466e+00, 2.1987e+00, 9.2576e-03, 5.4184e-01, + 2.5346e+00, 1.2145e+00, 0.0000e+00 + ], + [ + 2.9168e+00, 2.5016e+00, 8.2875e-01, 6.1697e-01, + 1.8428e+00, 2.8697e-01, 0.0000e+00 + ], + [ + -3.3114e+00, -1.3351e-02, -8.9524e-03, 4.4082e-01, + 3.8582e+00, 2.1603e+00, 0.0000e+00 + ], + [ + -2.0135e+00, -3.4857e+00, 9.3848e-01, 1.9911e+00, + 2.1603e-01, 1.2767e+00, 0.0000e+00 + ], + [ + -2.1945e+00, -3.1402e+00, -3.8165e-02, 1.4801e+00, + 6.8676e-01, 1.0586e+00, 0.0000e+00 + ], + [ + -2.7553e+00, 2.4055e+00, -2.9972e-02, 1.4764e+00, + 1.4927e+00, 2.3380e+00, 0.0000e+00 + ]])) + pred_boxes['labels_3d'] = torch.tensor([6, 6, 4, 9, 11, 11]) pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0, 1.0, 1.0, 0.5]) results.append(pred_boxes) metric = [0.25, 0.5] ret_dict = scannet_dataset.evaluate(results, metric) - assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01 + assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01 + assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01 + assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01 assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01 - assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01 - assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01 - assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01 # test evaluate with pipeline class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', @@ -163,19 +185,11 @@ def test_evaluate(): use_dim=[0, 1, 2]), dict( type='LoadAnnotations3D', - with_bbox_3d=False, - with_label_3d=False, - with_mask_3d=True, - with_seg_3d=True), - dict( - type='PointSegClassMapping', - valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, - 34, 36, 39)), - dict( - type='GlobalAlignment', - rotation_axis=2, - ignore_index=len(class_names), - extract_bbox=True), + with_bbox_3d=True, + with_label_3d=True, + with_mask_3d=False, + with_seg_3d=False), + dict(type='GlobalAlignment', rotation_axis=2), dict( type='DefaultFormatBundle3D', class_names=class_names, @@ -185,11 +199,10 @@ def test_evaluate(): ] ret_dict = scannet_dataset.evaluate( results, metric, pipeline=eval_pipeline) - assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01 + assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01 + assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01 + assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01 assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01 - assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01 - assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01 - assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01 def test_show(): @@ -254,19 +267,11 @@ def test_show(): use_dim=[0, 1, 2]), dict( type='LoadAnnotations3D', - with_bbox_3d=False, - with_label_3d=False, - with_mask_3d=True, - with_seg_3d=True), - dict( - type='PointSegClassMapping', - valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, - 34, 36, 39)), - dict( - type='GlobalAlignment', - rotation_axis=2, - ignore_index=len(class_names), - extract_bbox=True), + with_bbox_3d=True, + with_label_3d=True, + with_mask_3d=False, + with_seg_3d=False), + dict(type='GlobalAlignment', rotation_axis=2), dict( type='DefaultFormatBundle3D', class_names=class_names, diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py index 8f672a2f65..5e64d7e6c4 100644 --- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py +++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py @@ -3,8 +3,7 @@ import pytest import torch -from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes, - DepthInstance3DBoxes, LiDARInstance3DBoxes) +from mmdet3d.core import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes from mmdet3d.core.points import DepthPoints, LiDARPoints from mmdet3d.datasets import (BackgroundPointsFilter, GlobalAlignment, ObjectNoise, ObjectSample, PointShuffle, @@ -225,49 +224,22 @@ def test_points_range_filter(): def test_global_alignment(): np.random.seed(0) - valid_cat_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, - 36, 39) - ignore_index = len(valid_cat_ids) - cat_ids2class = np.ones((41, ), dtype=np.int) * ignore_index - for class_id, cat_id in enumerate(valid_cat_ids): - cat_ids2class[cat_id] = class_id - - global_alignment = GlobalAlignment( - rotation_axis=2, ignore_index=ignore_index, extract_bbox=True) + global_alignment = GlobalAlignment(rotation_axis=2) points = np.fromfile('tests/data/scannet/points/scene0000_00.bin', np.float32).reshape(-1, 6) - sem_mask = np.fromfile('tests/data/scannet/semantic_mask/scene0000_00.bin', - np.long) - ins_mask = np.fromfile('tests/data/scannet/instance_mask/scene0000_00.bin', - np.long) annos = mmcv.load('tests/data/scannet/scannet_infos.pkl') info = annos[0] - gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'] axis_align_matrix = info['annos']['axis_align_matrix'] - gt_labels_3d = info['annos']['class'] depth_points = DepthPoints(points.copy(), points_dim=6) - depth_bboxes = DepthInstance3DBoxes( - gt_bboxes_3d, - box_dim=gt_bboxes_3d.shape[-1], - with_yaw=False, - origin=(0.5, 0.5, 0.5)) - sem_mask = cat_ids2class[sem_mask] input_dict = dict( points=depth_points.clone(), - gt_bboxes_3d=depth_bboxes, - bbox3d_fields=['gt_bboxes_3d'], - gt_labels_3d=gt_labels_3d, - ann_info=dict(axis_align_matrix=axis_align_matrix), - pts_instance_mask=ins_mask, - pts_semantic_mask=sem_mask) + ann_info=dict(axis_align_matrix=axis_align_matrix)) input_dict = global_alignment(input_dict) trans_depth_points = input_dict['points'] - trans_depth_bboxes = input_dict['gt_bboxes_3d'] - trans_bbox_labels = input_dict['gt_labels_3d'] # construct expected transformed points by affine transformation pts = np.ones((points.shape[0], 4)) @@ -275,60 +247,11 @@ def test_global_alignment(): trans_pts = np.dot(pts, axis_align_matrix.T) expected_points = np.concatenate([trans_pts[:, :3], points[:, 3:]], axis=1) - expected_bbox_labels = np.array( - [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long) - expected_depth_bboxes = np.array( - [[ - -3.714606, -1.0654305, 0.6051854, 0.6297655, 1.9905674, 0.44288868, - 0. - ], - [ - -8.557551, -1.8178326, 0.20456636, 1.1263373, 2.7851129, - 1.8631845, 0. - ], - [ - -8.885854, -5.354957, 0.97720087, 0.9093195, 0.30981588, 0.566175, - 0. - ], - [ - -8.098918, -5.0357704, 0.03724962, 0.27458152, 0.20566699, - 0.5532104, 0. - ], - [ - -6.9733434, 0.33523083, -0.02958763, 1.2264912, 0.7187278, - 2.2613325, 0. - ], - [ - -5.36362, -1.6046655, 0.37014085, 2.8042943, 1.1057366, - 0.31707314, 0. - ], [-2.6299255, -2.3314357, 1.4469249, 0., 0., 0., 0.], - [-5.201888, -1.014641, 0.11020403, 0., 0., 0., 0.], - [ - -3.5216672, -6.8292904, 0.26571387, 0.13945593, 0.12182455, - 0.02463818, 0. - ], - [ - -6.4834313, -5.4506774, 0.13558027, 1.4790803, 0.6031074, - 0.60305846, 0. - ], - [ - -9.338867, -4.616579, 0.6112565, 0.17650154, 0.988079, 0.16838372, - 0. - ], [-2.0639155, -1.245964, 0.30754995, 0., 0., 0., 0.], - [-2.002855, -1.9495802, 2.2899528, 0., 0., 0., 0.], - [-2.1240144, -3.751592, 0.92695427, 0., 0., 0., 0.], - [-3.6406162, -5.1366153, 0.25374442, 0., 0., 0., 0.]]) - assert np.allclose( trans_depth_points.tensor.numpy(), expected_points, atol=1e-6) - assert np.all(trans_bbox_labels == expected_bbox_labels) - assert np.allclose( - trans_depth_bboxes.tensor.numpy(), expected_depth_bboxes, atol=1e-6) repr_str = repr(global_alignment) - expected_repr_str = 'GlobalAlignment(rotation_axis=2,' \ - f' ignore_index={ignore_index},' \ - f' extract_bbox=True)' + expected_repr_str = 'GlobalAlignment(rotation_axis=2)' assert repr_str == expected_repr_str diff --git a/tests/test_data/test_pipelines/test_indoor_pipeline.py b/tests/test_data/test_pipelines/test_indoor_pipeline.py index 5563dcd073..6e705e85f9 100644 --- a/tests/test_data/test_pipelines/test_indoor_pipeline.py +++ b/tests/test_data/test_pipelines/test_indoor_pipeline.py @@ -23,19 +23,15 @@ def test_scannet_pipeline(): use_dim=[0, 1, 2]), dict( type='LoadAnnotations3D', - with_bbox_3d=False, - with_label_3d=False, + with_bbox_3d=True, + with_label_3d=True, with_mask_3d=True, with_seg_3d=True), + dict(type='GlobalAlignment', rotation_axis=2), dict( type='PointSegClassMapping', valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39)), - dict( - type='GlobalAlignment', - rotation_axis=2, - ignore_index=len(class_names), - extract_bbox=True), dict(type='IndoorPointSample', num_points=5), dict( type='RandomFlip3D', @@ -97,13 +93,15 @@ def test_scannet_pipeline(): [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03], [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]]) expected_gt_bboxes_3d = torch.tensor( - [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000], - [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000], - [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000], - [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000], - [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]]) - expected_gt_labels_3d = np.array( - [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long) + [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000], + [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000], + [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000], + [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000], + [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]]) + expected_gt_labels_3d = np.array([ + 6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0, + 0, 0, 0, 5, 5, 5 + ]) expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18]) expected_pts_instance_mask = np.array([44, 22, 10, 10, 57]) assert torch.allclose(points, expected_points, 1e-2) From 14e6e08a0e0d02835979e0ff083159027f61dea4 Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Tue, 20 Apr 2021 21:26:33 +0800 Subject: [PATCH 11/12] fix small bugs --- configs/_base_/datasets/scannet-3d-18class.py | 11 +---------- mmdet3d/core/visualizer/show_result.py | 5 +---- mmdet3d/datasets/__init__.py | 3 +-- mmdet3d/datasets/scannet_dataset.py | 13 ++----------- .../test_datasets/test_scannet_dataset.py | 18 ++---------------- tools/data_converter/scannet_data_utils.py | 3 --- 6 files changed, 7 insertions(+), 46 deletions(-) diff --git a/configs/_base_/datasets/scannet-3d-18class.py b/configs/_base_/datasets/scannet-3d-18class.py index b97e858ba5..8fdd5e142a 100644 --- a/configs/_base_/datasets/scannet-3d-18class.py +++ b/configs/_base_/datasets/scannet-3d-18class.py @@ -5,8 +5,6 @@ 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin') -valid_class_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, - 39) train_pipeline = [ dict( type='LoadPointsFromFile', @@ -79,7 +77,6 @@ ] # construct a pipeline for data and gt loading in show function # please keep its loading function consistent with test_pipeline (e.g. client) -# we need to load gt masks for aligned gt bbox extracting eval_pipeline = [ dict( type='LoadPointsFromFile', @@ -87,18 +84,12 @@ shift_height=False, load_dim=6, use_dim=[0, 1, 2]), - dict( - type='LoadAnnotations3D', - with_bbox_3d=True, - with_label_3d=True, - with_mask_3d=False, - with_seg_3d=False), dict(type='GlobalAlignment', rotation_axis=2), dict( type='DefaultFormatBundle3D', class_names=class_names, with_label=False), - dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) + dict(type='Collect3D', keys=['points']) ] data = dict( diff --git a/mmdet3d/core/visualizer/show_result.py b/mmdet3d/core/visualizer/show_result.py index e7b4789e0a..c4f414ad3a 100644 --- a/mmdet3d/core/visualizer/show_result.py +++ b/mmdet3d/core/visualizer/show_result.py @@ -61,10 +61,7 @@ def convert_oriented_box_to_trimesh_fmt(box): scene_bbox = np.zeros((1, 7)) scene = trimesh.scene.Scene() for box in scene_bbox: - try: - scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box)) - except ValueError: # invalid box shape, e.g. width==0 - continue + scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box)) mesh_list = trimesh.util.concatenate(scene.dump()) # save to obj file diff --git a/mmdet3d/datasets/__init__.py b/mmdet3d/datasets/__init__.py index f98b22858d..3dbab47e5c 100644 --- a/mmdet3d/datasets/__init__.py +++ b/mmdet3d/datasets/__init__.py @@ -27,8 +27,7 @@ 'DATASETS', 'build_dataset', 'CocoDataset', 'NuScenesDataset', 'NuScenesMonoDataset', 'LyftDataset', 'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', 'PointShuffle', 'ObjectRangeFilter', - 'PointsRangeFilter', 'Collect3D', 'LoadPointsFromFile', - 'S3DISSegDataset', + 'PointsRangeFilter', 'Collect3D', 'LoadPointsFromFile', 'S3DISSegDataset', 'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample', 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset', 'ScanNetDataset', 'ScanNetSegDataset', 'SemanticKITTIDataset', 'Custom3DDataset', diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py index 1dfff7d7f7..eaba0ad3fc 100644 --- a/mmdet3d/datasets/scannet_dataset.py +++ b/mmdet3d/datasets/scannet_dataset.py @@ -163,20 +163,12 @@ def _build_default_pipeline(self): shift_height=False, load_dim=6, use_dim=[0, 1, 2]), - dict( - type='LoadAnnotations3D', - with_bbox_3d=True, - with_label_3d=True, - with_mask_3d=False, - with_seg_3d=False), dict(type='GlobalAlignment', rotation_axis=2), dict( type='DefaultFormatBundle3D', class_names=self.CLASSES, with_label=False), - dict( - type='Collect3D', - keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) + dict(type='Collect3D', keys=['points']) ] return Compose(pipeline) @@ -196,8 +188,7 @@ def show(self, results, out_dir, show=True, pipeline=None): data_info = self.data_infos[i] pts_path = data_info['pts_path'] file_name = osp.split(pts_path)[-1].split('.')[0] - points = self._extract_data( - i, pipeline, 'points', load_annos=True).numpy() + points = self._extract_data(i, pipeline, 'points').numpy() gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy() pred_bboxes = result['boxes_3d'].tensor.numpy() show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name, diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py index 1974553447..b907fe92cb 100644 --- a/tests/test_data/test_datasets/test_scannet_dataset.py +++ b/tests/test_data/test_datasets/test_scannet_dataset.py @@ -183,19 +183,12 @@ def test_evaluate(): shift_height=False, load_dim=6, use_dim=[0, 1, 2]), - dict( - type='LoadAnnotations3D', - with_bbox_3d=True, - with_label_3d=True, - with_mask_3d=False, - with_seg_3d=False), dict(type='GlobalAlignment', rotation_axis=2), dict( type='DefaultFormatBundle3D', class_names=class_names, with_label=False), - dict( - type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) + dict(type='Collect3D', keys=['points']) ] ret_dict = scannet_dataset.evaluate( results, metric, pipeline=eval_pipeline) @@ -265,19 +258,12 @@ def test_show(): shift_height=False, load_dim=6, use_dim=[0, 1, 2]), - dict( - type='LoadAnnotations3D', - with_bbox_3d=True, - with_label_3d=True, - with_mask_3d=False, - with_seg_3d=False), dict(type='GlobalAlignment', rotation_axis=2), dict( type='DefaultFormatBundle3D', class_names=class_names, with_label=False), - dict( - type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) + dict(type='Collect3D', keys=['points']) ] tmp_dir = tempfile.TemporaryDirectory() temp_dir = tmp_dir.name diff --git a/tools/data_converter/scannet_data_utils.py b/tools/data_converter/scannet_data_utils.py index c94ab54156..c52b3f5b63 100644 --- a/tools/data_converter/scannet_data_utils.py +++ b/tools/data_converter/scannet_data_utils.py @@ -219,9 +219,6 @@ def _convert_to_label(self, mask): mask = np.load(mask) else: mask = np.fromfile(mask, dtype=np.long) - # first filter out unannotated points (labeled as 0) - mask = mask[mask != 0] - # then convert to [0, 20) labels label = self.cat_id2class[mask] return label From a9cc7fd62a242ed7aa498232410da47f546a5de8 Mon Sep 17 00:00:00 2001 From: Wuziyi616 Date: Tue, 11 May 2021 21:12:33 +0800 Subject: [PATCH 12/12] simplify bbox extraction code --- data/scannet/load_scannet_data.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/data/scannet/load_scannet_data.py b/data/scannet/load_scannet_data.py index 7e7cd55709..911bb4c7fe 100644 --- a/data/scannet/load_scannet_data.py +++ b/data/scannet/load_scannet_data.py @@ -61,15 +61,10 @@ def extract_bbox(mesh_vertices, object_id_to_segs, object_id_to_label_id, obj_pc = mesh_vertices[instance_ids == obj_id, 0:3] if len(obj_pc) == 0: continue - xmin = np.min(obj_pc[:, 0]) - ymin = np.min(obj_pc[:, 1]) - zmin = np.min(obj_pc[:, 2]) - xmax = np.max(obj_pc[:, 0]) - ymax = np.max(obj_pc[:, 1]) - zmax = np.max(obj_pc[:, 2]) - bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2, - (zmin + zmax) / 2, xmax - xmin, ymax - ymin, - zmax - zmin, label_id]) + xyz_min = np.min(obj_pc, axis=0) + xyz_max = np.max(obj_pc, axis=0) + bbox = np.concatenate([(xyz_min + xyz_max) / 2.0, xyz_max - xyz_min, + np.array([label_id])]) # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES instance_bboxes[obj_id - 1, :] = bbox return instance_bboxes