From 29f8d895607789ac17c064126a659d54ff4f930d Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Mon, 12 Apr 2021 22:27:43 +0800
Subject: [PATCH 01/12] extract axis aligned matrix to info file

---
 data/scannet/batch_load_scannet_data.py    |  7 +++++--
 data/scannet/load_scannet_data.py          | 12 ++++--------
 tools/data_converter/scannet_data_utils.py |  9 +++++++++
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/data/scannet/batch_load_scannet_data.py b/data/scannet/batch_load_scannet_data.py
index c6bb3a7b34..b55b363309 100644
--- a/data/scannet/batch_load_scannet_data.py
+++ b/data/scannet/batch_load_scannet_data.py
@@ -35,8 +35,9 @@ def export_one_scan(scan_name,
     # includes axisAlignment info for the train set scans.
     meta_file = osp.join(scannet_dir, scan_name, f'{scan_name}.txt')
     mesh_vertices, semantic_labels, instance_labels, instance_bboxes, \
-        instance2semantic = export(mesh_file, agg_file, seg_file,
-                                   meta_file, label_map_file, None, test_mode)
+        instance2semantic, axis_align_matrix = export(
+            mesh_file, agg_file, seg_file, meta_file, label_map_file, None,
+            test_mode)
 
     if not test_mode:
         mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS))
@@ -66,6 +67,8 @@ def export_one_scan(scan_name,
         np.save(f'{output_filename_prefix}_sem_label.npy', semantic_labels)
         np.save(f'{output_filename_prefix}_ins_label.npy', instance_labels)
         np.save(f'{output_filename_prefix}_bbox.npy', instance_bboxes)
+        np.save(f'{output_filename_prefix}_axis_align_matrix.npy',
+                axis_align_matrix)
 
 
 def batch_export(max_num_point,
diff --git a/data/scannet/load_scannet_data.py b/data/scannet/load_scannet_data.py
index d545ae48cd..0cc20312a0 100644
--- a/data/scannet/load_scannet_data.py
+++ b/data/scannet/load_scannet_data.py
@@ -69,7 +69,7 @@ def export(mesh_file,
         label_map_file (str): Path of the label_map_file.
         output_file (str): Path of the output folder.
             Default: None.
-        test_mode (bool): Whether is generating training data without labels.
+        test_mode (bool): Whether to generate test data without labels.
             Default: False.
 
     It returns a tuple, which containts the the following things:
@@ -86,8 +86,7 @@ def export(mesh_file,
 
     # Load scene axis alignment matrix
     lines = open(meta_file).readlines()
-    # TODO: test set data doesn't have align_matrix!
-    # TODO: save align_matrix and move align step to pipeline in the future
+    # test set data doesn't have align_matrix
     axis_align_matrix = np.eye(4)
     for line in lines:
         if 'axisAlignment' in line:
@@ -97,10 +96,6 @@ def export(mesh_file,
             ]
             break
     axis_align_matrix = np.array(axis_align_matrix).reshape((4, 4))
-    pts = np.ones((mesh_vertices.shape[0], 4))
-    pts[:, 0:3] = mesh_vertices[:, 0:3]
-    pts = np.dot(pts, axis_align_matrix.transpose())  # Nx4
-    mesh_vertices[:, 0:3] = pts[:, 0:3]
 
     # Load semantic and instance labels
     if not test_mode:
@@ -151,9 +146,10 @@ def export(mesh_file,
             np.save(output_file + '_sem_label.npy', label_ids)
             np.save(output_file + '_ins_label.npy', instance_ids)
             np.save(output_file + '_bbox.npy', instance_bboxes)
+            np.save(output_file + '_axis_align_matrix.npy', axis_align_matrix)
 
     return mesh_vertices, label_ids, instance_ids, \
-        instance_bboxes, object_id_to_label_id
+        instance_bboxes, object_id_to_label_id, axis_align_matrix
 
 
 def main():
diff --git a/tools/data_converter/scannet_data_utils.py b/tools/data_converter/scannet_data_utils.py
index fa48f5a45b..2f4ed60374 100644
--- a/tools/data_converter/scannet_data_utils.py
+++ b/tools/data_converter/scannet_data_utils.py
@@ -48,6 +48,12 @@ def get_box_label(self, idx):
         mmcv.check_file_exist(box_file)
         return np.load(box_file)
 
+    def get_axis_align_matrix(self, idx):
+        matrix_file = osp.join(self.root_dir, 'scannet_instance_data',
+                               f'{idx}_axis_align_matrix.npy')
+        mmcv.check_file_exist(matrix_file)
+        return np.load(matrix_file)
+
     def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
         """Get data infos.
 
@@ -125,6 +131,9 @@ def process_single_scene(sample_idx):
                         self.cat_ids2class[classes[i]]
                         for i in range(annotations['gt_num'])
                     ])
+                axis_align_matrix = self.get_axis_align_matrix(
+                    sample_idx)  # [4, 4]
+                annotations['axis_align_matrix'] = axis_align_matrix
                 info['annos'] = annotations
             return info
 

From f82a60543507aeb2903e78b960da33b8f8cfd2a1 Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Tue, 13 Apr 2021 17:37:58 +0800
Subject: [PATCH 02/12] support rotation matrix input for
 BaseInstance3DBoxes.rotate function

---
 mmdet3d/core/bbox/structures/base_box3d.py  | 13 ++--
 mmdet3d/core/bbox/structures/cam_box3d.py   | 23 +++++--
 mmdet3d/core/bbox/structures/depth_box3d.py | 25 +++++--
 mmdet3d/core/bbox/structures/lidar_box3d.py | 23 +++++--
 tests/test_utils/test_box3d.py              | 74 +++++++++++++++++++--
 5 files changed, 129 insertions(+), 29 deletions(-)

diff --git a/mmdet3d/core/bbox/structures/base_box3d.py b/mmdet3d/core/bbox/structures/base_box3d.py
index fa674ef53f..c03cd12e49 100644
--- a/mmdet3d/core/bbox/structures/base_box3d.py
+++ b/mmdet3d/core/bbox/structures/base_box3d.py
@@ -129,12 +129,15 @@ def corners(self):
         pass
 
     @abstractmethod
-    def rotate(self, angles, axis=0):
-        """Calculate whether the points are in any of the boxes.
+    def rotate(self, angle, points=None):
+        """Rotate boxes with points (optional) with the given angle or \
+        rotation matrix.
 
         Args:
-            angles (float): Rotation angles.
-            axis (int): The axis to rotate the boxes.
+            angle (float | torch.Tensor | np.ndarray):
+                Rotation angle or rotation matrix.
+            points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional):
+                Points to rotate. Defaults to None.
         """
         pass
 
@@ -144,7 +147,7 @@ def flip(self, bev_direction='horizontal'):
         pass
 
     def translate(self, trans_vector):
-        """Calculate whether the points are in any of the boxes.
+        """Translate boxes with the given translation vector.
 
         Args:
             trans_vector (torch.Tensor): Translation vector of size 1x3.
diff --git a/mmdet3d/core/bbox/structures/cam_box3d.py b/mmdet3d/core/bbox/structures/cam_box3d.py
index 4eab77bcc3..d72391b160 100644
--- a/mmdet3d/core/bbox/structures/cam_box3d.py
+++ b/mmdet3d/core/bbox/structures/cam_box3d.py
@@ -169,10 +169,12 @@ def nearest_bev(self):
         return bev_boxes
 
     def rotate(self, angle, points=None):
-        """Rotate boxes with points (optional) with the given angle.
+        """Rotate boxes with points (optional) with the given angle or \
+        rotation matrix.
 
         Args:
-            angle (float, torch.Tensor): Rotation angle.
+            angle (float | torch.Tensor | np.ndarray):
+                Rotation angle or rotation matrix.
             points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional):
                 Points to rotate. Defaults to None.
 
@@ -183,10 +185,19 @@ def rotate(self, angle, points=None):
         """
         if not isinstance(angle, torch.Tensor):
             angle = self.tensor.new_tensor(angle)
-        rot_sin = torch.sin(angle)
-        rot_cos = torch.cos(angle)
-        rot_mat_T = self.tensor.new_tensor([[rot_cos, 0, -rot_sin], [0, 1, 0],
-                                            [rot_sin, 0, rot_cos]])
+        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1
+
+        if angle.numel() == 1:
+            rot_sin = torch.sin(angle)
+            rot_cos = torch.cos(angle)
+            rot_mat_T = self.tensor.new_tensor([[rot_cos, 0, -rot_sin],
+                                                [0, 1, 0],
+                                                [rot_sin, 0, rot_cos]])
+        else:
+            rot_mat_T = angle
+            rot_sin = rot_mat_T[2, 0]
+            rot_cos = rot_mat_T[0, 0]
+            angle = np.arctan2(rot_sin, rot_cos)
 
         self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
         self.tensor[:, 6] += angle
diff --git a/mmdet3d/core/bbox/structures/depth_box3d.py b/mmdet3d/core/bbox/structures/depth_box3d.py
index a5eb6ed609..8e619114f9 100644
--- a/mmdet3d/core/bbox/structures/depth_box3d.py
+++ b/mmdet3d/core/bbox/structures/depth_box3d.py
@@ -116,10 +116,12 @@ def nearest_bev(self):
         return bev_boxes
 
     def rotate(self, angle, points=None):
-        """Rotate boxes with points (optional) with the given angle.
+        """Rotate boxes with points (optional) with the given angle or \
+        rotation matrix.
 
         Args:
-            angle (float, torch.Tensor): Rotation angle.
+            angle (float | torch.Tensor | np.ndarray):
+                Rotation angle or rotation matrix.
             points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional):
                 Points to rotate. Defaults to None.
 
@@ -130,11 +132,20 @@ def rotate(self, angle, points=None):
         """
         if not isinstance(angle, torch.Tensor):
             angle = self.tensor.new_tensor(angle)
-        rot_sin = torch.sin(angle)
-        rot_cos = torch.cos(angle)
-        rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
-                                            [rot_sin, rot_cos, 0], [0, 0,
-                                                                    1]]).T
+        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1
+
+        if angle.numel() == 1:
+            rot_sin = torch.sin(angle)
+            rot_cos = torch.cos(angle)
+            rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
+                                                [rot_sin, rot_cos, 0],
+                                                [0, 0, 1]]).T
+        else:
+            rot_mat_T = angle.T
+            rot_sin = rot_mat_T[0, 1]
+            rot_cos = rot_mat_T[0, 0]
+            angle = np.arctan2(rot_sin, rot_cos)
+
         self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T
         if self.with_yaw:
             self.tensor[:, 6] -= angle
diff --git a/mmdet3d/core/bbox/structures/lidar_box3d.py b/mmdet3d/core/bbox/structures/lidar_box3d.py
index a17c3bf49b..2acd8cf60e 100644
--- a/mmdet3d/core/bbox/structures/lidar_box3d.py
+++ b/mmdet3d/core/bbox/structures/lidar_box3d.py
@@ -114,10 +114,12 @@ def nearest_bev(self):
         return bev_boxes
 
     def rotate(self, angle, points=None):
-        """Rotate boxes with points (optional) with the given angle.
+        """Rotate boxes with points (optional) with the given angle or \
+        rotation matrix.
 
         Args:
-            angle (float | torch.Tensor): Rotation angle.
+            angle (float | torch.Tensor | np.ndarray):
+                Rotation angle or rotation matrix.
             points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional):
                 Points to rotate. Defaults to None.
 
@@ -128,10 +130,19 @@ def rotate(self, angle, points=None):
         """
         if not isinstance(angle, torch.Tensor):
             angle = self.tensor.new_tensor(angle)
-        rot_sin = torch.sin(angle)
-        rot_cos = torch.cos(angle)
-        rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
-                                            [rot_sin, rot_cos, 0], [0, 0, 1]])
+        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1
+
+        if angle.numel() == 1:
+            rot_sin = torch.sin(angle)
+            rot_cos = torch.cos(angle)
+            rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
+                                                [rot_sin, rot_cos, 0],
+                                                [0, 0, 1]])
+        else:
+            rot_mat_T = angle
+            rot_sin = rot_mat_T[1, 0]
+            rot_cos = rot_mat_T[0, 0]
+            angle = np.arctan2(rot_sin, rot_cos)
 
         self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
         self.tensor[:, 6] += angle
diff --git a/tests/test_utils/test_box3d.py b/tests/test_utils/test_box3d.py
index 3536d88122..8bcf12b46b 100644
--- a/tests/test_utils/test_box3d.py
+++ b/tests/test_utils/test_box3d.py
@@ -11,6 +11,7 @@
                                                 points_cam2img,
                                                 rotation_3d_in_axis,
                                                 xywhr2xyxyr)
+from mmdet3d.core.points import CameraPoints, DepthPoints, LiDARPoints
 
 
 def test_bbox3d_mapping_back():
@@ -225,6 +226,7 @@ def test_lidar_boxes3d():
     assert torch.allclose(points, expected_points)
 
     # test box rotation
+    # with input torch.Tensor points and angle
     expected_tensor = torch.tensor(
         [[1.4225, -2.7344, -1.7501, 1.7500, 3.3900, 1.6500, 1.7976],
          [8.5435, -3.6491, -1.6357, 1.5400, 4.0100, 1.5700, 1.6576],
@@ -244,6 +246,16 @@ def test_lidar_boxes3d():
     assert torch.allclose(points, expected_points, 1e-3)
     assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3)
 
+    # with input torch.Tensor points and rotation matrix
+    points, rot_mat_T = boxes.rotate(-0.13603681398218053, points)  # back
+    rot_mat = np.array([[0.99076125, -0.13561762, 0.],
+                        [0.13561762, 0.99076125, 0.], [0., 0., 1.]])
+    points, rot_mat_T = boxes.rotate(rot_mat, points)
+    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
+    assert torch.allclose(points, expected_points, 1e-3)
+    assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3)
+
+    # with input np.ndarray points and angle
     points_np = np.array([[-1.0280, 0.9888,
                            -1.4658], [-4.3695, 2.1310, -1.3857],
                           [-6.5263, 1.5595,
@@ -262,6 +274,15 @@ def test_lidar_boxes3d():
     assert np.allclose(points_np, expected_points_np, 1e-3)
     assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3)
 
+    # with input LiDARPoints and rotation matrix
+    points_np, rot_mat_T_np = boxes.rotate(-0.13603681398218053, points_np)
+    lidar_points = LiDARPoints(points_np)
+    lidar_points, rot_mat_T_np = boxes.rotate(rot_mat, lidar_points)
+    points_np = lidar_points.tensor.numpy()
+
+    assert np.allclose(points_np, expected_points_np, 1e-3)
+    assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3)
+
     # test box scaling
     expected_tensor = torch.tensor([[
         1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797, 1.6592377,
@@ -701,6 +722,7 @@ def test_camera_boxes3d():
     assert torch.allclose(points, expected_points)
 
     # test box rotation
+    # with input torch.Tensor points and angle
     expected_tensor = Box3DMode.convert(
         torch.tensor(
             [[1.4225, -2.7344, -1.7501, 1.7500, 3.3900, 1.6500, 1.7976],
@@ -722,6 +744,17 @@ def test_camera_boxes3d():
     assert torch.allclose(points, expected_points, 1e-3)
     assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3)
 
+    # with input torch.Tensor points and rotation matrix
+    points, rot_mat_T = boxes.rotate(
+        torch.tensor(-0.13603681398218053), points)  # back
+    rot_mat = np.array([[0.99076125, 0., -0.13561762], [0., 1., 0.],
+                        [0.13561762, 0., 0.99076125]])
+    points, rot_mat_T = boxes.rotate(rot_mat, points)
+    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
+    assert torch.allclose(points, expected_points, 1e-3)
+    assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3)
+
+    # with input np.ndarray points and angle
     points_np = np.array([[0.6762, 1.2559, -1.4658, 2.5359],
                           [0.8784, 4.7814, -1.3857, 0.7167],
                           [-0.2517, 6.7053, -0.9697, 0.5599],
@@ -741,6 +774,15 @@ def test_camera_boxes3d():
     assert np.allclose(points_np, expected_points_np, 1e-3)
     assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3)
 
+    # with input CameraPoints and rotation matrix
+    points_np, rot_mat_T_np = boxes.rotate(
+        torch.tensor(-0.13603681398218053), points_np)
+    camera_points = CameraPoints(points_np, points_dim=4)
+    camera_points, rot_mat_T_np = boxes.rotate(rot_mat, camera_points)
+    points_np = camera_points.tensor.numpy()
+    assert np.allclose(points_np, expected_points_np, 1e-3)
+    assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3)
+
     # test box scaling
     expected_tensor = Box3DMode.convert(
         torch.tensor([[
@@ -1007,7 +1049,7 @@ def test_depth_boxes3d():
     # test box concatenation
     expected_tensor = torch.tensor(
         [[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],
-         [2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971],
+         [2.3262, 3.3065, 0.44255, 0.8234, 0.5325, 1.0099, 2.9971],
          [2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
          [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]])
     boxes = DepthInstance3DBoxes.cat([boxes_1, boxes_2])
@@ -1049,14 +1091,16 @@ def test_depth_boxes3d():
                                     [0.5358, -4.5870, -1.4741, 0.0556]])
     assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
     assert torch.allclose(points, expected_points)
+
     # test box rotation
+    # with input torch.Tensor points and angle
     boxes_rot = boxes.clone()
     expected_tensor = torch.tensor(
         [[-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585],
          [-2.4016, -3.2521, 0.4426, 0.8234, 0.5325, 1.0099, -0.1215],
          [-2.5181, -2.5298, -0.4321, 0.8597, 0.6193, 1.0204, -0.0493],
          [-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585]])
-    points, rot_mar_T = boxes_rot.rotate(-0.022998953275003075, points)
+    points, rot_mat_T = boxes_rot.rotate(-0.022998953275003075, points)
     expected_points = torch.tensor([[-0.7049, -1.2400, -1.4658, 2.5359],
                                     [-0.9881, -4.7599, -1.3857, 0.7167],
                                     [0.0974, -6.7093, -0.9697, 0.5599],
@@ -1067,14 +1111,24 @@ def test_depth_boxes3d():
                                        [0.0000, 0.0000, 1.0000]])
     assert torch.allclose(boxes_rot.tensor, expected_tensor, 1e-3)
     assert torch.allclose(points, expected_points, 1e-3)
-    assert torch.allclose(rot_mar_T, expected_rot_mat_T, 1e-3)
+    assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3)
+
+    # with input torch.Tensor points and rotation matrix
+    points, rot_mat_T = boxes.rotate(0.022998953275003075, points)  # back
+    rot_mat = np.array([[0.99973554, 0.02299693, 0.],
+                        [-0.02299693, 0.99973554, 0.], [0., 0., 1.]])
+    points, rot_mat_T = boxes.rotate(rot_mat, points)
+    assert torch.allclose(boxes_rot.tensor, expected_tensor, 1e-3)
+    assert torch.allclose(points, expected_points, 1e-3)
+    assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3)
 
+    # with input np.ndarray points and angle
     points_np = np.array([[0.6762, 1.2559, -1.4658, 2.5359],
                           [0.8784, 4.7814, -1.3857, 0.7167],
                           [-0.2517, 6.7053, -0.9697, 0.5599],
                           [0.5520, 0.6533, -0.5265, 1.0032],
                           [-0.5358, 4.5870, -1.4741, 0.0556]])
-    points_np, rot_mar_T_np = boxes.rotate(-0.022998953275003075, points_np)
+    points_np, rot_mat_T_np = boxes.rotate(-0.022998953275003075, points_np)
     expected_points_np = np.array([[0.7049, 1.2400, -1.4658, 2.5359],
                                    [0.9881, 4.7599, -1.3857, 0.7167],
                                    [-0.0974, 6.7093, -0.9697, 0.5599],
@@ -1090,7 +1144,17 @@ def test_depth_boxes3d():
          [-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585]])
     assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
     assert np.allclose(points_np, expected_points_np, 1e-3)
-    assert np.allclose(rot_mar_T_np, expected_rot_mat_T_np, 1e-3)
+    assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3)
+
+    # with input DepthPoints and rotation matrix
+    points_np, rot_mat_T_np = boxes.rotate(0.022998953275003075, points_np)
+    depth_points = DepthPoints(points_np, points_dim=4)
+    depth_points, rot_mat_T_np = boxes.rotate(rot_mat, depth_points)
+    points_np = depth_points.tensor.numpy()
+    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
+    assert np.allclose(points_np, expected_points_np, 1e-3)
+    assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3)
+
     th_boxes = torch.tensor(
         [[0.61211395, 0.8129094, 0.10563634, 1.497534, 0.16927195, 0.27956772],
          [1.430009, 0.49797538, 0.9382923, 0.07694054, 0.9312509, 1.8919173]],

From fec675137b136dc70baec6cab3d72c37ccf7ef36 Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Wed, 14 Apr 2021 14:00:37 +0800
Subject: [PATCH 03/12] add GlobalAlignment function to pipeline

---
 mmdet3d/datasets/__init__.py                  |  14 +-
 mmdet3d/datasets/pipelines/__init__.py        |  12 +-
 mmdet3d/datasets/pipelines/transforms_3d.py   | 166 ++++++++++++++++++
 tests/data/scannet/scannet_infos.pkl          | Bin 5920 -> 6188 bytes
 .../test_augmentations/test_transforms_3d.py  | 118 ++++++++++++-
 5 files changed, 294 insertions(+), 16 deletions(-)

diff --git a/mmdet3d/datasets/__init__.py b/mmdet3d/datasets/__init__.py
index df07e91831..f98b22858d 100644
--- a/mmdet3d/datasets/__init__.py
+++ b/mmdet3d/datasets/__init__.py
@@ -7,7 +7,8 @@
 from .lyft_dataset import LyftDataset
 from .nuscenes_dataset import NuScenesDataset
 from .nuscenes_mono_dataset import NuScenesMonoDataset
-from .pipelines import (BackgroundPointsFilter, GlobalRotScaleTrans,
+from .pipelines import (BackgroundPointsFilter, GlobalAlignment,
+                        GlobalRotScaleTrans, IndoorPatchPointSample,
                         IndoorPointSample, LoadAnnotations3D,
                         LoadPointsFromFile, LoadPointsFromMultiSweeps,
                         NormalizePointsColor, ObjectNoise, ObjectRangeFilter,
@@ -27,9 +28,10 @@
     'NuScenesMonoDataset', 'LyftDataset', 'ObjectSample', 'RandomFlip3D',
     'ObjectNoise', 'GlobalRotScaleTrans', 'PointShuffle', 'ObjectRangeFilter',
     'PointsRangeFilter', 'Collect3D', 'LoadPointsFromFile',
-    'NormalizePointsColor', 'IndoorPointSample', 'LoadAnnotations3D',
-    'SUNRGBDDataset', 'ScanNetDataset', 'ScanNetSegDataset', 'S3DISSegDataset',
-    'SemanticKITTIDataset', 'Custom3DDataset', 'Custom3DSegDataset',
-    'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
-    'VoxelBasedPointSampler', 'get_loading_pipeline'
+    'S3DISSegDataset',
+    'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample',
+    'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset', 'ScanNetDataset',
+    'ScanNetSegDataset', 'SemanticKITTIDataset', 'Custom3DDataset',
+    'Custom3DSegDataset', 'LoadPointsFromMultiSweeps', 'WaymoDataset',
+    'BackgroundPointsFilter', 'VoxelBasedPointSampler', 'get_loading_pipeline'
 ]
diff --git a/mmdet3d/datasets/pipelines/__init__.py b/mmdet3d/datasets/pipelines/__init__.py
index 67488b9141..4e0ce2c24d 100644
--- a/mmdet3d/datasets/pipelines/__init__.py
+++ b/mmdet3d/datasets/pipelines/__init__.py
@@ -6,11 +6,11 @@
                       LoadPointsFromMultiSweeps, NormalizePointsColor,
                       PointSegClassMapping)
 from .test_time_aug import MultiScaleFlipAug3D
-from .transforms_3d import (BackgroundPointsFilter, GlobalRotScaleTrans,
-                            IndoorPatchPointSample, IndoorPointSample,
-                            ObjectNoise, ObjectRangeFilter, ObjectSample,
-                            PointShuffle, PointsRangeFilter, RandomFlip3D,
-                            VoxelBasedPointSampler)
+from .transforms_3d import (BackgroundPointsFilter, GlobalAlignment,
+                            GlobalRotScaleTrans, IndoorPatchPointSample,
+                            IndoorPointSample, ObjectNoise, ObjectRangeFilter,
+                            ObjectSample, PointShuffle, PointsRangeFilter,
+                            RandomFlip3D, VoxelBasedPointSampler)
 
 __all__ = [
     'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
@@ -19,6 +19,6 @@
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
     'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',
     'PointSegClassMapping', 'MultiScaleFlipAug3D', 'LoadPointsFromMultiSweeps',
-    'BackgroundPointsFilter', 'VoxelBasedPointSampler',
+    'BackgroundPointsFilter', 'VoxelBasedPointSampler', 'GlobalAlignment',
     'IndoorPatchPointSample', 'LoadImageFromFileMono3D'
 ]
diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py b/mmdet3d/datasets/pipelines/transforms_3d.py
index 838ab73d7f..d5f46c47df 100644
--- a/mmdet3d/datasets/pipelines/transforms_3d.py
+++ b/mmdet3d/datasets/pipelines/transforms_3d.py
@@ -293,6 +293,172 @@ def __repr__(self):
         return repr_str
 
 
+@PIPELINES.register_module()
+class GlobalAlignment(object):
+    """Apply global alignment to 3D scene points by rotation and translation.
+    Extract 3D bboxes from the aligned points and instance mask if provided.
+
+    Args:
+        rotation_axis (int): Rotation axis for points and bboxes rotation.
+        ignore_index (int): Label index for which we won't extract bboxes.
+
+    Note:
+        This function should be called after PointSegClassMapping in pipeline.
+        We do not record the applied rotation and translation as in \
+            GlobalRotScaleTrans, because we usually do not need to reverse \
+            the alignment step.
+        For example, ScanNet 3D detection task uses aligned ground-truth \
+            bounding boxes for evaluation.
+    """
+
+    def __init__(self, rotation_axis, ignore_index):
+        self.rotation_axis = rotation_axis
+        self.ignore_index = ignore_index
+
+    def _trans_points(self, input_dict, trans_factor):
+        """Private function to translate points.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+            trans_factor (np.ndarray): Translation vector to be applied.
+
+        Returns:
+            dict: Results after translation, 'points' is updated in the dict.
+        """
+        input_dict['points'].translate(trans_factor)
+
+    def _rot_points(self, input_dict, rot_mat):
+        """Private function to rotate points.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+            rot_mat (np.ndarray): Rotation matrix to be applied.
+
+        Returns:
+            dict: Results after rotation, 'points' is updated in the dict.
+        """
+        # input should be rot_mat_T so I transpose it here
+        input_dict['points'].rotate(rot_mat.T)
+
+    def _check_rot_mat(self, rot_mat):
+        """Check if rotation matrix is valid for self.rotation_axis.
+
+        Args:
+            rot_mat (np.ndarray): Rotation matrix to be applied.
+        """
+        is_valid = np.allclose(np.linalg.det(rot_mat), 1.0)
+        valid_array = np.zeros(3)
+        valid_array[self.rotation_axis] = 1.0
+        is_valid &= (rot_mat[self.rotation_axis, :] == valid_array).all()
+        is_valid &= (rot_mat[:, self.rotation_axis] == valid_array).all()
+        assert is_valid, f'invalid rotation matrix {rot_mat}'
+
+    def _bbox_from_points(self, points):
+        """Get the bounding box of a set of points.
+
+        Args:
+            points (np.ndarray): A set of points belonging to one instance.
+
+        Returns:
+            np.ndarray: A bounding box of input points. We use origin as \
+                (0.5, 0.5, 0.5) without yaw.
+        """
+        xmin = np.min(points[:, 0])
+        ymin = np.min(points[:, 1])
+        zmin = np.min(points[:, 2])
+        xmax = np.max(points[:, 0])
+        ymax = np.max(points[:, 1])
+        zmax = np.max(points[:, 2])
+        bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
+                         (zmin + zmax) / 2, xmax - xmin, ymax - ymin,
+                         zmax - zmin])
+        return bbox
+
+    def _extract_bboxes(self, input_dict):
+        """Extract bounding boxes from points, semantic mask and instance mask.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Results after extracting bboxes, keys in \
+                input_dict['bbox3d_fields'] are updated in the dict.
+        """
+        assert 'pts_instance_mask' in input_dict.keys(), \
+            'instance mask is not provided in GlobalAlignment'
+        assert 'pts_semantic_mask' in input_dict.keys(), \
+            'semantic mask is not provided in GlobalAlignment'
+
+        # TODO: this function is only used in ScanNet-Det currently
+        # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes
+        for key in input_dict['bbox3d_fields']:
+            if key != 'gt_bboxes_3d':
+                raise NotImplementedError(
+                    f'GlobalAlignment does not support 3d bbox {key}')
+
+        coords = input_dict['points'].coord.numpy()
+        inst_mask = input_dict['pts_instance_mask']
+        sem_mask = input_dict['pts_semantic_mask']
+
+        # select points from valid categories where we want to extract bboxes
+        valid_cat_mask = (sem_mask != self.ignore_index)
+        inst_ids = np.unique(inst_mask[valid_cat_mask])  # ids of valid insts
+        instance_bboxes = np.zeros((inst_ids.shape[0], 7))
+        inst_id2cat_id = {
+            inst_id: sem_mask[inst_mask == inst_id][0]
+            for inst_id in inst_ids
+        }
+        for bbox_idx, inst_id in enumerate(inst_ids):
+            cat_id = inst_id2cat_id[inst_id]
+            inst_coords = coords[inst_mask == inst_id]
+            bbox = self._bbox_from_points(inst_coords)
+            instance_bboxes[bbox_idx, :6] = bbox
+            instance_bboxes[bbox_idx, 6] = cat_id
+
+        # TODO: currently only DepthInstance3DBoxes is supported!
+        # TODO: may support yaw in the future
+        original_type = type(input_dict['gt_bboxes_3d'])
+        input_dict['gt_bboxes_3d'] = original_type(
+            instance_bboxes[:, :6],
+            box_dim=6,
+            with_yaw=False,
+            origin=(0.5, 0.5, 0.5))
+        if 'gt_labels_3d' in input_dict.keys():
+            input_dict['gt_labels_3d'] = instance_bboxes[:, 6].astype(np.long)
+
+    def __call__(self, input_dict):
+        """Call function to apply global alignment.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Results after global alignment, 'points' and keys in \
+                input_dict['bbox3d_fields'] are updated in the result dict.
+        """
+        assert 'axis_align_matrix' in input_dict['annos'].keys(), \
+            'axis_align_matrix is not provided in GlobalAlignment'
+
+        axis_align_matrix = input_dict['annos']['axis_align_matrix']
+        assert axis_align_matrix.shape == (4, 4), \
+            f'invalid shape {axis_align_matrix.shape} for axis_align_matrix'
+        rot_mat = axis_align_matrix[:3, :3]
+        trans_vec = axis_align_matrix[:3, -1]
+
+        self._check_rot_mat(rot_mat)
+        self._rot_points(input_dict, rot_mat)
+        self._trans_points(input_dict, trans_vec)
+        self._extract_bboxes(input_dict)
+
+        return input_dict
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += f'(rotation_axis={self.rotation_axis},'
+        repr_str += f' ignore_index={self.ignore_index})'
+        return repr_str
+
+
 @PIPELINES.register_module()
 class GlobalRotScaleTrans(object):
     """Apply global rotation, scaling and translation to a 3D scene.
diff --git a/tests/data/scannet/scannet_infos.pkl b/tests/data/scannet/scannet_infos.pkl
index 7ceba9efd5f4907da05985bb9103322d709a991a..d0fe1b839d248d140a595a9a20e5711ce1eeed77 100644
GIT binary patch
delta 278
zcmZ3Wx5i+@VMeCTj?KpzD_KFb3;P#dAU)Ye^a6-0AjZR3A0f!Vz>rvxSsb63lbN0u
zpPN`xlvz<2pCOnb<jv4pn2;gSUYHnEn55y&=*{BI(pH$9Arrv_)RbmXn9|wNUYHtG
zn5N;)oT1?7=jZhw2*89lLrGzJQfG!p!~&q&h{H<`Pv<|h(edy)`$O9gZ&cTkK0NL4
z6nh3opd<*m99nFBsN>MwL!F|B8<12S+K;RiqLzW7uH^8l!(Hizcd{LBn4HKY#(#JN
q(AcE<!!vCw4{bQKfAT>wIZ>ckVMc~nTVZBUVOB|Dc2a3+q8<PWd~nqO

delta 61
zcmZ2uut0CaVMZ2j25-jAM;R+wAuI>>FT7kJvGzjmphBO?d}3mggG9GLBss)*7+Fe7
G6ZHTrv=FNR

diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
index e2a94cb868..0370722654 100644
--- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
+++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
@@ -3,11 +3,13 @@
 import pytest
 import torch
 
-from mmdet3d.core import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
+from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes,
+                          DepthInstance3DBoxes, LiDARInstance3DBoxes)
 from mmdet3d.core.points import DepthPoints, LiDARPoints
-from mmdet3d.datasets import (BackgroundPointsFilter, ObjectNoise,
-                              ObjectSample, PointShuffle, PointsRangeFilter,
-                              RandomFlip3D, VoxelBasedPointSampler)
+from mmdet3d.datasets import (BackgroundPointsFilter, GlobalAlignment,
+                              ObjectNoise, ObjectSample, RandomFlip3D,
+                              PointShuffle, PointsRangeFilter,
+                              VoxelBasedPointSampler)
 
 
 def test_remove_points_in_boxes():
@@ -221,6 +223,114 @@ def test_points_range_filter():
     assert repr_str == expected_repr_str
 
 
+def test_global_alignment():
+    np.random.seed(0)
+    valid_cat_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
+                     36, 39)
+    ignore_index = len(valid_cat_ids)
+    cat_ids2class = np.ones((41, ), dtype=np.int) * ignore_index
+    for class_id, cat_id in enumerate(valid_cat_ids):
+        cat_ids2class[cat_id] = class_id
+
+    global_alignment = GlobalAlignment(
+        rotation_axis=2, ignore_index=ignore_index)
+
+    points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
+                         np.float32).reshape(-1, 6)
+    sem_mask = np.fromfile('tests/data/scannet/semantic_mask/scene0000_00.bin',
+                           np.long)
+    ins_mask = np.fromfile('tests/data/scannet/instance_mask/scene0000_00.bin',
+                           np.long)
+    annos = mmcv.load('tests/data/scannet/scannet_infos.pkl')
+    info = annos[0]
+    gt_bboxes_3d = info['annos']['gt_boxes_upright_depth']
+    axis_align_matrix = info['annos']['axis_align_matrix']
+    gt_labels_3d = info['annos']['class']
+
+    depth_points = DepthPoints(points.copy(), points_dim=6)
+    depth_bboxes = DepthInstance3DBoxes(
+        gt_bboxes_3d,
+        box_dim=gt_bboxes_3d.shape[-1],
+        with_yaw=False,
+        origin=(0.5, 0.5, 0.5))
+    sem_mask = cat_ids2class[sem_mask]
+
+    input_dict = dict(
+        points=depth_points.clone(),
+        gt_bboxes_3d=depth_bboxes,
+        bbox3d_fields=['gt_bboxes_3d'],
+        gt_labels_3d=gt_labels_3d,
+        annos=dict(axis_align_matrix=axis_align_matrix),
+        pts_instance_mask=ins_mask,
+        pts_semantic_mask=sem_mask)
+
+    input_dict = global_alignment(input_dict)
+    trans_depth_points = input_dict['points']
+    trans_depth_bboxes = input_dict['gt_bboxes_3d']
+    trans_bbox_labels = input_dict['gt_labels_3d']
+
+    # construct expected transformed points by affine transformation
+    pts = np.ones((points.shape[0], 4))
+    pts[:, :3] = points[:, :3]
+    trans_pts = np.dot(pts, axis_align_matrix.T)
+    expected_points = np.concatenate([trans_pts[:, :3], points[:, 3:]], axis=1)
+
+    expected_bbox_labels = np.array(
+        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
+    expected_depth_bboxes = np.array(
+        [[
+            -3.714606, -1.0654305, 0.6051854, 0.6297655, 1.9905674, 0.44288868,
+            0.
+        ],
+         [
+             -8.557551, -1.8178326, 0.20456636, 1.1263373, 2.7851129,
+             1.8631845, 0.
+         ],
+         [
+             -8.885854, -5.354957, 0.97720087, 0.9093195, 0.30981588, 0.566175,
+             0.
+         ],
+         [
+             -8.098918, -5.0357704, 0.03724962, 0.27458152, 0.20566699,
+             0.5532104, 0.
+         ],
+         [
+             -6.9733434, 0.33523083, -0.02958763, 1.2264912, 0.7187278,
+             2.2613325, 0.
+         ],
+         [
+             -5.36362, -1.6046655, 0.37014085, 2.8042943, 1.1057366,
+             0.31707314, 0.
+         ], [-2.6299255, -2.3314357, 1.4469249, 0., 0., 0., 0.],
+         [-5.201888, -1.014641, 0.11020403, 0., 0., 0., 0.],
+         [
+             -3.5216672, -6.8292904, 0.26571387, 0.13945593, 0.12182455,
+             0.02463818, 0.
+         ],
+         [
+             -6.4834313, -5.4506774, 0.13558027, 1.4790803, 0.6031074,
+             0.60305846, 0.
+         ],
+         [
+             -9.338867, -4.616579, 0.6112565, 0.17650154, 0.988079, 0.16838372,
+             0.
+         ], [-2.0639155, -1.245964, 0.30754995, 0., 0., 0., 0.],
+         [-2.002855, -1.9495802, 2.2899528, 0., 0., 0., 0.],
+         [-2.1240144, -3.751592, 0.92695427, 0., 0., 0., 0.],
+         [-3.6406162, -5.1366153, 0.25374442, 0., 0., 0., 0.]])
+
+    assert np.allclose(
+        trans_depth_points.tensor.numpy(), expected_points, atol=1e-6)
+    assert np.all(trans_bbox_labels == expected_bbox_labels)
+    assert np.allclose(
+        trans_depth_bboxes.tensor.numpy(), expected_depth_bboxes, atol=1e-6)
+
+    repr_str = repr(global_alignment)
+    expected_repr_str = 'GlobalAlignment(rotation_axis=2,' \
+                        f' ignore_index={ignore_index})'
+    assert repr_str == expected_repr_str
+
+
 def test_random_flip_3d():
     random_flip_3d = RandomFlip3D(
         flip_ratio_bev_horizontal=1.0, flip_ratio_bev_vertical=1.0)

From 6af6107863261b93a336d47df1014b29ffb7eb80 Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Wed, 14 Apr 2021 15:37:41 +0800
Subject: [PATCH 04/12] fix small bugs in GlobalAlignment

---
 mmdet3d/datasets/pipelines/transforms_3d.py    | 18 +++++++++---------
 .../test_augmentations/test_transforms_3d.py   |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py b/mmdet3d/datasets/pipelines/transforms_3d.py
index d5f46c47df..5bd3d88056 100644
--- a/mmdet3d/datasets/pipelines/transforms_3d.py
+++ b/mmdet3d/datasets/pipelines/transforms_3d.py
@@ -384,18 +384,18 @@ def _extract_bboxes(self, input_dict):
             dict: Results after extracting bboxes, keys in \
                 input_dict['bbox3d_fields'] are updated in the dict.
         """
+        # TODO: this function is only used in ScanNet-Det pipeline currently
+        # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes
+        if 'gt_bboxes_3d' not in input_dict['bbox3d_fields']:
+            return
+        assert len(input_dict['bbox3d_fields']) == 1, \
+            'GlobalAlignment only support gt_bboxes_3d'
+
         assert 'pts_instance_mask' in input_dict.keys(), \
             'instance mask is not provided in GlobalAlignment'
         assert 'pts_semantic_mask' in input_dict.keys(), \
             'semantic mask is not provided in GlobalAlignment'
 
-        # TODO: this function is only used in ScanNet-Det currently
-        # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes
-        for key in input_dict['bbox3d_fields']:
-            if key != 'gt_bboxes_3d':
-                raise NotImplementedError(
-                    f'GlobalAlignment does not support 3d bbox {key}')
-
         coords = input_dict['points'].coord.numpy()
         inst_mask = input_dict['pts_instance_mask']
         sem_mask = input_dict['pts_semantic_mask']
@@ -436,10 +436,10 @@ def __call__(self, input_dict):
             dict: Results after global alignment, 'points' and keys in \
                 input_dict['bbox3d_fields'] are updated in the result dict.
         """
-        assert 'axis_align_matrix' in input_dict['annos'].keys(), \
+        assert 'axis_align_matrix' in input_dict['ann_info'].keys(), \
             'axis_align_matrix is not provided in GlobalAlignment'
 
-        axis_align_matrix = input_dict['annos']['axis_align_matrix']
+        axis_align_matrix = input_dict['ann_info']['axis_align_matrix']
         assert axis_align_matrix.shape == (4, 4), \
             f'invalid shape {axis_align_matrix.shape} for axis_align_matrix'
         rot_mat = axis_align_matrix[:3, :3]
diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
index 0370722654..bd820abf64 100644
--- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
+++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
@@ -260,7 +260,7 @@ def test_global_alignment():
         gt_bboxes_3d=depth_bboxes,
         bbox3d_fields=['gt_bboxes_3d'],
         gt_labels_3d=gt_labels_3d,
-        annos=dict(axis_align_matrix=axis_align_matrix),
+        ann_info=dict(axis_align_matrix=axis_align_matrix),
         pts_instance_mask=ins_mask,
         pts_semantic_mask=sem_mask)
 

From db025c1997a8edce632deccbf9873e96a59b6472 Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Wed, 14 Apr 2021 16:07:09 +0800
Subject: [PATCH 05/12] modify ScanNetDataset class to support
 axis_align_matrix loading & modify ScanNet config file

---
 configs/_base_/datasets/scannet-3d-18class.py |  3 ++
 mmdet3d/datasets/scannet_dataset.py           | 15 ++++++-
 .../test_datasets/test_scannet_dataset.py     | 37 +++++++++++-------
 .../test_pipelines/test_indoor_pipeline.py    | 39 ++++++++++++-------
 4 files changed, 63 insertions(+), 31 deletions(-)

diff --git a/configs/_base_/datasets/scannet-3d-18class.py b/configs/_base_/datasets/scannet-3d-18class.py
index 3d3dc1e62f..8c8821d581 100644
--- a/configs/_base_/datasets/scannet-3d-18class.py
+++ b/configs/_base_/datasets/scannet-3d-18class.py
@@ -23,6 +23,9 @@
         valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
                        36, 39),
         max_cat_id=40),
+    dict(
+        type='GlobalAlignment', rotation_axis=2,
+        ignore_index=len(class_names)),
     dict(type='IndoorPointSample', num_points=40000),
     dict(
         type='RandomFlip3D',
diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py
index 51336c5dd4..870c9003fb 100644
--- a/mmdet3d/datasets/scannet_dataset.py
+++ b/mmdet3d/datasets/scannet_dataset.py
@@ -1,5 +1,6 @@
 import numpy as np
 import tempfile
+import warnings
 from os import path as osp
 
 from mmdet3d.core import show_result, show_seg_result
@@ -79,6 +80,8 @@ def get_ann_info(self, index):
                 - gt_labels_3d (np.ndarray): Labels of ground truths.
                 - pts_instance_mask_path (str): Path of instance masks.
                 - pts_semantic_mask_path (str): Path of semantic masks.
+                - axis_align_matrix (np.ndarray): Transformation matrix for \
+                    global scene alignment.
         """
         # Use index to get the annos, thus the evalhook could also use this api
         info = self.data_infos[index]
@@ -102,11 +105,21 @@ def get_ann_info(self, index):
         pts_semantic_mask_path = osp.join(self.data_root,
                                           info['pts_semantic_mask_path'])
 
+        if 'axis_align_matrix' in info['annos'].keys():
+            axis_align_matrix = info['annos']['axis_align_matrix'].astype(
+                np.float32)
+        else:
+            axis_align_matrix = np.eye(4).astype(np.float32)
+            warnings.warn(
+                'axis_align_matrix is not found in ScanNet data info, please '
+                'use new pre-process scripts to re-generate ScanNet data')
+
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
             gt_labels_3d=gt_labels_3d,
             pts_instance_mask_path=pts_instance_mask_path,
-            pts_semantic_mask_path=pts_semantic_mask_path)
+            pts_semantic_mask_path=pts_semantic_mask_path,
+            axis_align_matrix=axis_align_matrix)
         return anns_results
 
     def _build_default_pipeline(self):
diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py
index 69791a4fae..31ded87e36 100644
--- a/tests/test_data/test_datasets/test_scannet_dataset.py
+++ b/tests/test_data/test_datasets/test_scannet_dataset.py
@@ -27,6 +27,14 @@ def test_getitem():
             with_label_3d=True,
             with_mask_3d=True,
             with_seg_3d=True),
+        dict(
+            type='PointSegClassMapping',
+            valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
+                           34, 36, 39)),
+        dict(
+            type='GlobalAlignment',
+            rotation_axis=2,
+            ignore_index=len(class_names)),
         dict(type='IndoorPointSample', num_points=5),
         dict(
             type='RandomFlip3D',
@@ -63,22 +71,21 @@ def test_getitem():
     assert file_name == './tests/data/scannet/points/scene0000_00.bin'
     assert np.allclose(pcd_rotation, expected_rotation, 1e-3)
     assert sample_idx == 'scene0000_00'
-    expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],
-                                    [-0.4065, -3.4857, 2.1330, 2.1682],
-                                    [-1.4578, 1.3510, -0.0441, -0.0089],
-                                    [2.2428, -1.1323, -0.0288, 0.0064],
-                                    [0.7052, -2.9752, 1.5560, 1.5912]])
+    expected_points = torch.tensor(
+        [[1.8339e+00, 2.1093e+00, 2.2900e+00, 2.3895e+00],
+         [3.6079e+00, 1.4592e-01, 2.0687e+00, 2.1682e+00],
+         [4.1886e+00, 5.0614e+00, -1.0841e-01, -8.8736e-03],
+         [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
+         [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
     expected_gt_bboxes_3d = torch.tensor(
-        [[-1.1835, -3.6317, 1.5704, 1.7577, 0.3761, 0.5724, 0.0000],
-         [-3.1832, 3.2269, 1.1911, 0.6727, 0.2251, 0.6715, 0.0000],
-         [-0.9598, -2.2864, 0.0093, 0.7506, 2.5709, 1.2145, 0.0000],
-         [-2.6988, -2.7354, 0.8288, 0.7680, 1.8877, 0.2870, 0.0000],
-         [3.2989, 0.2885, -0.0090, 0.7600, 3.8814, 2.1603, 0.0000]])
-    expected_gt_labels = np.array([
-        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
-        0, 0, 0, 5, 5, 5
-    ])
-    expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])
+        [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000],
+         [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000],
+         [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000],
+         [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000],
+         [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]])
+    expected_gt_labels = np.array(
+        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
+    expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
     expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
     original_classes = scannet_dataset.CLASSES
 
diff --git a/tests/test_data/test_pipelines/test_indoor_pipeline.py b/tests/test_data/test_pipelines/test_indoor_pipeline.py
index 31b60ef69f..9d24467bae 100644
--- a/tests/test_data/test_pipelines/test_indoor_pipeline.py
+++ b/tests/test_data/test_pipelines/test_indoor_pipeline.py
@@ -27,6 +27,14 @@ def test_scannet_pipeline():
             with_label_3d=True,
             with_mask_3d=True,
             with_seg_3d=True),
+        dict(
+            type='PointSegClassMapping',
+            valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
+                           34, 36, 39)),
+        dict(
+            type='GlobalAlignment',
+            rotation_axis=2,
+            ignore_index=len(class_names)),
         dict(type='IndoorPointSample', num_points=5),
         dict(
             type='RandomFlip3D',
@@ -66,6 +74,8 @@ def test_scannet_pipeline():
     results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(
         scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
     results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d
+    results['ann_info']['axis_align_matrix'] = \
+        info['annos']['axis_align_matrix']
 
     results['img_fields'] = []
     results['bbox3d_fields'] = []
@@ -79,22 +89,21 @@ def test_scannet_pipeline():
     gt_labels_3d = results['gt_labels_3d']._data
     pts_semantic_mask = results['pts_semantic_mask']._data
     pts_instance_mask = results['pts_instance_mask']._data
-    expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],
-                                    [-0.4065, -3.4857, 2.1330, 2.1682],
-                                    [-1.4578, 1.3510, -0.0441, -0.0089],
-                                    [2.2428, -1.1323, -0.0288, 0.0064],
-                                    [0.7052, -2.9752, 1.5560, 1.5912]])
+    expected_points = torch.tensor(
+        [[1.8339e+00, 2.1093e+00, 2.2900e+00, 2.3895e+00],
+         [3.6079e+00, 1.4592e-01, 2.0687e+00, 2.1682e+00],
+         [4.1886e+00, 5.0614e+00, -1.0841e-01, -8.8736e-03],
+         [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
+         [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
     expected_gt_bboxes_3d = torch.tensor(
-        [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
-         [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
-         [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
-         [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
-         [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
-    expected_gt_labels_3d = np.array([
-        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
-        0, 0, 0, 5, 5, 5
-    ])
-    expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])
+        [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000],
+         [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000],
+         [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000],
+         [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000],
+         [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]])
+    expected_gt_labels_3d = np.array(
+        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
+    expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
     expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
     assert torch.allclose(points, expected_points, 1e-2)
     assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,

From da9b64996bd1f5df872560d8ebbeece5e1811ed1 Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Fri, 16 Apr 2021 16:01:15 +0800
Subject: [PATCH 06/12] add assertion message for rotate

---
 mmdet3d/core/bbox/structures/cam_box3d.py   | 3 ++-
 mmdet3d/core/bbox/structures/depth_box3d.py | 3 ++-
 mmdet3d/core/bbox/structures/lidar_box3d.py | 3 ++-
 mmdet3d/core/points/base_points.py          | 2 +-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/mmdet3d/core/bbox/structures/cam_box3d.py b/mmdet3d/core/bbox/structures/cam_box3d.py
index d72391b160..0d804c1601 100644
--- a/mmdet3d/core/bbox/structures/cam_box3d.py
+++ b/mmdet3d/core/bbox/structures/cam_box3d.py
@@ -185,7 +185,8 @@ def rotate(self, angle, points=None):
         """
         if not isinstance(angle, torch.Tensor):
             angle = self.tensor.new_tensor(angle)
-        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1
+        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \
+            f'invalid rotation angle shape {angle.shape}'
 
         if angle.numel() == 1:
             rot_sin = torch.sin(angle)
diff --git a/mmdet3d/core/bbox/structures/depth_box3d.py b/mmdet3d/core/bbox/structures/depth_box3d.py
index 8e619114f9..c5aeceaebf 100644
--- a/mmdet3d/core/bbox/structures/depth_box3d.py
+++ b/mmdet3d/core/bbox/structures/depth_box3d.py
@@ -132,7 +132,8 @@ def rotate(self, angle, points=None):
         """
         if not isinstance(angle, torch.Tensor):
             angle = self.tensor.new_tensor(angle)
-        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1
+        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \
+            f'invalid rotation angle shape {angle.shape}'
 
         if angle.numel() == 1:
             rot_sin = torch.sin(angle)
diff --git a/mmdet3d/core/bbox/structures/lidar_box3d.py b/mmdet3d/core/bbox/structures/lidar_box3d.py
index 2acd8cf60e..f7f1721f1e 100644
--- a/mmdet3d/core/bbox/structures/lidar_box3d.py
+++ b/mmdet3d/core/bbox/structures/lidar_box3d.py
@@ -130,7 +130,8 @@ def rotate(self, angle, points=None):
         """
         if not isinstance(angle, torch.Tensor):
             angle = self.tensor.new_tensor(angle)
-        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1
+        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \
+            f'invalid rotation angle shape {angle.shape}'
 
         if angle.numel() == 1:
             rot_sin = torch.sin(angle)
diff --git a/mmdet3d/core/points/base_points.py b/mmdet3d/core/points/base_points.py
index 467b3bb4de..aa13fc023c 100644
--- a/mmdet3d/core/points/base_points.py
+++ b/mmdet3d/core/points/base_points.py
@@ -147,7 +147,7 @@ def rotate(self, rotation, axis=None):
         if not isinstance(rotation, torch.Tensor):
             rotation = self.tensor.new_tensor(rotation)
         assert rotation.shape == torch.Size([3, 3]) or \
-            rotation.numel() == 1
+            rotation.numel() == 1, f'invalid rotation shape {rotation.shape}'
 
         if axis is None:
             axis = self.rotation_axis

From 56f59b6ca0c1534c24314c1cdf6a5fc0e82c2b05 Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Tue, 20 Apr 2021 15:42:33 +0800
Subject: [PATCH 07/12] add exception in show_result when bbox shape is invalid

---
 mmdet3d/core/visualizer/show_result.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mmdet3d/core/visualizer/show_result.py b/mmdet3d/core/visualizer/show_result.py
index c4f414ad3a..e7b4789e0a 100644
--- a/mmdet3d/core/visualizer/show_result.py
+++ b/mmdet3d/core/visualizer/show_result.py
@@ -61,7 +61,10 @@ def convert_oriented_box_to_trimesh_fmt(box):
         scene_bbox = np.zeros((1, 7))
     scene = trimesh.scene.Scene()
     for box in scene_bbox:
-        scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))
+        try:
+            scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))
+        except ValueError:  # invalid box shape, e.g. width==0
+            continue
 
     mesh_list = trimesh.util.concatenate(scene.dump())
     # save to obj file

From 8d0205e08d312d53710274a581d2483490579cd0 Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Tue, 20 Apr 2021 15:43:10 +0800
Subject: [PATCH 08/12] use pipeline to load bbox

---
 configs/_base_/datasets/scannet-3d-18class.py |  24 ++-
 mmdet3d/datasets/pipelines/transforms_3d.py   |  27 +--
 mmdet3d/datasets/scannet_dataset.py           | 156 ++++++++++++++++--
 .../test_datasets/test_scannet_dataset.py     | 111 ++++++++-----
 .../test_augmentations/test_transforms_3d.py  |   9 +-
 .../test_pipelines/test_indoor_pipeline.py    |   7 +-
 6 files changed, 262 insertions(+), 72 deletions(-)

diff --git a/configs/_base_/datasets/scannet-3d-18class.py b/configs/_base_/datasets/scannet-3d-18class.py
index 8c8821d581..c34a575961 100644
--- a/configs/_base_/datasets/scannet-3d-18class.py
+++ b/configs/_base_/datasets/scannet-3d-18class.py
@@ -5,6 +5,8 @@
                'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
                'garbagebin')
+valid_class_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36,
+                   39)
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -14,8 +16,8 @@
         use_dim=[0, 1, 2]),
     dict(
         type='LoadAnnotations3D',
-        with_bbox_3d=True,
-        with_label_3d=True,
+        with_bbox_3d=False,
+        with_label_3d=False,
         with_mask_3d=True,
         with_seg_3d=True),
     dict(
@@ -52,6 +54,9 @@
         shift_height=True,
         load_dim=6,
         use_dim=[0, 1, 2]),
+    dict(
+        type='GlobalAlignment', rotation_axis=2,
+        ignore_index=len(class_names)),
     dict(
         type='MultiScaleFlipAug3D',
         img_scale=(1333, 800),
@@ -78,6 +83,7 @@
 ]
 # construct a pipeline for data and gt loading in show function
 # please keep its loading function consistent with test_pipeline (e.g. client)
+# we need to load gt masks for aligned gt bbox extracting
 eval_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -85,11 +91,23 @@
         shift_height=False,
         load_dim=6,
         use_dim=[0, 1, 2]),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=False,
+        with_label_3d=False,
+        with_mask_3d=True,
+        with_seg_3d=True),
+    dict(type='PointSegClassMapping', valid_cat_ids=valid_class_ids),
+    dict(
+        type='GlobalAlignment',
+        rotation_axis=2,
+        ignore_index=len(class_names),
+        extract_bbox=True),
     dict(
         type='DefaultFormatBundle3D',
         class_names=class_names,
         with_label=False),
-    dict(type='Collect3D', keys=['points'])
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 
 data = dict(
diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py b/mmdet3d/datasets/pipelines/transforms_3d.py
index 5bd3d88056..1aff3924e1 100644
--- a/mmdet3d/datasets/pipelines/transforms_3d.py
+++ b/mmdet3d/datasets/pipelines/transforms_3d.py
@@ -301,6 +301,9 @@ class GlobalAlignment(object):
     Args:
         rotation_axis (int): Rotation axis for points and bboxes rotation.
         ignore_index (int): Label index for which we won't extract bboxes.
+        extract_bbox (bool): Whether extract new ground-truth bboxes after \
+            alignment. This requires instance and semantic mask inputs.
+            Defaults to False.
 
     Note:
         This function should be called after PointSegClassMapping in pipeline.
@@ -311,9 +314,10 @@ class GlobalAlignment(object):
             bounding boxes for evaluation.
     """
 
-    def __init__(self, rotation_axis, ignore_index):
+    def __init__(self, rotation_axis, ignore_index, extract_bbox=False):
         self.rotation_axis = rotation_axis
         self.ignore_index = ignore_index
+        self.extract_bbox = extract_bbox
 
     def _trans_points(self, input_dict, trans_factor):
         """Private function to translate points.
@@ -386,10 +390,7 @@ def _extract_bboxes(self, input_dict):
         """
         # TODO: this function is only used in ScanNet-Det pipeline currently
         # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes
-        if 'gt_bboxes_3d' not in input_dict['bbox3d_fields']:
-            return
-        assert len(input_dict['bbox3d_fields']) == 1, \
-            'GlobalAlignment only support gt_bboxes_3d'
+        from mmdet3d.core.bbox import DepthInstance3DBoxes
 
         assert 'pts_instance_mask' in input_dict.keys(), \
             'instance mask is not provided in GlobalAlignment'
@@ -415,16 +416,14 @@ def _extract_bboxes(self, input_dict):
             instance_bboxes[bbox_idx, :6] = bbox
             instance_bboxes[bbox_idx, 6] = cat_id
 
-        # TODO: currently only DepthInstance3DBoxes is supported!
-        # TODO: may support yaw in the future
-        original_type = type(input_dict['gt_bboxes_3d'])
-        input_dict['gt_bboxes_3d'] = original_type(
+        if 'gt_bboxes_3d' not in input_dict['bbox3d_fields']:
+            input_dict['bbox3d_fields'].append('gt_bboxes_3d')
+        input_dict['gt_bboxes_3d'] = DepthInstance3DBoxes(
             instance_bboxes[:, :6],
             box_dim=6,
             with_yaw=False,
             origin=(0.5, 0.5, 0.5))
-        if 'gt_labels_3d' in input_dict.keys():
-            input_dict['gt_labels_3d'] = instance_bboxes[:, 6].astype(np.long)
+        input_dict['gt_labels_3d'] = instance_bboxes[:, 6].astype(np.long)
 
     def __call__(self, input_dict):
         """Call function to shuffle points.
@@ -448,14 +447,16 @@ def __call__(self, input_dict):
         self._check_rot_mat(rot_mat)
         self._rot_points(input_dict, rot_mat)
         self._trans_points(input_dict, trans_vec)
-        self._extract_bboxes(input_dict)
+        if self.extract_bbox:
+            self._extract_bboxes(input_dict)
 
         return input_dict
 
     def __repr__(self):
         repr_str = self.__class__.__name__
         repr_str += f'(rotation_axis={self.rotation_axis},'
-        repr_str += f' ignore_index={self.ignore_index})'
+        repr_str += f' ignore_index={self.ignore_index},'
+        repr_str += f' extract_bbox={self.extract_bbox})'
         return repr_str
 
 
diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py
index 870c9003fb..dc7e34cae1 100644
--- a/mmdet3d/datasets/scannet_dataset.py
+++ b/mmdet3d/datasets/scannet_dataset.py
@@ -105,14 +105,7 @@ def get_ann_info(self, index):
         pts_semantic_mask_path = osp.join(self.data_root,
                                           info['pts_semantic_mask_path'])
 
-        if 'axis_align_matrix' in info['annos'].keys():
-            axis_align_matrix = info['annos']['axis_align_matrix'].astype(
-                np.float32)
-        else:
-            axis_align_matrix = np.eye(4).astype(np.float32)
-            warnings.warn(
-                'axis_align_matrix is not found in ScanNet data info, please '
-                'use new pre-process scripts to re-generate ScanNet data')
+        axis_align_matrix = self._get_axis_align_matrix(info)
 
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
@@ -122,6 +115,128 @@ def get_ann_info(self, index):
             axis_align_matrix=axis_align_matrix)
         return anns_results
 
+    def prepare_test_data(self, index):
+        """Prepare data for testing.
+
+        We should take axis_align_matrix from self.data_infos since we need \
+            to align point clouds.
+
+        Args:
+            index (int): Index for accessing the target data.
+
+        Returns:
+            dict: Testing data dict of the corresponding index.
+        """
+        input_dict = self.get_data_info(index)
+        # take the axis_align_matrix from data_infos
+        input_dict['ann_info'] = dict(
+            axis_align_matrix=self._get_axis_align_matrix(
+                self.data_infos[index]))
+        self.pre_pipeline(input_dict)
+        example = self.pipeline(input_dict)
+        return example
+
+    @staticmethod
+    def _get_axis_align_matrix(info):
+        """Get axis_align_matrix from info. If not exist, return identity mat.
+
+        Args:
+            info (dict): one data info term.
+
+        Returns:
+            np.ndarray: 4x4 transformation matrix.
+        """
+        if 'axis_align_matrix' in info['annos'].keys():
+            return info['annos']['axis_align_matrix'].astype(np.float32)
+        else:
+            warnings.warn(
+                'axis_align_matrix is not found in ScanNet data info, please '
+                'use new pre-process scripts to re-generate ScanNet data')
+            return np.eye(4).astype(np.float32)
+
+    def evaluate(self,
+                 results,
+                 metric=None,
+                 iou_thr=(0.25, 0.5),
+                 logger=None,
+                 show=False,
+                 out_dir=None,
+                 pipeline=None):
+        """Evaluate.
+
+        Evaluation in indoor protocol.
+        Since ScanNet detection data pipeline re-computes ground-truth boxes,
+            we can't directly use gt_bboxes from self.data_infos.
+
+        Args:
+            results (list[dict]): List of results.
+            metric (str | list[str]): Metrics to be evaluated.
+            iou_thr (list[float]): AP IoU thresholds.
+            show (bool): Whether to visualize.
+                Default: False.
+            out_dir (str): Path to save the visualization results.
+                Default: None.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+
+        Returns:
+            dict: Evaluation results.
+        """
+        from mmdet3d.core.evaluation import indoor_eval
+        assert isinstance(
+            results, list), f'Expect results to be list, got {type(results)}.'
+        assert len(results) > 0, 'Expect length of results > 0.'
+        assert len(results) == len(self.data_infos)
+        assert isinstance(
+            results[0], dict
+        ), f'Expect elements in results to be dict, got {type(results[0])}.'
+        # load gt_bboxes via pipeline
+        pipeline = self._get_pipeline(pipeline)
+        gt_bboxes = [
+            self._extract_data(
+                i, pipeline, ['gt_bboxes_3d', 'gt_labels_3d'], load_annos=True)
+            for i in range(len(self.data_infos))
+        ]
+        gt_annos = [self._build_annos(*gt_bbox) for gt_bbox in gt_bboxes]
+        label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
+        ret_dict = indoor_eval(
+            gt_annos,
+            results,
+            iou_thr,
+            label2cat,
+            logger=logger,
+            box_type_3d=self.box_type_3d,
+            box_mode_3d=self.box_mode_3d)
+        if show:
+            self.show(results, out_dir, pipeline=pipeline)
+
+        return ret_dict
+
+    @staticmethod
+    def _build_annos(gt_bboxes, gt_labels):
+        """Transform gt bboxes and labels into self.data_infos['annos'] format.
+
+        Args:
+            gt_bboxes (:obj:`BaseInstance3DBoxes`): \
+                3D bounding boxes in Depth coordinate
+            gt_labels (torch.Tensor): Labels of boxes.
+
+        Returns:
+            dict: annotations including the following keys
+
+                - gt_boxes_upright_depth (np.ndarray): 3D bounding boxes.
+                - class (np.ndarray): Labels of boxes.
+                - gt_num (int): Number of boxes.
+        """
+        bbox = gt_bboxes.tensor.numpy()[:, :6].copy()  # drop yaw dimension
+        bbox[..., 2] += bbox[..., 5] / 2  # bottom center to gravity center
+        anno = {
+            'gt_boxes_upright_depth': bbox,
+            'class': gt_labels.numpy(),
+            'gt_num': gt_labels.shape[0]
+        }
+        return anno
+
     def _build_default_pipeline(self):
         """Build the default pipeline for this dataset."""
         pipeline = [
@@ -131,11 +246,28 @@ def _build_default_pipeline(self):
                 shift_height=False,
                 load_dim=6,
                 use_dim=[0, 1, 2]),
+            dict(
+                type='LoadAnnotations3D',
+                with_bbox_3d=False,
+                with_label_3d=False,
+                with_mask_3d=True,
+                with_seg_3d=True),
+            dict(
+                type='PointSegClassMapping',
+                valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
+                               33, 34, 36, 39)),
+            dict(
+                type='GlobalAlignment',
+                rotation_axis=2,
+                ignore_index=len(self.CLASSES),
+                extract_bbox=True),
             dict(
                 type='DefaultFormatBundle3D',
                 class_names=self.CLASSES,
                 with_label=False),
-            dict(type='Collect3D', keys=['points'])
+            dict(
+                type='Collect3D',
+                keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
         ]
         return Compose(pipeline)
 
@@ -155,8 +287,10 @@ def show(self, results, out_dir, show=True, pipeline=None):
             data_info = self.data_infos[i]
             pts_path = data_info['pts_path']
             file_name = osp.split(pts_path)[-1].split('.')[0]
-            points = self._extract_data(i, pipeline, 'points').numpy()
-            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
+            points, gt_bboxes = self._extract_data(
+                i, pipeline, ['points', 'gt_bboxes_3d'], load_annos=True)
+            points = points.numpy()
+            gt_bboxes = gt_bboxes.tensor.numpy()
             pred_bboxes = result['boxes_3d'].tensor.numpy()
             show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,
                         show)
diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py
index 31ded87e36..37ea8ccba4 100644
--- a/tests/test_data/test_datasets/test_scannet_dataset.py
+++ b/tests/test_data/test_datasets/test_scannet_dataset.py
@@ -34,7 +34,8 @@ def test_getitem():
         dict(
             type='GlobalAlignment',
             rotation_axis=2,
-            ignore_index=len(class_names)),
+            ignore_index=len(class_names),
+            extract_bbox=True),
         dict(type='IndoorPointSample', num_points=5),
         dict(
             type='RandomFlip3D',
@@ -130,47 +131,65 @@ def test_evaluate():
     results = []
     pred_boxes = dict()
     pred_boxes['boxes_3d'] = DepthInstance3DBoxes(
-        torch.tensor([[
-            1.4813e+00, 3.5207e+00, 1.5704e+00, 1.7445e+00, 2.3196e-01,
-            5.7235e-01, 0.0000e+00
-        ],
-                      [
-                          2.9040e+00, -3.4803e+00, 1.1911e+00, 6.6078e-01,
-                          1.7072e-01, 6.7154e-01, 0.0000e+00
-                      ],
-                      [
-                          1.1466e+00, 2.1987e+00, 9.2576e-03, 5.4184e-01,
-                          2.5346e+00, 1.2145e+00, 0.0000e+00
-                      ],
-                      [
-                          2.9168e+00, 2.5016e+00, 8.2875e-01, 6.1697e-01,
-                          1.8428e+00, 2.8697e-01, 0.0000e+00
-                      ],
-                      [
-                          -3.3114e+00, -1.3351e-02, -8.9524e-03, 4.4082e-01,
-                          3.8582e+00, 2.1603e+00, 0.0000e+00
-                      ],
-                      [
-                          -2.0135e+00, -3.4857e+00, 9.3848e-01, 1.9911e+00,
-                          2.1603e-01, 1.2767e+00, 0.0000e+00
-                      ],
-                      [
-                          -2.1945e+00, -3.1402e+00, -3.8165e-02, 1.4801e+00,
-                          6.8676e-01, 1.0586e+00, 0.0000e+00
-                      ],
-                      [
-                          -2.7553e+00, 2.4055e+00, -2.9972e-02, 1.4764e+00,
-                          1.4927e+00, 2.3380e+00, 0.0000e+00
-                      ]]))
-    pred_boxes['labels_3d'] = torch.tensor([6, 6, 4, 9, 11, 11])
+        torch.tensor(
+            [[-3.7146, -1.0654, 0.6052, 0.6298, 1.9906, 0.4429, 0.0000],
+             [-8.5576, -1.8178, 0.2046, 1.1263, 2.7851, 1.8632, 0.0000],
+             [-8.8859, -5.3550, 0.9772, 0.9093, 0.3098, 0.5662, 0.0000],
+             [-8.0989, -5.0358, 0.0372, 0.2746, 0.2057, 0.5532, 0.0000],
+             [-6.9733, 0.3352, -0.0296, 1.2265, 0.7187, 2.2613, 0.0000],
+             [-5.3636, -1.6047, 0.3701, 2.8043, 1.1057, 0.3171, 0.0000]]))
+    pred_boxes['labels_3d'] = torch.tensor([4, 11, 11, 10, 0, 3])
     pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0, 1.0, 1.0, 0.5])
     results.append(pred_boxes)
     metric = [0.25, 0.5]
     ret_dict = scannet_dataset.evaluate(results, metric)
-    assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01
-    assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01
     assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01
+    assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01
+
+    # test evaluate with pipeline
+    class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
+                   'window', 'bookshelf', 'picture', 'counter', 'desk',
+                   'curtain', 'refrigerator', 'showercurtrain', 'toilet',
+                   'sink', 'bathtub', 'garbagebin')
+    eval_pipeline = [
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='DEPTH',
+            shift_height=False,
+            load_dim=6,
+            use_dim=[0, 1, 2]),
+        dict(
+            type='LoadAnnotations3D',
+            with_bbox_3d=False,
+            with_label_3d=False,
+            with_mask_3d=True,
+            with_seg_3d=True),
+        dict(
+            type='PointSegClassMapping',
+            valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
+                           34, 36, 39)),
+        dict(
+            type='GlobalAlignment',
+            rotation_axis=2,
+            ignore_index=len(class_names),
+            extract_bbox=True),
+        dict(
+            type='DefaultFormatBundle3D',
+            class_names=class_names,
+            with_label=False),
+        dict(
+            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    ]
+    ret_dict = scannet_dataset.evaluate(
+        results, metric, pipeline=eval_pipeline)
+    assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01
+    assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01
+    assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01
 
 
 def test_show():
@@ -233,11 +252,27 @@ def test_show():
             shift_height=False,
             load_dim=6,
             use_dim=[0, 1, 2]),
+        dict(
+            type='LoadAnnotations3D',
+            with_bbox_3d=False,
+            with_label_3d=False,
+            with_mask_3d=True,
+            with_seg_3d=True),
+        dict(
+            type='PointSegClassMapping',
+            valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
+                           34, 36, 39)),
+        dict(
+            type='GlobalAlignment',
+            rotation_axis=2,
+            ignore_index=len(class_names),
+            extract_bbox=True),
         dict(
             type='DefaultFormatBundle3D',
             class_names=class_names,
             with_label=False),
-        dict(type='Collect3D', keys=['points'])
+        dict(
+            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
     ]
     tmp_dir = tempfile.TemporaryDirectory()
     temp_dir = tmp_dir.name
diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
index bd820abf64..8f672a2f65 100644
--- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
+++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
@@ -7,8 +7,8 @@
                           DepthInstance3DBoxes, LiDARInstance3DBoxes)
 from mmdet3d.core.points import DepthPoints, LiDARPoints
 from mmdet3d.datasets import (BackgroundPointsFilter, GlobalAlignment,
-                              ObjectNoise, ObjectSample, RandomFlip3D,
-                              PointShuffle, PointsRangeFilter,
+                              ObjectNoise, ObjectSample, PointShuffle,
+                              PointsRangeFilter, RandomFlip3D,
                               VoxelBasedPointSampler)
 
 
@@ -233,7 +233,7 @@ def test_global_alignment():
         cat_ids2class[cat_id] = class_id
 
     global_alignment = GlobalAlignment(
-        rotation_axis=2, ignore_index=ignore_index)
+        rotation_axis=2, ignore_index=ignore_index, extract_bbox=True)
 
     points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
                          np.float32).reshape(-1, 6)
@@ -327,7 +327,8 @@ def test_global_alignment():
 
     repr_str = repr(global_alignment)
     expected_repr_str = 'GlobalAlignment(rotation_axis=2,' \
-                        f' ignore_index={ignore_index})'
+                        f' ignore_index={ignore_index},' \
+                        f' extract_bbox=True)'
     assert repr_str == expected_repr_str
 
 
diff --git a/tests/test_data/test_pipelines/test_indoor_pipeline.py b/tests/test_data/test_pipelines/test_indoor_pipeline.py
index 9d24467bae..5563dcd073 100644
--- a/tests/test_data/test_pipelines/test_indoor_pipeline.py
+++ b/tests/test_data/test_pipelines/test_indoor_pipeline.py
@@ -23,8 +23,8 @@ def test_scannet_pipeline():
             use_dim=[0, 1, 2]),
         dict(
             type='LoadAnnotations3D',
-            with_bbox_3d=True,
-            with_label_3d=True,
+            with_bbox_3d=False,
+            with_label_3d=False,
             with_mask_3d=True,
             with_seg_3d=True),
         dict(
@@ -34,7 +34,8 @@ def test_scannet_pipeline():
         dict(
             type='GlobalAlignment',
             rotation_axis=2,
-            ignore_index=len(class_names)),
+            ignore_index=len(class_names),
+            extract_bbox=True),
         dict(type='IndoorPointSample', num_points=5),
         dict(
             type='RandomFlip3D',

From 0a7abe787cb8e32259e51a4eaf1adb876acb01a7 Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Tue, 20 Apr 2021 20:05:19 +0800
Subject: [PATCH 09/12] extract both aligned and unaligned bbox in
 pre-processing

---
 data/scannet/batch_load_scannet_data.py    | 17 +++---
 data/scannet/load_scannet_data.py          | 62 ++++++++++++++--------
 tools/data_converter/scannet_data_utils.py | 39 +++++++++-----
 3 files changed, 77 insertions(+), 41 deletions(-)

diff --git a/data/scannet/batch_load_scannet_data.py b/data/scannet/batch_load_scannet_data.py
index b55b363309..60b53b3db4 100644
--- a/data/scannet/batch_load_scannet_data.py
+++ b/data/scannet/batch_load_scannet_data.py
@@ -34,8 +34,8 @@ def export_one_scan(scan_name,
                         scan_name + '_vh_clean_2.0.010000.segs.json')
     # includes axisAlignment info for the train set scans.
     meta_file = osp.join(scannet_dir, scan_name, f'{scan_name}.txt')
-    mesh_vertices, semantic_labels, instance_labels, instance_bboxes, \
-        instance2semantic, axis_align_matrix = export(
+    mesh_vertices, semantic_labels, instance_labels, unaligned_bboxes, \
+        aligned_bboxes, instance2semantic, axis_align_matrix = export(
             mesh_file, agg_file, seg_file, meta_file, label_map_file, None,
             test_mode)
 
@@ -48,9 +48,12 @@ def export_one_scan(scan_name,
         num_instances = len(np.unique(instance_labels))
         print(f'Num of instances: {num_instances}')
 
-        bbox_mask = np.in1d(instance_bboxes[:, -1], OBJ_CLASS_IDS)
-        instance_bboxes = instance_bboxes[bbox_mask, :]
-        print(f'Num of care instances: {instance_bboxes.shape[0]}')
+        bbox_mask = np.in1d(unaligned_bboxes[:, -1], OBJ_CLASS_IDS)
+        unaligned_bboxes = unaligned_bboxes[bbox_mask, :]
+        bbox_mask = np.in1d(aligned_bboxes[:, -1], OBJ_CLASS_IDS)
+        aligned_bboxes = aligned_bboxes[bbox_mask, :]
+        assert unaligned_bboxes.shape[0] == aligned_bboxes.shape[0]
+        print(f'Num of care instances: {unaligned_bboxes.shape[0]}')
 
     if max_num_point is not None:
         max_num_point = int(max_num_point)
@@ -66,7 +69,9 @@ def export_one_scan(scan_name,
     if not test_mode:
         np.save(f'{output_filename_prefix}_sem_label.npy', semantic_labels)
         np.save(f'{output_filename_prefix}_ins_label.npy', instance_labels)
-        np.save(f'{output_filename_prefix}_bbox.npy', instance_bboxes)
+        np.save(f'{output_filename_prefix}_unaligned_bbox.npy',
+                unaligned_bboxes)
+        np.save(f'{output_filename_prefix}_aligned_bbox.npy', aligned_bboxes)
         np.save(f'{output_filename_prefix}_axis_align_matrix.npy',
                 axis_align_matrix)
 
diff --git a/data/scannet/load_scannet_data.py b/data/scannet/load_scannet_data.py
index 0cc20312a0..7e7cd55709 100644
--- a/data/scannet/load_scannet_data.py
+++ b/data/scannet/load_scannet_data.py
@@ -52,6 +52,29 @@ def read_segmentation(filename):
     return seg_to_verts, num_verts
 
 
+def extract_bbox(mesh_vertices, object_id_to_segs, object_id_to_label_id,
+                 instance_ids):
+    num_instances = len(np.unique(list(object_id_to_segs.keys())))
+    instance_bboxes = np.zeros((num_instances, 7))
+    for obj_id in object_id_to_segs:
+        label_id = object_id_to_label_id[obj_id]
+        obj_pc = mesh_vertices[instance_ids == obj_id, 0:3]
+        if len(obj_pc) == 0:
+            continue
+        xmin = np.min(obj_pc[:, 0])
+        ymin = np.min(obj_pc[:, 1])
+        zmin = np.min(obj_pc[:, 2])
+        xmax = np.max(obj_pc[:, 0])
+        ymax = np.max(obj_pc[:, 1])
+        zmax = np.max(obj_pc[:, 2])
+        bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
+                         (zmin + zmax) / 2, xmax - xmin, ymax - ymin,
+                         zmax - zmin, label_id])
+        # NOTE: this assumes obj_id is in 1,2,3,...,NUM_INSTANCES
+        instance_bboxes[obj_id - 1, :] = bbox
+    return instance_bboxes
+
+
 def export(mesh_file,
            agg_file,
            seg_file,
@@ -97,6 +120,13 @@ def export(mesh_file,
             break
     axis_align_matrix = np.array(axis_align_matrix).reshape((4, 4))
 
+    # perform global alignment of mesh vertices
+    pts = np.ones((mesh_vertices.shape[0], 4))
+    pts[:, 0:3] = mesh_vertices[:, 0:3]
+    pts = np.dot(pts, axis_align_matrix.transpose())  # Nx4
+    aligned_mesh_vertices = np.concatenate([pts[:, 0:3], mesh_vertices[:, 3:]],
+                                           axis=1)
+
     # Load semantic and instance labels
     if not test_mode:
         object_id_to_segs, label_to_segs = read_aggregation(agg_file)
@@ -110,34 +140,21 @@ def export(mesh_file,
                 label_ids[verts] = label_id
         instance_ids = np.zeros(
             shape=(num_verts), dtype=np.uint32)  # 0: unannotated
-        num_instances = len(np.unique(list(object_id_to_segs.keys())))
         for object_id, segs in object_id_to_segs.items():
             for seg in segs:
                 verts = seg_to_verts[seg]
                 instance_ids[verts] = object_id
                 if object_id not in object_id_to_label_id:
                     object_id_to_label_id[object_id] = label_ids[verts][0]
-        instance_bboxes = np.zeros((num_instances, 7))
-        for obj_id in object_id_to_segs:
-            label_id = object_id_to_label_id[obj_id]
-            obj_pc = mesh_vertices[instance_ids == obj_id, 0:3]
-            if len(obj_pc) == 0:
-                continue
-            xmin = np.min(obj_pc[:, 0])
-            ymin = np.min(obj_pc[:, 1])
-            zmin = np.min(obj_pc[:, 2])
-            xmax = np.max(obj_pc[:, 0])
-            ymax = np.max(obj_pc[:, 1])
-            zmax = np.max(obj_pc[:, 2])
-            bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
-                             (zmin + zmax) / 2, xmax - xmin, ymax - ymin,
-                             zmax - zmin, label_id])
-            # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES
-            instance_bboxes[obj_id - 1, :] = bbox
+        unaligned_bboxes = extract_bbox(mesh_vertices, object_id_to_segs,
+                                        object_id_to_label_id, instance_ids)
+        aligned_bboxes = extract_bbox(aligned_mesh_vertices, object_id_to_segs,
+                                      object_id_to_label_id, instance_ids)
     else:
         label_ids = None
         instance_ids = None
-        instance_bboxes = None
+        unaligned_bboxes = None
+        aligned_bboxes = None
         object_id_to_label_id = None
 
     if output_file is not None:
@@ -145,11 +162,12 @@ def export(mesh_file,
         if not test_mode:
             np.save(output_file + '_sem_label.npy', label_ids)
             np.save(output_file + '_ins_label.npy', instance_ids)
-            np.save(output_file + '_bbox.npy', instance_bboxes)
+            np.save(output_file + '_unaligned_bbox.npy', unaligned_bboxes)
+            np.save(output_file + '_aligned_bbox.npy', aligned_bboxes)
             np.save(output_file + '_axis_align_matrix.npy', axis_align_matrix)
 
-    return mesh_vertices, label_ids, instance_ids, \
-        instance_bboxes, object_id_to_label_id, axis_align_matrix
+    return mesh_vertices, label_ids, instance_ids, unaligned_bboxes, \
+        aligned_bboxes, object_id_to_label_id, axis_align_matrix
 
 
 def main():
diff --git a/tools/data_converter/scannet_data_utils.py b/tools/data_converter/scannet_data_utils.py
index 2f4ed60374..c94ab54156 100644
--- a/tools/data_converter/scannet_data_utils.py
+++ b/tools/data_converter/scannet_data_utils.py
@@ -42,9 +42,15 @@ def __init__(self, root_path, split='train'):
     def __len__(self):
         return len(self.sample_id_list)
 
-    def get_box_label(self, idx):
+    def get_aligned_box_label(self, idx):
         box_file = osp.join(self.root_dir, 'scannet_instance_data',
-                            f'{idx}_bbox.npy')
+                            f'{idx}_aligned_bbox.npy')
+        mmcv.check_file_exist(box_file)
+        return np.load(box_file)
+
+    def get_unaligned_box_label(self, idx):
+        box_file = osp.join(self.root_dir, 'scannet_instance_data',
+                            f'{idx}_unaligned_bbox.npy')
         mmcv.check_file_exist(box_file)
         return np.load(box_file)
 
@@ -112,28 +118,35 @@ def process_single_scene(sample_idx):
 
             if has_label:
                 annotations = {}
-                boxes_with_classes = self.get_box_label(
-                    sample_idx)  # k, 6 + class
-                annotations['gt_num'] = boxes_with_classes.shape[0]
+                # box is of shape [k, 6 + class]
+                aligned_box_label = self.get_aligned_box_label(sample_idx)
+                unaligned_box_label = self.get_unaligned_box_label(sample_idx)
+                annotations['gt_num'] = aligned_box_label.shape[0]
                 if annotations['gt_num'] != 0:
-                    minmax_boxes3d = boxes_with_classes[:, :-1]  # k, 6
-                    classes = boxes_with_classes[:, -1]  # k, 1
+                    aligned_box = aligned_box_label[:, :-1]  # k, 6
+                    unaligned_box = unaligned_box_label[:, :-1]
+                    classes = aligned_box_label[:, -1]  # k
                     annotations['name'] = np.array([
                         self.label2cat[self.cat_ids2class[classes[i]]]
                         for i in range(annotations['gt_num'])
                     ])
-                    annotations['location'] = minmax_boxes3d[:, :3]
-                    annotations['dimensions'] = minmax_boxes3d[:, 3:6]
-                    annotations['gt_boxes_upright_depth'] = minmax_boxes3d
+                    # default names are given to aligned bbox for compatibility
+                    # we also save unaligned bbox info with marked names
+                    annotations['location'] = aligned_box[:, :3]
+                    annotations['dimensions'] = aligned_box[:, 3:6]
+                    annotations['gt_boxes_upright_depth'] = aligned_box
+                    annotations['unaligned_location'] = unaligned_box[:, :3]
+                    annotations['unaligned_dimensions'] = unaligned_box[:, 3:6]
+                    annotations[
+                        'unaligned_gt_boxes_upright_depth'] = unaligned_box
                     annotations['index'] = np.arange(
                         annotations['gt_num'], dtype=np.int32)
                     annotations['class'] = np.array([
                         self.cat_ids2class[classes[i]]
                         for i in range(annotations['gt_num'])
                     ])
-                axis_align_matrix = self.get_axis_align_matrix(
-                    sample_idx)  # [4, 4]
-                annotations['axis_align_matrix'] = axis_align_matrix
+                axis_align_matrix = self.get_axis_align_matrix(sample_idx)
+                annotations['axis_align_matrix'] = axis_align_matrix  # 4x4
                 info['annos'] = annotations
             return info
 

From bebbc2b53e5a2ba270052b1cb62e6ec67daf9ddc Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Tue, 20 Apr 2021 20:06:38 +0800
Subject: [PATCH 10/12] dataset read aligned bbox

---
 configs/_base_/datasets/scannet-3d-18class.py |  27 ++---
 mmdet3d/datasets/pipelines/transforms_3d.py   |  84 +------------
 mmdet3d/datasets/scannet_dataset.py           | 108 ++---------------
 tests/data/scannet/scannet_infos.pkl          | Bin 6188 -> 10105 bytes
 .../test_datasets/test_scannet_dataset.py     | 113 +++++++++---------
 .../test_augmentations/test_transforms_3d.py  |  85 +------------
 .../test_pipelines/test_indoor_pipeline.py    |  26 ++--
 7 files changed, 94 insertions(+), 349 deletions(-)

diff --git a/configs/_base_/datasets/scannet-3d-18class.py b/configs/_base_/datasets/scannet-3d-18class.py
index c34a575961..b97e858ba5 100644
--- a/configs/_base_/datasets/scannet-3d-18class.py
+++ b/configs/_base_/datasets/scannet-3d-18class.py
@@ -16,18 +16,16 @@
         use_dim=[0, 1, 2]),
     dict(
         type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
+        with_bbox_3d=True,
+        with_label_3d=True,
         with_mask_3d=True,
         with_seg_3d=True),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='PointSegClassMapping',
         valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
                        36, 39),
         max_cat_id=40),
-    dict(
-        type='GlobalAlignment', rotation_axis=2,
-        ignore_index=len(class_names)),
     dict(type='IndoorPointSample', num_points=40000),
     dict(
         type='RandomFlip3D',
@@ -54,9 +52,7 @@
         shift_height=True,
         load_dim=6,
         use_dim=[0, 1, 2]),
-    dict(
-        type='GlobalAlignment', rotation_axis=2,
-        ignore_index=len(class_names)),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='MultiScaleFlipAug3D',
         img_scale=(1333, 800),
@@ -93,16 +89,11 @@
         use_dim=[0, 1, 2]),
     dict(
         type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
-        with_mask_3d=True,
-        with_seg_3d=True),
-    dict(type='PointSegClassMapping', valid_cat_ids=valid_class_ids),
-    dict(
-        type='GlobalAlignment',
-        rotation_axis=2,
-        ignore_index=len(class_names),
-        extract_bbox=True),
+        with_bbox_3d=True,
+        with_label_3d=True,
+        with_mask_3d=False,
+        with_seg_3d=False),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='DefaultFormatBundle3D',
         class_names=class_names,
diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py b/mmdet3d/datasets/pipelines/transforms_3d.py
index 1aff3924e1..fa8509cb94 100644
--- a/mmdet3d/datasets/pipelines/transforms_3d.py
+++ b/mmdet3d/datasets/pipelines/transforms_3d.py
@@ -296,17 +296,11 @@ def __repr__(self):
 @PIPELINES.register_module()
 class GlobalAlignment(object):
     """Apply global alignment to 3D scene points by rotation and translation.
-    Extract 3D bboxes from the aligned points and instance mask if provided.
 
     Args:
         rotation_axis (int): Rotation axis for points and bboxes rotation.
-        ignore_index (int): Label index for which we won't extract bboxes.
-        extract_bbox (bool): Whether extract new ground-truth bboxes after \
-            alignment. This requires instance and semantic mask inputs.
-            Defaults to False.
 
     Note:
-        This function should be called after PointSegClassMapping in pipeline.
         We do not record the applied rotation and translation as in \
             GlobalRotScaleTrans. Because usually, we do not need to reverse \
             the alignment step.
@@ -314,10 +308,8 @@ class GlobalAlignment(object):
             bounding boxes for evaluation.
     """
 
-    def __init__(self, rotation_axis, ignore_index, extract_bbox=False):
+    def __init__(self, rotation_axis):
         self.rotation_axis = rotation_axis
-        self.ignore_index = ignore_index
-        self.extract_bbox = extract_bbox
 
     def _trans_points(self, input_dict, trans_factor):
         """Private function to translate points.
@@ -357,74 +349,6 @@ def _check_rot_mat(self, rot_mat):
         is_valid &= (rot_mat[:, self.rotation_axis] == valid_array).all()
         assert is_valid, f'invalid rotation matrix {rot_mat}'
 
-    def _bbox_from_points(self, points):
-        """Get the bounding box of a set of points.
-
-        Args:
-            points (np.ndarray): A set of points belonging to one instance.
-
-        Returns:
-            np.ndarray: A bounding box of input points. We use origin as \
-                (0.5, 0.5, 0.5) without yaw.
-        """
-        xmin = np.min(points[:, 0])
-        ymin = np.min(points[:, 1])
-        zmin = np.min(points[:, 2])
-        xmax = np.max(points[:, 0])
-        ymax = np.max(points[:, 1])
-        zmax = np.max(points[:, 2])
-        bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
-                         (zmin + zmax) / 2, xmax - xmin, ymax - ymin,
-                         zmax - zmin])
-        return bbox
-
-    def _extract_bboxes(self, input_dict):
-        """Extract bounding boxes from points, semantic mask and instance mask.
-
-        Args:
-            input_dict (dict): Result dict from loading pipeline.
-
-        Returns:
-            dict: Results after extracting bboxes, keys in \
-                input_dict['bbox3d_fields'] are updated in the dict.
-        """
-        # TODO: this function is only used in ScanNet-Det pipeline currently
-        # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes
-        from mmdet3d.core.bbox import DepthInstance3DBoxes
-
-        assert 'pts_instance_mask' in input_dict.keys(), \
-            'instance mask is not provided in GlobalAlignment'
-        assert 'pts_semantic_mask' in input_dict.keys(), \
-            'semantic mask is not provided in GlobalAlignment'
-
-        coords = input_dict['points'].coord.numpy()
-        inst_mask = input_dict['pts_instance_mask']
-        sem_mask = input_dict['pts_semantic_mask']
-
-        # select points from valid categories where we want to extract bboxes
-        valid_cat_mask = (sem_mask != self.ignore_index)
-        inst_ids = np.unique(inst_mask[valid_cat_mask])  # ids of valid insts
-        instance_bboxes = np.zeros((inst_ids.shape[0], 7))
-        inst_id2cat_id = {
-            inst_id: sem_mask[inst_mask == inst_id][0]
-            for inst_id in inst_ids
-        }
-        for bbox_idx, inst_id in enumerate(inst_ids):
-            cat_id = inst_id2cat_id[inst_id]
-            inst_coords = coords[inst_mask == inst_id]
-            bbox = self._bbox_from_points(inst_coords)
-            instance_bboxes[bbox_idx, :6] = bbox
-            instance_bboxes[bbox_idx, 6] = cat_id
-
-        if 'gt_bboxes_3d' not in input_dict['bbox3d_fields']:
-            input_dict['bbox3d_fields'].append('gt_bboxes_3d')
-        input_dict['gt_bboxes_3d'] = DepthInstance3DBoxes(
-            instance_bboxes[:, :6],
-            box_dim=6,
-            with_yaw=False,
-            origin=(0.5, 0.5, 0.5))
-        input_dict['gt_labels_3d'] = instance_bboxes[:, 6].astype(np.long)
-
     def __call__(self, input_dict):
         """Call function to shuffle points.
 
@@ -447,16 +371,12 @@ def __call__(self, input_dict):
         self._check_rot_mat(rot_mat)
         self._rot_points(input_dict, rot_mat)
         self._trans_points(input_dict, trans_vec)
-        if self.extract_bbox:
-            self._extract_bboxes(input_dict)
 
         return input_dict
 
     def __repr__(self):
         repr_str = self.__class__.__name__
-        repr_str += f'(rotation_axis={self.rotation_axis},'
-        repr_str += f' ignore_index={self.ignore_index},'
-        repr_str += f' extract_bbox={self.extract_bbox})'
+        repr_str += f'(rotation_axis={self.rotation_axis})'
         return repr_str
 
 
diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py
index dc7e34cae1..1dfff7d7f7 100644
--- a/mmdet3d/datasets/scannet_dataset.py
+++ b/mmdet3d/datasets/scannet_dataset.py
@@ -154,89 +154,6 @@ def _get_axis_align_matrix(info):
                 'use new pre-process scripts to re-generate ScanNet data')
             return np.eye(4).astype(np.float32)
 
-    def evaluate(self,
-                 results,
-                 metric=None,
-                 iou_thr=(0.25, 0.5),
-                 logger=None,
-                 show=False,
-                 out_dir=None,
-                 pipeline=None):
-        """Evaluate.
-
-        Evaluation in indoor protocol.
-        Since ScanNet detection data pipeline re-computes ground-truth boxes,
-            we can't directly use gt_bboxes from self.data_infos.
-
-        Args:
-            results (list[dict]): List of results.
-            metric (str | list[str]): Metrics to be evaluated.
-            iou_thr (list[float]): AP IoU thresholds.
-            show (bool): Whether to visualize.
-                Default: False.
-            out_dir (str): Path to save the visualization results.
-                Default: None.
-            pipeline (list[dict], optional): raw data loading for showing.
-                Default: None.
-
-        Returns:
-            dict: Evaluation results.
-        """
-        from mmdet3d.core.evaluation import indoor_eval
-        assert isinstance(
-            results, list), f'Expect results to be list, got {type(results)}.'
-        assert len(results) > 0, 'Expect length of results > 0.'
-        assert len(results) == len(self.data_infos)
-        assert isinstance(
-            results[0], dict
-        ), f'Expect elements in results to be dict, got {type(results[0])}.'
-        # load gt_bboxes via pipeline
-        pipeline = self._get_pipeline(pipeline)
-        gt_bboxes = [
-            self._extract_data(
-                i, pipeline, ['gt_bboxes_3d', 'gt_labels_3d'], load_annos=True)
-            for i in range(len(self.data_infos))
-        ]
-        gt_annos = [self._build_annos(*gt_bbox) for gt_bbox in gt_bboxes]
-        label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
-        ret_dict = indoor_eval(
-            gt_annos,
-            results,
-            iou_thr,
-            label2cat,
-            logger=logger,
-            box_type_3d=self.box_type_3d,
-            box_mode_3d=self.box_mode_3d)
-        if show:
-            self.show(results, out_dir, pipeline=pipeline)
-
-        return ret_dict
-
-    @staticmethod
-    def _build_annos(gt_bboxes, gt_labels):
-        """Transform gt bboxes and labels into self.data_infos['annos'] format.
-
-        Args:
-            gt_bboxes (:obj:`BaseInstance3DBoxes`): \
-                3D bounding boxes in Depth coordinate
-            gt_labels (torch.Tensor): Labels of boxes.
-
-        Returns:
-            dict: annotations including the following keys
-
-                - gt_boxes_upright_depth (np.ndarray): 3D bounding boxes.
-                - class (np.ndarray): Labels of boxes.
-                - gt_num (int): Number of boxes.
-        """
-        bbox = gt_bboxes.tensor.numpy()[:, :6].copy()  # drop yaw dimension
-        bbox[..., 2] += bbox[..., 5] / 2  # bottom center to gravity center
-        anno = {
-            'gt_boxes_upright_depth': bbox,
-            'class': gt_labels.numpy(),
-            'gt_num': gt_labels.shape[0]
-        }
-        return anno
-
     def _build_default_pipeline(self):
         """Build the default pipeline for this dataset."""
         pipeline = [
@@ -248,19 +165,11 @@ def _build_default_pipeline(self):
                 use_dim=[0, 1, 2]),
             dict(
                 type='LoadAnnotations3D',
-                with_bbox_3d=False,
-                with_label_3d=False,
-                with_mask_3d=True,
-                with_seg_3d=True),
-            dict(
-                type='PointSegClassMapping',
-                valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                               33, 34, 36, 39)),
-            dict(
-                type='GlobalAlignment',
-                rotation_axis=2,
-                ignore_index=len(self.CLASSES),
-                extract_bbox=True),
+                with_bbox_3d=True,
+                with_label_3d=True,
+                with_mask_3d=False,
+                with_seg_3d=False),
+            dict(type='GlobalAlignment', rotation_axis=2),
             dict(
                 type='DefaultFormatBundle3D',
                 class_names=self.CLASSES,
@@ -287,10 +196,9 @@ def show(self, results, out_dir, show=True, pipeline=None):
             data_info = self.data_infos[i]
             pts_path = data_info['pts_path']
             file_name = osp.split(pts_path)[-1].split('.')[0]
-            points, gt_bboxes = self._extract_data(
-                i, pipeline, ['points', 'gt_bboxes_3d'], load_annos=True)
-            points = points.numpy()
-            gt_bboxes = gt_bboxes.tensor.numpy()
+            points = self._extract_data(
+                i, pipeline, 'points', load_annos=True).numpy()
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
             pred_bboxes = result['boxes_3d'].tensor.numpy()
             show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,
                         show)
diff --git a/tests/data/scannet/scannet_infos.pkl b/tests/data/scannet/scannet_infos.pkl
index d0fe1b839d248d140a595a9a20e5711ce1eeed77..20595af6a5fece904a01fe9056a484a26e80eba0 100644
GIT binary patch
literal 10105
zcmd^F_g7TO^1lQ@F|0W(hS4=(#<0P(_hMRian&6cch)p8Fu*7bGz@EahFMgC0TC4u
z(<q9GRZtWJg>$O@iTB@pzn|(m<GTBFX5KsB_tUq}X{K*ib#+yBb%lEgChk+lag~ta
z8g3Xyd6~~2aOanlRTZej<0>h`H3GB#s#15MH!o0C;jL83*(t7Jm@M%X<W;zR1&7rT
zjZ~TM^?NhX+?kmwrOGu_$OS6h<#~Z(mFk)lp<cQ4L(C-}pI;4iO$;OZ{FQ+`f4<jU
znpb(yUOLP*9l*Z;{*^-7dkU4_(ma2_m;V8U;hMr<0RKv1glh=4$@BZmD%Hs2YE*_R
z1<yqRH-uN8Wv9E6@#xPh^{UbNm?}TAB)_b}yQH+LB;d=dsK`5#=C1JOmx2F^s{BBj
z8e?On`Jp9&s<GL|88t57onKbq&96-J`el%&#=8<g*rO)s{YvrzKL0W`u|`dDs>!u#
zic@`_k)4p8enzE-2^Iv7lw;jgR}$9cEL*Oo)z{Uk=}t8xBRfeGTdN$~wr$JK%udc8
z8c;Jm^~K{|qaocPEbxgG=mqFy=rZ@I_V3~cKqwF0gI<E}MUU3<0Tdpp(1j;iFKa5$
zKh^|aa0MROD<aST1-|G+_OC=g_%VWin-hK_@<#ZO^}^f7n$V5l#Y4Xc_(EUg6`jgh
z=420fj?ltTxBoW2@Gs|8h%UUuYQnehD)=%Mek1(Lv(OM-2~By9oN2uF^Pk|$UcyJj
zK0kF<K+!rD{>3ihY5%T0{C~rb_?*aj$I|J48(+;ThNJz$sb&Y%9FGoIC1v^Wz-4|l
zw|Gpk_So}^;f6DuYQA>E*-15OLGe^qB6u(SMlBTAyU3{)!}S)6)BWLp(Da{dBcPUO
z*ZVTbFu0l9XrRnPFS*C0dTOG^V@DT^u=vWb;^i*t=65FD8NJ%`<C;`!6x|<g&|AJX
zoP&IISTR@gol@>KxoO{6ZrkMI7be~0R=UnlI7H0}7Cjlqjni|ekK4?w1LHQ+y_{5Q
z`Vx9`V0Tio4c$mhQ~9|`eFgOPD7PF>;~)ih^4W#l^b4J%LE+<D>iL4ZOgdjUXCvK6
zH-J*TcfFg>^F84N7<7+rt>!l-y(plD#ZGES08TsgQK)?GjA0zyIgdizcf!wiOm3ax
z;pg0tmcl_FzoS36WB<2FT3)K%w@PH_UC%At#_iNN)H-^YFY=4rLm=n`5p{xU`G%QE
zuc>kNBEB%hN~0TGM>ofB<NGGv-)8YcZlBF{<}toa7x*c)^2=0<nt$S#Nnh@V9)wC3
z-T0ZWnlq`c@(2yi=QlJ+SIW6-WIBgZ!8yom2)H?DQU}+JrIs(3@^z@9huZmZ!3Mem
zNreoz6++WD%%$`Q65XKIoD|FAOKUgg(rxZGwf1N8l_mU#`7=HJ?I6E92!;mf6}N5}
znKd5ggN^9ilr4P0Ts1eFFL7T1l(mxTSMxJ|v=fS!ZRmIIUCoW=On#oTb0a;6IH8=v
zlnGQf)JE7&_vscln_CXAr~bX!e3=HF(0w;`l<=jbFp(}ApeKCY-2V%oPfh2xp%&e#
z;+lMFa`SV2&ip6|rgH-)z*p!MHE*Em6<~*Na|0}>hkMtFZp5S((vu1N%%rm!)IOfO
z52jk<=sCUE!F@QmZj+xRubfY(N(^guRU)_ZDfB-~YA@uApgWK)L}4`CG?!~(->Va;
zmv3^zH$yCb@C7}lx_z)tt>DL#)`-bmp$9zZp__C&)#C1KK24`)P<664nz8(N8V{IM
zL+_}5cL`rlvkZQa&pk8wJY9pfh2fNWhrsVay2lsS^E)0eEWYdHhRt+;qdt7W;VUi*
z;wt<`&A(c7st9!G_pIe<)I$%$WWLHT0)kp9-XYVemIc&uk1Gu~zQ9-N^;g0@RH_vb
z|FBa0!<SC=)qDT&b@5`!xVmudaX5GAJYC<$kGbFE5Z6`h<TKO>OW*+><U#D%y@T(W
z{0L$^=l1b)_<>2m-J9s*cihTnO!1Mcj$<-o2;YLeo!Z5BO}<a>4$x`t*qm$uPVKmI
zFHKk3Nxp5-0IWZA+Ujh_F(_U96rbJ4O(wN)&C;}-ulS}(&qo>52z$FVN|qORwj@|M
zyC&}C4tfXE7&N(a;g&rHJ>+wmgD*>Q-i7mj-A@hl3?8hQTB#GLFZc~u0|!Ce*9W*A
zohCKSTgANxaC>^pjT!WM<z&8xA@)h1N!^JS2Xk@u?dZ*5Wh)#*Ptq=G)MN~9vAByn
zOb+hPm`s=XDPJ?WWg)+$*U2X_1{La9g?rcnl&#C=(bezy7L-SQ^XUTJz=~GE8Opzd
zO*o21Zdks8o)z;&Gk4?;I(M9FU}!?6v7(eOnZIn~>ZCksPO%cEK?BXyK{bgM;COhx
z9oEcsv#5q2L)2ce?cjdy3Ru(yZ3D?MIYIn(H}zXk+G)N9@5zrcxWP;J_Syms%}El=
z?u0FO!1sr8@kEyv&{>HDuc0oG*}_jpbI6>`gNLYlZyC3nTZc@@`BuzAIO*Gi>+{f@
z3e#<ZUuniN#DYPt=R2^PTJ%AU*TR25=qIqUALp0R+XHX{RJ$9F@!~Rg1lfs#(_u8d
ziEu3Zz*<DXbubk=O}Cs=e8+5&rqHQG&?~3fWu-g6gAB=h+2`H87Wct6SddT0G<U)k
zU7ARD!A-Nd1ERNXN&tuL5j6051qX?%c6q_k14pUfT*P;|5vMc&!+WPW4xK0fiZOLR
z*DW56(|`}`ol4i%jFP*8%fW*)MuReJD!7*3(!-1_dSi001+!~#Kz^}2g9p-a9s}ma
z<Q!ZkI5k11^9xT<8~ng|jnYY1um)Bg9K8z>=LsI_%v?v-90cB5{77&{aobK{sBh%b
zuX3u@0ky`Xi%OJ$p0dN<N_SOxg|DbM;4bi%qoQ3KaRBSY0etIJrtJVyYLsO=fPz$T
z7f0NW*GI%o5Mnx!xKfDNo+f&d5yKC~lY&I|RdHk}ak#TMl9>2BUL2`T44)WBDioL7
zj3Zx)!?VVbQpMwZ<4C(=x##%uvY1Xhj>IhvKOZx<iz5)kk^aRMD`Mt|F@i~4DP-Ic
zC$3yGc2q+C7gstOD|p4uS7U~^xDwiUV_?k0H;y0)-Nej|;|Zj3q|fmL+xSxKctUc_
zG(3)&oh31Qy;E%nsEr=iSVZ$Ge_n~N$nPz1zc1)FMdI~liPzsb)t2`O`}cOdo`r;6
znuFeqvG9xX<t^1ZWN08U?njb=Q7%6ntLKo-*RMzrD1|DkP%FwP$gNppWe#~8JxiAn
zhmO6^Rf5Pr2Pf&-U|`G)85l_Guj=#+t1-GMDUo4VUV4Vo1l(jVM5!h-$G#pe6UK>|
zeB0h^*?R8NpdH*lRA7#MNXQ@RsUmE3SC8iLb>wA$LZIrCdiPxJvh0Kp96y<<89~u^
z&82sMWEWs=I#ITEeD=9ewoq@{jEA^76J@H*@tqu7#9(3OWWmA?Pb!dj3UJ!0kuv4r
zRuvXt99~=0Y1r0`5dy>Jr)O!Zjy=gpfiY!q3k^;YAP1+p_10jge;pD~%!yX<G5`Z>
zHaF@mt-W|Z(j-6#@D_3Ztwx-UT)0<{w*1OhG&qFHyF-gg!9uFlr4tx_iIde;1J=Ld
z)As36_X<O{ag4&*-O_Y8cx0MPad6jsjl`ivNG5e9z`-KEtrt*e?nztQhHtEWtQOxK
z84={}W<7HJ$elKN5QW9Vuwv&US=ZcylRj>+?aiRa>opwKafkY++bqya#72F*(BZ`*
ztqFr)?6*zo_#C??pkT(HFwgXG)<jvFJ8whS*h1Usm3_36?US;gj9MGWq2=j%p@s6|
z#ROSM!A($9r>TCB+m`KATLbDxk83<Yf8Mqa3gm5(`?g)~+YYDt>HU3k+V^dhecvF!
zIXYz<vcV5ssM|mjD$s>7h8X{5Pf{=qwtQLOc2SI9Ebpdz+rl`wWV>Avhjc|Oc(dib
zm&S9=3v1Lah%sl)TFS2k=%CKewYBC-EsO~cF8(QuT{_XeQHCyp0W<KXBV?Df)=U&(
z&usGH`d1L>3~G6OX>n*xbj?0hhpvAGK8E{kYX`U*Z=2eL0*CC{R}g+csn;Tt2HnfV
zt0x(R?7CMbxpj5KC>tQZ_6A0{<`wuZc#*4`rkHeFm_SgMya4A?Gu=+HFrwg$g{d-t
zK)T-5b12l_*mi(|d>6kKN_MS_ft8QKtV{DY-{Plwpo?6AQRg&USsa>#(npuKK$7wn
zU}HJn+UaVGf=RG@ebB~UUET`#YiLc^vJMB;Ct8>RTXs>4vEkeOUTN*PDVn&$E@%Zc
z$d~4z{KYVrE*!@D3XB|fIg61|R*C{%2AkdXz_Ke@nc~nATVE8+!W%HnB3>ry3Rd7F
zmPYK4dhj~5U+8eSdd08`rLnF>4ZQ2QWJdyrUAf9?zMYKXRz}}%3gb=HC96y=OAl-M
zAKYy_P6I(eidBIMUf2%Wt8BiEaG|<WuHny4l@n0AJT3>2|Gb8uC|Q4rTuGN)$zPpn
z_xqA{k9{Tgq{yzKw`d`dV%^xNF*4?Ljut_&b+p?KBZwAYqdEiJ#c0v@ldd94T#n{N
ztY|Siri-zn2K87D2wIF9^kX`xXjwrF=M^<+h~?R$B^9x}p7rUZBf3M3o{~V9(cER!
zj3t^cMF5PJ&BSoE94pz0;ft|*v|K2Lr;e5?#dPCQGN~9oK3bv`&Hv*FV<lm+B1DWN
zEk*<Z1E0*{Vnml{DPGjbL(%fT=wT^Jb|}FkO3>OXL2I8=?GGroNB_f(&tKp@ta2lf
zDNiDk$EoslWICe?!rxkbE0tHiwH7*6(Z|2F7VB@V@>yS60(v64q?pO*L(nC*;gzf*
z|ClrkUB2kZ|2&OAABjE+{WJ8@=wr~OkQ|3DDeeSx`5&c8=#$Z>pns08d?L{Sr#cu=
zCALKQC3%&Vsx%^zUnDAXs`9WzDk9N0szM~HbgIC|C92XA{S&){qw@P-kw}_IAx)%^
z7Woy)K5QbDG?7Z$-+%2y+}4D^BlIJ`q8VwT)&Hc4_ND#(SN$g1JLFV{1L}y!C2Zv#
z_Eoxd@s5As3RL(GtD_N3{zEi*%&CrxCX=(1Yt#ulYYAZ;$_Bpn8FgjJ2g@GrnztYw
z38*go05yz_)V7A|=oZyafN%Q~fd)TZsZsIgL;xQN&KB_-e7%m${q+}F8})cj^7VB`
d@Fi^UufV@Q0a=|CQG-tPdq7ots;csq{4XU`36TH*

literal 6188
zcmd@YYj+e?vhyH>0U;<6Feu{-B5DXCE)X6aMMY<gZey^QO`LZ!Lnbr1Gd+jkF`*R*
zpooYV9x>7c1Pn+B;6hjl=T!ZP{WrT+_fCg^p$9$dr@iOY%<ZbWb*t*$dUV^RtwQS&
zCAH>EP1CwE$+Q)WrZV}MDD4qtwdO3srt_V_ws_deXX80h9;h&95HgjFg|oqAtXoWz
zOu1-09j~Vmtgjapd9#wmS-D_W*h+}$=6qLrZuw-+x=1oDD$U0W%*k}l3a6v-U}rej
zp_a}tpCaNtgx^Z=O-RVaJHu%!8J#3yrj&3G;kOcInbT;Sa5|mIiAQ?G>{_#ep4+V;
z887ApYRqzaOouz;qAE(LuAOz!Og3KEnNL~Ca5fv>=?iA#(G2OI%||Vtn5(#aY4Q?N
z#iIdjuXrpPjAml-XwDZ;^Ta3SnI$ARBC6$nsj!tyuMm&-iTQ?Duum*B#1plFl0eN~
zQBx2YvvzjTx<zIgt!rNKig@zC{(a&pLoBWhlu2Rhgukh&DNr9M4^&#>>Bxb^JhO_d
z+d&JGYz0A#AVa{o2aVs$Z;_xdL4+Vh5GU{&Q6jPqc>>mn*Ylbz!Bm6!nV0GAUamd=
z3x4(q@1G;+m`dP%IM&Ct*VTj9vu;xjmh198jQn=uXZdU|`;_O5<30G<l|@rI{%n5M
zpU<m}fORQ0SZ~&q`FYOzy881o%V1xzOn!FH^kMJMAK~Y{SP%DpKIp87Mf#Za=X>!m
z<M-Z&|2KX&=eXxx%%^`gzj!7=iS}7TJZFjLBeGznGEvIFnY37vn46Fpduf8waIGO;
zkZCwj)+d%F7MZ1_d)sR9BB$P$4Dm9h-UKJzcmG2}{sc5j)XCJlvP>EZb_A~Bjg9mG
z_9x3`=`_(zxQ@3O6?(5>I%72W6_3qCn`UU^sZPAfRCL}n@v54k1N|jBA^pv83(qi8
zhoNS<I<&2<oJl$yS%f1@0vnI__@q1ykADPT!W~J!@hpz2r2{pz6X8O55Qp5IVQ?*u
zG2L&#p%O_4d&%M%O3<z}o?@bgU;8Q)D!8FcCj>aHYbF#u2Zt*esfBka5usk#hXZok
zm+-4;Oao_L6P{%hI(Y6m+%M(A0A5q>SoN`M;=qUa9TUNZJ1^jvv_q>wXS!5Cw}NOZ
z@~Y){LQ+F-z$s+~I&C<tb|&|oY_8y~^$`4k*LjT}kJjPOJPYCN4!o#jH_nr827BRV
z{ECTmAh%-51*GhSEy{1?;hSsmFf-_zU__~14f`vVUT_u$v0p833E;O<HaIE#rc5$`
z1LH7^CzylGIU_w5f+Im3k*t0=fdk5=I()veZ~{2CUfJ!3;cC1r7vuE~xw($F@VGKe
zhutYnXUVt+&nXkphVg)F#S+}7?jIR+Xtg>KyH#Bxbmljxa}L4x@Pm{M_G0*4UNJxZ
z^dye+#-S9R@X1Y{!Gmy2IdAC>rIn7iaa^jP!`V%RV?)OzugUvK8n&y&KDYtr<zhYB
zPDHVsqqE)+D=hI!#Prclv1DgFoulx~iIr}AzRK~r$`G$j#OGfUFLTEeD1@EUgg@cG
zl!x}f6?K#v{PF<~No4i&_@kU`#*?^L&errO6e+oTE1qT?O)tGA4F{(;jn^b!BaExt
z8S;Os+^FFft$0Kdn_t84)k^pb2WPW`C<SbmcZx3IZ93IEl2L>6_$AN$t2))Oh2Rie
zc2fmj#jy!>e#8;H!n|m2r^_v~iM{}b)Xe`hTrbs$s=+YytL^oza9AxzN5@gTB=>}G
z@gg4MDLCc3+}RqqfX5a6yLWI<l5H3u=93bOpOIQJST(quz^^5;2m9150Xy4?-$+~-
z`elfbotL1mRHuE=!IAgX4b;eOH`Nqu9Y&Qm{R>JauHqg%rqUDGFw{~wpXM?*>zod`
z_wpM4Q~5*3i&f|-jaxE!PR%yBhulK<{5<1GqkdXb$f|_BpiY6*`<aXzmg>W=+f_iK
zvw~t%I@}L?@TRncj&Z#@b}jj7L3h|)s^oyZ<!z;JXxqAp6ztR}oDQdHbD0gmF2TdD
zo}|;!!pS5)0eM-;@^QL2vN)jA8!M3Os+5btZkVlZwO&Qek}7JojUbaY+I~Dx@Y*8s
zuMAVN{OZO6BPFFFJXr9@QMzI3CgLTU$_(tMx-^Ivq&M))7NU#Q9FA)YvDOj|5&61B
zFGrC~cRUx&cV(092`d<jchS4vIyW@eb7*cb#6}gG6@5Ziq1jtOCn3YZqas3)!1b<}
znnzd{sisAu-lN!RUI~H3dRAi1D`Dv1U8d=9y%LIN6@Q3q<5}oEGZDp=M;lNqPw7S4
zgko9Dqm9s@NQ$EX^y)LbGa%UBU5MtLA?Y|hnv^2>)4PXps1xu=u0?ws4L!53cMAlz
zXKuEOHAv)_;@R7~iJ}JEE4l05J(gGM*Ss1q@48U5Ju6-~D9|3<z-iu9#k;@LylReT
z^XFZVinWGx*hLDK{=nvu%Zq0<YvgM7x*^`M#HNV+^p{M>;@#p+w|Kq9#p`WDY%Y9$
zd#CW$mwZ*c%Wr)FL;Q8>x4!q}TOWU5;88-rZ;RZGmJ>`P;PN+}fIpGWAmHAXzrM~Q
zc!Xd!!5o4rg1H2b5<EsQkD!`>zn1d%)CB|!37#MjO>EKohWNk|e^VAkQ{h}r7_LQI
z*rE>&(Oj@-t839}@ex~O8shI$TeMAD^l#cl74EzLY!MH(f(Ki{!~JK=CJ(lX2V2GC
z{(sf8D^D=pmGAzuXLztz|HxqP^SJ+C{DZyMVu)5td>k=Zt#EfT7YwJ8?P>ayY-N+(
zBItTD#GVWrBEp_550v+b=-qDuG4^HL5N%WYvR(Spp$$wti^t}`Xd}HT!3`W;x~vAN
z50pI@k+n^5rUCZDARMTscec9>aA)#L=H$_9(Hrg;?KoC=Ay?SRztsD|jmR!Mv3@69
bQEzFJ$@k8QStQseNkeS6L`Ni_57+${&rpJ>

diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py
index 37ea8ccba4..1974553447 100644
--- a/tests/test_data/test_datasets/test_scannet_dataset.py
+++ b/tests/test_data/test_datasets/test_scannet_dataset.py
@@ -27,15 +27,11 @@ def test_getitem():
             with_label_3d=True,
             with_mask_3d=True,
             with_seg_3d=True),
+        dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='PointSegClassMapping',
             valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
                            34, 36, 39)),
-        dict(
-            type='GlobalAlignment',
-            rotation_axis=2,
-            ignore_index=len(class_names),
-            extract_bbox=True),
         dict(type='IndoorPointSample', num_points=5),
         dict(
             type='RandomFlip3D',
@@ -79,13 +75,15 @@ def test_getitem():
          [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
          [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
     expected_gt_bboxes_3d = torch.tensor(
-        [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000],
-         [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000],
-         [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000],
-         [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000],
-         [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]])
-    expected_gt_labels = np.array(
-        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
+        [[-1.1835, -3.6317, 1.5704, 1.7577, 0.3761, 0.5724, 0.0000],
+         [-3.1832, 3.2269, 1.1911, 0.6727, 0.2251, 0.6715, 0.0000],
+         [-0.9598, -2.2864, 0.0093, 0.7506, 2.5709, 1.2145, 0.0000],
+         [-2.6988, -2.7354, 0.8288, 0.7680, 1.8877, 0.2870, 0.0000],
+         [3.2989, 0.2885, -0.0090, 0.7600, 3.8814, 2.1603, 0.0000]])
+    expected_gt_labels = np.array([
+        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
+        0, 0, 0, 5, 5, 5
+    ])
     expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
     expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
     original_classes = scannet_dataset.CLASSES
@@ -131,23 +129,47 @@ def test_evaluate():
     results = []
     pred_boxes = dict()
     pred_boxes['boxes_3d'] = DepthInstance3DBoxes(
-        torch.tensor(
-            [[-3.7146, -1.0654, 0.6052, 0.6298, 1.9906, 0.4429, 0.0000],
-             [-8.5576, -1.8178, 0.2046, 1.1263, 2.7851, 1.8632, 0.0000],
-             [-8.8859, -5.3550, 0.9772, 0.9093, 0.3098, 0.5662, 0.0000],
-             [-8.0989, -5.0358, 0.0372, 0.2746, 0.2057, 0.5532, 0.0000],
-             [-6.9733, 0.3352, -0.0296, 1.2265, 0.7187, 2.2613, 0.0000],
-             [-5.3636, -1.6047, 0.3701, 2.8043, 1.1057, 0.3171, 0.0000]]))
-    pred_boxes['labels_3d'] = torch.tensor([4, 11, 11, 10, 0, 3])
+        torch.tensor([[
+            1.4813e+00, 3.5207e+00, 1.5704e+00, 1.7445e+00, 2.3196e-01,
+            5.7235e-01, 0.0000e+00
+        ],
+                      [
+                          2.9040e+00, -3.4803e+00, 1.1911e+00, 6.6078e-01,
+                          1.7072e-01, 6.7154e-01, 0.0000e+00
+                      ],
+                      [
+                          1.1466e+00, 2.1987e+00, 9.2576e-03, 5.4184e-01,
+                          2.5346e+00, 1.2145e+00, 0.0000e+00
+                      ],
+                      [
+                          2.9168e+00, 2.5016e+00, 8.2875e-01, 6.1697e-01,
+                          1.8428e+00, 2.8697e-01, 0.0000e+00
+                      ],
+                      [
+                          -3.3114e+00, -1.3351e-02, -8.9524e-03, 4.4082e-01,
+                          3.8582e+00, 2.1603e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.0135e+00, -3.4857e+00, 9.3848e-01, 1.9911e+00,
+                          2.1603e-01, 1.2767e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.1945e+00, -3.1402e+00, -3.8165e-02, 1.4801e+00,
+                          6.8676e-01, 1.0586e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.7553e+00, 2.4055e+00, -2.9972e-02, 1.4764e+00,
+                          1.4927e+00, 2.3380e+00, 0.0000e+00
+                      ]]))
+    pred_boxes['labels_3d'] = torch.tensor([6, 6, 4, 9, 11, 11])
     pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0, 1.0, 1.0, 0.5])
     results.append(pred_boxes)
     metric = [0.25, 0.5]
     ret_dict = scannet_dataset.evaluate(results, metric)
-    assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01
+    assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01
+    assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01
     assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01
-    assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01
 
     # test evaluate with pipeline
     class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
@@ -163,19 +185,11 @@ def test_evaluate():
             use_dim=[0, 1, 2]),
         dict(
             type='LoadAnnotations3D',
-            with_bbox_3d=False,
-            with_label_3d=False,
-            with_mask_3d=True,
-            with_seg_3d=True),
-        dict(
-            type='PointSegClassMapping',
-            valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
-                           34, 36, 39)),
-        dict(
-            type='GlobalAlignment',
-            rotation_axis=2,
-            ignore_index=len(class_names),
-            extract_bbox=True),
+            with_bbox_3d=True,
+            with_label_3d=True,
+            with_mask_3d=False,
+            with_seg_3d=False),
+        dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='DefaultFormatBundle3D',
             class_names=class_names,
@@ -185,11 +199,10 @@ def test_evaluate():
     ]
     ret_dict = scannet_dataset.evaluate(
         results, metric, pipeline=eval_pipeline)
-    assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01
+    assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01
+    assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01
     assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01
-    assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01
 
 
 def test_show():
@@ -254,19 +267,11 @@ def test_show():
             use_dim=[0, 1, 2]),
         dict(
             type='LoadAnnotations3D',
-            with_bbox_3d=False,
-            with_label_3d=False,
-            with_mask_3d=True,
-            with_seg_3d=True),
-        dict(
-            type='PointSegClassMapping',
-            valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
-                           34, 36, 39)),
-        dict(
-            type='GlobalAlignment',
-            rotation_axis=2,
-            ignore_index=len(class_names),
-            extract_bbox=True),
+            with_bbox_3d=True,
+            with_label_3d=True,
+            with_mask_3d=False,
+            with_seg_3d=False),
+        dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='DefaultFormatBundle3D',
             class_names=class_names,
diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
index 8f672a2f65..5e64d7e6c4 100644
--- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
+++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
@@ -3,8 +3,7 @@
 import pytest
 import torch
 
-from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes,
-                          DepthInstance3DBoxes, LiDARInstance3DBoxes)
+from mmdet3d.core import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
 from mmdet3d.core.points import DepthPoints, LiDARPoints
 from mmdet3d.datasets import (BackgroundPointsFilter, GlobalAlignment,
                               ObjectNoise, ObjectSample, PointShuffle,
@@ -225,49 +224,22 @@ def test_points_range_filter():
 
 def test_global_alignment():
     np.random.seed(0)
-    valid_cat_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
-                     36, 39)
-    ignore_index = len(valid_cat_ids)
-    cat_ids2class = np.ones((41, ), dtype=np.int) * ignore_index
-    for class_id, cat_id in enumerate(valid_cat_ids):
-        cat_ids2class[cat_id] = class_id
-
-    global_alignment = GlobalAlignment(
-        rotation_axis=2, ignore_index=ignore_index, extract_bbox=True)
+    global_alignment = GlobalAlignment(rotation_axis=2)
 
     points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
                          np.float32).reshape(-1, 6)
-    sem_mask = np.fromfile('tests/data/scannet/semantic_mask/scene0000_00.bin',
-                           np.long)
-    ins_mask = np.fromfile('tests/data/scannet/instance_mask/scene0000_00.bin',
-                           np.long)
     annos = mmcv.load('tests/data/scannet/scannet_infos.pkl')
     info = annos[0]
-    gt_bboxes_3d = info['annos']['gt_boxes_upright_depth']
     axis_align_matrix = info['annos']['axis_align_matrix']
-    gt_labels_3d = info['annos']['class']
 
     depth_points = DepthPoints(points.copy(), points_dim=6)
-    depth_bboxes = DepthInstance3DBoxes(
-        gt_bboxes_3d,
-        box_dim=gt_bboxes_3d.shape[-1],
-        with_yaw=False,
-        origin=(0.5, 0.5, 0.5))
-    sem_mask = cat_ids2class[sem_mask]
 
     input_dict = dict(
         points=depth_points.clone(),
-        gt_bboxes_3d=depth_bboxes,
-        bbox3d_fields=['gt_bboxes_3d'],
-        gt_labels_3d=gt_labels_3d,
-        ann_info=dict(axis_align_matrix=axis_align_matrix),
-        pts_instance_mask=ins_mask,
-        pts_semantic_mask=sem_mask)
+        ann_info=dict(axis_align_matrix=axis_align_matrix))
 
     input_dict = global_alignment(input_dict)
     trans_depth_points = input_dict['points']
-    trans_depth_bboxes = input_dict['gt_bboxes_3d']
-    trans_bbox_labels = input_dict['gt_labels_3d']
 
     # construct expected transformed points by affine transformation
     pts = np.ones((points.shape[0], 4))
@@ -275,60 +247,11 @@ def test_global_alignment():
     trans_pts = np.dot(pts, axis_align_matrix.T)
     expected_points = np.concatenate([trans_pts[:, :3], points[:, 3:]], axis=1)
 
-    expected_bbox_labels = np.array(
-        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
-    expected_depth_bboxes = np.array(
-        [[
-            -3.714606, -1.0654305, 0.6051854, 0.6297655, 1.9905674, 0.44288868,
-            0.
-        ],
-         [
-             -8.557551, -1.8178326, 0.20456636, 1.1263373, 2.7851129,
-             1.8631845, 0.
-         ],
-         [
-             -8.885854, -5.354957, 0.97720087, 0.9093195, 0.30981588, 0.566175,
-             0.
-         ],
-         [
-             -8.098918, -5.0357704, 0.03724962, 0.27458152, 0.20566699,
-             0.5532104, 0.
-         ],
-         [
-             -6.9733434, 0.33523083, -0.02958763, 1.2264912, 0.7187278,
-             2.2613325, 0.
-         ],
-         [
-             -5.36362, -1.6046655, 0.37014085, 2.8042943, 1.1057366,
-             0.31707314, 0.
-         ], [-2.6299255, -2.3314357, 1.4469249, 0., 0., 0., 0.],
-         [-5.201888, -1.014641, 0.11020403, 0., 0., 0., 0.],
-         [
-             -3.5216672, -6.8292904, 0.26571387, 0.13945593, 0.12182455,
-             0.02463818, 0.
-         ],
-         [
-             -6.4834313, -5.4506774, 0.13558027, 1.4790803, 0.6031074,
-             0.60305846, 0.
-         ],
-         [
-             -9.338867, -4.616579, 0.6112565, 0.17650154, 0.988079, 0.16838372,
-             0.
-         ], [-2.0639155, -1.245964, 0.30754995, 0., 0., 0., 0.],
-         [-2.002855, -1.9495802, 2.2899528, 0., 0., 0., 0.],
-         [-2.1240144, -3.751592, 0.92695427, 0., 0., 0., 0.],
-         [-3.6406162, -5.1366153, 0.25374442, 0., 0., 0., 0.]])
-
     assert np.allclose(
         trans_depth_points.tensor.numpy(), expected_points, atol=1e-6)
-    assert np.all(trans_bbox_labels == expected_bbox_labels)
-    assert np.allclose(
-        trans_depth_bboxes.tensor.numpy(), expected_depth_bboxes, atol=1e-6)
 
     repr_str = repr(global_alignment)
-    expected_repr_str = 'GlobalAlignment(rotation_axis=2,' \
-                        f' ignore_index={ignore_index},' \
-                        f' extract_bbox=True)'
+    expected_repr_str = 'GlobalAlignment(rotation_axis=2)'
     assert repr_str == expected_repr_str
 
 
diff --git a/tests/test_data/test_pipelines/test_indoor_pipeline.py b/tests/test_data/test_pipelines/test_indoor_pipeline.py
index 5563dcd073..6e705e85f9 100644
--- a/tests/test_data/test_pipelines/test_indoor_pipeline.py
+++ b/tests/test_data/test_pipelines/test_indoor_pipeline.py
@@ -23,19 +23,15 @@ def test_scannet_pipeline():
             use_dim=[0, 1, 2]),
         dict(
             type='LoadAnnotations3D',
-            with_bbox_3d=False,
-            with_label_3d=False,
+            with_bbox_3d=True,
+            with_label_3d=True,
             with_mask_3d=True,
             with_seg_3d=True),
+        dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='PointSegClassMapping',
             valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
                            34, 36, 39)),
-        dict(
-            type='GlobalAlignment',
-            rotation_axis=2,
-            ignore_index=len(class_names),
-            extract_bbox=True),
         dict(type='IndoorPointSample', num_points=5),
         dict(
             type='RandomFlip3D',
@@ -97,13 +93,15 @@ def test_scannet_pipeline():
          [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
          [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
     expected_gt_bboxes_3d = torch.tensor(
-        [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000],
-         [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000],
-         [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000],
-         [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000],
-         [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]])
-    expected_gt_labels_3d = np.array(
-        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
+        [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
+         [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
+         [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
+         [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
+         [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
+    expected_gt_labels_3d = np.array([
+        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
+        0, 0, 0, 5, 5, 5
+    ])
     expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
     expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
     assert torch.allclose(points, expected_points, 1e-2)

From 14e6e08a0e0d02835979e0ff083159027f61dea4 Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Tue, 20 Apr 2021 21:26:33 +0800
Subject: [PATCH 11/12] fix small bugs

---
 configs/_base_/datasets/scannet-3d-18class.py  | 11 +----------
 mmdet3d/core/visualizer/show_result.py         |  5 +----
 mmdet3d/datasets/__init__.py                   |  3 +--
 mmdet3d/datasets/scannet_dataset.py            | 13 ++-----------
 .../test_datasets/test_scannet_dataset.py      | 18 ++----------------
 tools/data_converter/scannet_data_utils.py     |  3 ---
 6 files changed, 7 insertions(+), 46 deletions(-)

diff --git a/configs/_base_/datasets/scannet-3d-18class.py b/configs/_base_/datasets/scannet-3d-18class.py
index b97e858ba5..8fdd5e142a 100644
--- a/configs/_base_/datasets/scannet-3d-18class.py
+++ b/configs/_base_/datasets/scannet-3d-18class.py
@@ -5,8 +5,6 @@
                'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
                'garbagebin')
-valid_class_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36,
-                   39)
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -79,7 +77,6 @@
 ]
 # construct a pipeline for data and gt loading in show function
 # please keep its loading function consistent with test_pipeline (e.g. client)
-# we need to load gt masks for aligned gt bbox extracting
 eval_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -87,18 +84,12 @@
         shift_height=False,
         load_dim=6,
         use_dim=[0, 1, 2]),
-    dict(
-        type='LoadAnnotations3D',
-        with_bbox_3d=True,
-        with_label_3d=True,
-        with_mask_3d=False,
-        with_seg_3d=False),
     dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='DefaultFormatBundle3D',
         class_names=class_names,
         with_label=False),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(type='Collect3D', keys=['points'])
 ]
 
 data = dict(
diff --git a/mmdet3d/core/visualizer/show_result.py b/mmdet3d/core/visualizer/show_result.py
index e7b4789e0a..c4f414ad3a 100644
--- a/mmdet3d/core/visualizer/show_result.py
+++ b/mmdet3d/core/visualizer/show_result.py
@@ -61,10 +61,7 @@ def convert_oriented_box_to_trimesh_fmt(box):
         scene_bbox = np.zeros((1, 7))
     scene = trimesh.scene.Scene()
     for box in scene_bbox:
-        try:
-            scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))
-        except ValueError:  # invalid box shape, e.g. width==0
-            continue
+        scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))
 
     mesh_list = trimesh.util.concatenate(scene.dump())
     # save to obj file
diff --git a/mmdet3d/datasets/__init__.py b/mmdet3d/datasets/__init__.py
index f98b22858d..3dbab47e5c 100644
--- a/mmdet3d/datasets/__init__.py
+++ b/mmdet3d/datasets/__init__.py
@@ -27,8 +27,7 @@
     'DATASETS', 'build_dataset', 'CocoDataset', 'NuScenesDataset',
     'NuScenesMonoDataset', 'LyftDataset', 'ObjectSample', 'RandomFlip3D',
     'ObjectNoise', 'GlobalRotScaleTrans', 'PointShuffle', 'ObjectRangeFilter',
-    'PointsRangeFilter', 'Collect3D', 'LoadPointsFromFile',
-    'S3DISSegDataset',
+    'PointsRangeFilter', 'Collect3D', 'LoadPointsFromFile', 'S3DISSegDataset',
     'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample',
     'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset', 'ScanNetDataset',
     'ScanNetSegDataset', 'SemanticKITTIDataset', 'Custom3DDataset',
diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py
index 1dfff7d7f7..eaba0ad3fc 100644
--- a/mmdet3d/datasets/scannet_dataset.py
+++ b/mmdet3d/datasets/scannet_dataset.py
@@ -163,20 +163,12 @@ def _build_default_pipeline(self):
                 shift_height=False,
                 load_dim=6,
                 use_dim=[0, 1, 2]),
-            dict(
-                type='LoadAnnotations3D',
-                with_bbox_3d=True,
-                with_label_3d=True,
-                with_mask_3d=False,
-                with_seg_3d=False),
             dict(type='GlobalAlignment', rotation_axis=2),
             dict(
                 type='DefaultFormatBundle3D',
                 class_names=self.CLASSES,
                 with_label=False),
-            dict(
-                type='Collect3D',
-                keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+            dict(type='Collect3D', keys=['points'])
         ]
         return Compose(pipeline)
 
@@ -196,8 +188,7 @@ def show(self, results, out_dir, show=True, pipeline=None):
             data_info = self.data_infos[i]
             pts_path = data_info['pts_path']
             file_name = osp.split(pts_path)[-1].split('.')[0]
-            points = self._extract_data(
-                i, pipeline, 'points', load_annos=True).numpy()
+            points = self._extract_data(i, pipeline, 'points').numpy()
             gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
             pred_bboxes = result['boxes_3d'].tensor.numpy()
             show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,
diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py
index 1974553447..b907fe92cb 100644
--- a/tests/test_data/test_datasets/test_scannet_dataset.py
+++ b/tests/test_data/test_datasets/test_scannet_dataset.py
@@ -183,19 +183,12 @@ def test_evaluate():
             shift_height=False,
             load_dim=6,
             use_dim=[0, 1, 2]),
-        dict(
-            type='LoadAnnotations3D',
-            with_bbox_3d=True,
-            with_label_3d=True,
-            with_mask_3d=False,
-            with_seg_3d=False),
         dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='DefaultFormatBundle3D',
             class_names=class_names,
             with_label=False),
-        dict(
-            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+        dict(type='Collect3D', keys=['points'])
     ]
     ret_dict = scannet_dataset.evaluate(
         results, metric, pipeline=eval_pipeline)
@@ -265,19 +258,12 @@ def test_show():
             shift_height=False,
             load_dim=6,
             use_dim=[0, 1, 2]),
-        dict(
-            type='LoadAnnotations3D',
-            with_bbox_3d=True,
-            with_label_3d=True,
-            with_mask_3d=False,
-            with_seg_3d=False),
         dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='DefaultFormatBundle3D',
             class_names=class_names,
             with_label=False),
-        dict(
-            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+        dict(type='Collect3D', keys=['points'])
     ]
     tmp_dir = tempfile.TemporaryDirectory()
     temp_dir = tmp_dir.name
diff --git a/tools/data_converter/scannet_data_utils.py b/tools/data_converter/scannet_data_utils.py
index c94ab54156..c52b3f5b63 100644
--- a/tools/data_converter/scannet_data_utils.py
+++ b/tools/data_converter/scannet_data_utils.py
@@ -219,9 +219,6 @@ def _convert_to_label(self, mask):
                 mask = np.load(mask)
             else:
                 mask = np.fromfile(mask, dtype=np.long)
-        # first filter out unannotated points (labeled as 0)
-        mask = mask[mask != 0]
-        # then convert to [0, 20) labels
         label = self.cat_id2class[mask]
         return label
 

From a9cc7fd62a242ed7aa498232410da47f546a5de8 Mon Sep 17 00:00:00 2001
From: Wuziyi616 <dazitu616@gmail.com>
Date: Tue, 11 May 2021 21:12:33 +0800
Subject: [PATCH 12/12] simplify bbox extraction code

---
 data/scannet/load_scannet_data.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/data/scannet/load_scannet_data.py b/data/scannet/load_scannet_data.py
index 7e7cd55709..911bb4c7fe 100644
--- a/data/scannet/load_scannet_data.py
+++ b/data/scannet/load_scannet_data.py
@@ -61,15 +61,10 @@ def extract_bbox(mesh_vertices, object_id_to_segs, object_id_to_label_id,
         obj_pc = mesh_vertices[instance_ids == obj_id, 0:3]
         if len(obj_pc) == 0:
             continue
-        xmin = np.min(obj_pc[:, 0])
-        ymin = np.min(obj_pc[:, 1])
-        zmin = np.min(obj_pc[:, 2])
-        xmax = np.max(obj_pc[:, 0])
-        ymax = np.max(obj_pc[:, 1])
-        zmax = np.max(obj_pc[:, 2])
-        bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
-                         (zmin + zmax) / 2, xmax - xmin, ymax - ymin,
-                         zmax - zmin, label_id])
+        xyz_min = np.min(obj_pc, axis=0)
+        xyz_max = np.max(obj_pc, axis=0)
+        bbox = np.concatenate([(xyz_min + xyz_max) / 2.0, xyz_max - xyz_min,
+                               np.array([label_id])])
         # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES
         instance_bboxes[obj_id - 1, :] = bbox
     return instance_bboxes