diff --git a/configs/_base_/datasets/scannet-3d-18class.py b/configs/_base_/datasets/scannet-3d-18class.py
index c34a575961..b97e858ba5 100644
--- a/configs/_base_/datasets/scannet-3d-18class.py
+++ b/configs/_base_/datasets/scannet-3d-18class.py
@@ -16,18 +16,16 @@
         use_dim=[0, 1, 2]),
     dict(
         type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
+        with_bbox_3d=True,
+        with_label_3d=True,
         with_mask_3d=True,
         with_seg_3d=True),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='PointSegClassMapping',
         valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
                        34, 36, 39),
         max_cat_id=40),
-    dict(
-        type='GlobalAlignment', rotation_axis=2,
-        ignore_index=len(class_names)),
     dict(type='IndoorPointSample', num_points=40000),
     dict(
         type='RandomFlip3D',
@@ -54,9 +52,7 @@
         shift_height=True,
         load_dim=6,
         use_dim=[0, 1, 2]),
-    dict(
-        type='GlobalAlignment', rotation_axis=2,
-        ignore_index=len(class_names)),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='MultiScaleFlipAug3D',
         img_scale=(1333, 800),
@@ -93,16 +89,11 @@
         use_dim=[0, 1, 2]),
     dict(
         type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
-        with_mask_3d=True,
-        with_seg_3d=True),
-    dict(type='PointSegClassMapping', valid_cat_ids=valid_class_ids),
-    dict(
-        type='GlobalAlignment',
-        rotation_axis=2,
-        ignore_index=len(class_names),
-        extract_bbox=True),
+        with_bbox_3d=True,
+        with_label_3d=True,
+        with_mask_3d=False,
+        with_seg_3d=False),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='DefaultFormatBundle3D',
         class_names=class_names,
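Since ground-truth boxes are now loaded from the info file rather than re-extracted from masks, the only remaining ordering constraint is that GlobalAlignment runs before sampling and augmentation; it no longer has to follow PointSegClassMapping. A hedged sketch of how the reworked pipeline head composes (Compose is mmdet's; `coord_type='DEPTH'` is assumed here, as it is not visible in the hunk context above):

    import mmdet3d.datasets  # noqa: F401, registers the 3D pipeline transforms
    from mmdet.datasets.pipelines import Compose

    train_pipeline_head = Compose([
        dict(
            type='LoadPointsFromFile',
            coord_type='DEPTH',  # assumed; not shown in the hunks above
            shift_height=True,
            load_dim=6,
            use_dim=[0, 1, 2]),
        dict(
            type='LoadAnnotations3D',
            with_bbox_3d=True,  # boxes now come from the info file
            with_label_3d=True,
            with_mask_3d=True,
            with_seg_3d=True),
        dict(type='GlobalAlignment', rotation_axis=2),  # before class mapping
    ])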
- """ - xmin = np.min(points[:, 0]) - ymin = np.min(points[:, 1]) - zmin = np.min(points[:, 2]) - xmax = np.max(points[:, 0]) - ymax = np.max(points[:, 1]) - zmax = np.max(points[:, 2]) - bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2, - (zmin + zmax) / 2, xmax - xmin, ymax - ymin, - zmax - zmin]) - return bbox - - def _extract_bboxes(self, input_dict): - """Extract bounding boxes from points, semantic mask and instance mask. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after extracting bboxes, keys in \ - input_dict['bbox3d_fields'] are updated in the dict. - """ - # TODO: this function is only used in ScanNet-Det pipeline currently - # TODO: we only extract gt_bboxes_3d which is DepthInstance3DBoxes - from mmdet3d.core.bbox import DepthInstance3DBoxes - - assert 'pts_instance_mask' in input_dict.keys(), \ - 'instance mask is not provided in GlobalAlignment' - assert 'pts_semantic_mask' in input_dict.keys(), \ - 'semantic mask is not provided in GlobalAlignment' - - coords = input_dict['points'].coord.numpy() - inst_mask = input_dict['pts_instance_mask'] - sem_mask = input_dict['pts_semantic_mask'] - - # select points from valid categories where we want to extract bboxes - valid_cat_mask = (sem_mask != self.ignore_index) - inst_ids = np.unique(inst_mask[valid_cat_mask]) # ids of valid insts - instance_bboxes = np.zeros((inst_ids.shape[0], 7)) - inst_id2cat_id = { - inst_id: sem_mask[inst_mask == inst_id][0] - for inst_id in inst_ids - } - for bbox_idx, inst_id in enumerate(inst_ids): - cat_id = inst_id2cat_id[inst_id] - inst_coords = coords[inst_mask == inst_id] - bbox = self._bbox_from_points(inst_coords) - instance_bboxes[bbox_idx, :6] = bbox - instance_bboxes[bbox_idx, 6] = cat_id - - if 'gt_bboxes_3d' not in input_dict['bbox3d_fields']: - input_dict['bbox3d_fields'].append('gt_bboxes_3d') - input_dict['gt_bboxes_3d'] = DepthInstance3DBoxes( - instance_bboxes[:, :6], - box_dim=6, - with_yaw=False, - origin=(0.5, 0.5, 0.5)) - input_dict['gt_labels_3d'] = instance_bboxes[:, 6].astype(np.long) - def __call__(self, input_dict): """Call function to shuffle points. @@ -447,16 +371,12 @@ def __call__(self, input_dict): self._check_rot_mat(rot_mat) self._rot_points(input_dict, rot_mat) self._trans_points(input_dict, trans_vec) - if self.extract_bbox: - self._extract_bboxes(input_dict) return input_dict def __repr__(self): repr_str = self.__class__.__name__ - repr_str += f'(rotation_axis={self.rotation_axis},' - repr_str += f' ignore_index={self.ignore_index},' - repr_str += f' extract_bbox={self.extract_bbox})' + repr_str += f'(rotation_axis={self.rotation_axis})' return repr_str diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py index dc7e34cae1..1dfff7d7f7 100644 --- a/mmdet3d/datasets/scannet_dataset.py +++ b/mmdet3d/datasets/scannet_dataset.py @@ -154,89 +154,6 @@ def _get_axis_align_matrix(info): 'use new pre-process scripts to re-generate ScanNet data') return np.eye(4).astype(np.float32) - def evaluate(self, - results, - metric=None, - iou_thr=(0.25, 0.5), - logger=None, - show=False, - out_dir=None, - pipeline=None): - """Evaluate. - - Evaluation in indoor protocol. - Since ScanNet detection data pipeline re-computes ground-truth boxes, - we can't directly use gt_bboxes from self.data_infos. - - Args: - results (list[dict]): List of results. - metric (str | list[str]): Metrics to be evaluated. - iou_thr (list[float]): AP IoU thresholds. - show (bool): Whether to visualize. - Default: False. 
diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py
index dc7e34cae1..1dfff7d7f7 100644
--- a/mmdet3d/datasets/scannet_dataset.py
+++ b/mmdet3d/datasets/scannet_dataset.py
@@ -154,89 +154,6 @@ def _get_axis_align_matrix(info):
             'use new pre-process scripts to re-generate ScanNet data')
         return np.eye(4).astype(np.float32)
 
-    def evaluate(self,
-                 results,
-                 metric=None,
-                 iou_thr=(0.25, 0.5),
-                 logger=None,
-                 show=False,
-                 out_dir=None,
-                 pipeline=None):
-        """Evaluate.
-
-        Evaluation in indoor protocol.
-        Since ScanNet detection data pipeline re-computes ground-truth boxes,
-        we can't directly use gt_bboxes from self.data_infos.
-
-        Args:
-            results (list[dict]): List of results.
-            metric (str | list[str]): Metrics to be evaluated.
-            iou_thr (list[float]): AP IoU thresholds.
-            show (bool): Whether to visualize.
-                Default: False.
-            out_dir (str): Path to save the visualization results.
-                Default: None.
-            pipeline (list[dict], optional): raw data loading for showing.
-                Default: None.
-
-        Returns:
-            dict: Evaluation results.
-        """
-        from mmdet3d.core.evaluation import indoor_eval
-        assert isinstance(
-            results, list), f'Expect results to be list, got {type(results)}.'
-        assert len(results) > 0, 'Expect length of results > 0.'
-        assert len(results) == len(self.data_infos)
-        assert isinstance(
-            results[0], dict
-        ), f'Expect elements in results to be dict, got {type(results[0])}.'
-        # load gt_bboxes via pipeline
-        pipeline = self._get_pipeline(pipeline)
-        gt_bboxes = [
-            self._extract_data(
-                i, pipeline, ['gt_bboxes_3d', 'gt_labels_3d'], load_annos=True)
-            for i in range(len(self.data_infos))
-        ]
-        gt_annos = [self._build_annos(*gt_bbox) for gt_bbox in gt_bboxes]
-        label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
-        ret_dict = indoor_eval(
-            gt_annos,
-            results,
-            iou_thr,
-            label2cat,
-            logger=logger,
-            box_type_3d=self.box_type_3d,
-            box_mode_3d=self.box_mode_3d)
-        if show:
-            self.show(results, out_dir, pipeline=pipeline)
-
-        return ret_dict
-
-    @staticmethod
-    def _build_annos(gt_bboxes, gt_labels):
-        """Transform gt bboxes and labels into self.data_infos['annos'] format.
-
-        Args:
-            gt_bboxes (:obj:`BaseInstance3DBoxes`): \
-                3D bounding boxes in Depth coordinate
-            gt_labels (torch.Tensor): Labels of boxes.
-
-        Returns:
-            dict: annotations including the following keys
-
-                - gt_boxes_upright_depth (np.ndarray): 3D bounding boxes.
-                - class (np.ndarray): Labels of boxes.
-                - gt_num (int): Number of boxes.
-        """
-        bbox = gt_bboxes.tensor.numpy()[:, :6].copy()  # drop yaw dimension
-        bbox[..., 2] += bbox[..., 5] / 2  # bottom center to gravity center
-        anno = {
-            'gt_boxes_upright_depth': bbox,
-            'class': gt_labels.numpy(),
-            'gt_num': gt_labels.shape[0]
-        }
-        return anno
-
     def _build_default_pipeline(self):
         """Build the default pipeline for this dataset."""
         pipeline = [
@@ -248,19 +165,11 @@
                 use_dim=[0, 1, 2]),
             dict(
                 type='LoadAnnotations3D',
-                with_bbox_3d=False,
-                with_label_3d=False,
-                with_mask_3d=True,
-                with_seg_3d=True),
-            dict(
-                type='PointSegClassMapping',
-                valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                               33, 34, 36, 39)),
-            dict(
-                type='GlobalAlignment',
-                rotation_axis=2,
-                ignore_index=len(self.CLASSES),
-                extract_bbox=True),
+                with_bbox_3d=True,
+                with_label_3d=True,
+                with_mask_3d=False,
+                with_seg_3d=False),
+            dict(type='GlobalAlignment', rotation_axis=2),
             dict(
                 type='DefaultFormatBundle3D',
                 class_names=self.CLASSES,
@@ -287,10 +196,9 @@ def show(self, results, out_dir, show=True, pipeline=None):
             data_info = self.data_infos[i]
             pts_path = data_info['pts_path']
             file_name = osp.split(pts_path)[-1].split('.')[0]
-            points, gt_bboxes = self._extract_data(
-                i, pipeline, ['points', 'gt_bboxes_3d'], load_annos=True)
-            points = points.numpy()
-            gt_bboxes = gt_bboxes.tensor.numpy()
+            points = self._extract_data(
+                i, pipeline, 'points', load_annos=True).numpy()
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
             pred_bboxes = result['boxes_3d'].tensor.numpy()
             show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,
                         show)
diff --git a/tests/data/scannet/scannet_infos.pkl b/tests/data/scannet/scannet_infos.pkl
index d0fe1b839d..20595af6a5 100644
Binary files a/tests/data/scannet/scannet_infos.pkl and b/tests/data/scannet/scannet_infos.pkl differ
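With the override deleted and `scannet_infos.pkl` regenerated to hold axis-aligned ground truth, the dataset presumably falls back to the generic indoor `evaluate()` of its base class, which reads boxes via `get_ann_info` instead of re-extracting them from masks. A hedged usage sketch (paths point at the repo's test data; the constructor defaults are assumed sufficient):

    from mmdet3d.datasets import ScanNetDataset

    dataset = ScanNetDataset(
        data_root='tests/data/scannet',
        ann_file='tests/data/scannet/scannet_infos.pkl')
    # Axis-aligned DepthInstance3DBoxes straight from the info file:
    gt_boxes = dataset.get_ann_info(0)['gt_bboxes_3d']
    # results = [dict(boxes_3d=..., labels_3d=..., scores_3d=...)]
    # ret_dict = dataset.evaluate(results, metric=[0.25, 0.5])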
diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py
index 37ea8ccba4..1974553447 100644
--- a/tests/test_data/test_datasets/test_scannet_dataset.py
+++ b/tests/test_data/test_datasets/test_scannet_dataset.py
@@ -27,15 +27,11 @@ def test_getitem():
             with_label_3d=True,
             with_mask_3d=True,
             with_seg_3d=True),
+        dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='PointSegClassMapping',
             valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
                            33, 34, 36, 39)),
-        dict(
-            type='GlobalAlignment',
-            rotation_axis=2,
-            ignore_index=len(class_names),
-            extract_bbox=True),
         dict(type='IndoorPointSample', num_points=5),
         dict(
             type='RandomFlip3D',
@@ -79,13 +75,15 @@
         [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
         [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
     expected_gt_bboxes_3d = torch.tensor(
-        [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000],
-         [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000],
-         [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000],
-         [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000],
-         [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]])
-    expected_gt_labels = np.array(
-        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
+        [[-1.1835, -3.6317, 1.5704, 1.7577, 0.3761, 0.5724, 0.0000],
+         [-3.1832, 3.2269, 1.1911, 0.6727, 0.2251, 0.6715, 0.0000],
+         [-0.9598, -2.2864, 0.0093, 0.7506, 2.5709, 1.2145, 0.0000],
+         [-2.6988, -2.7354, 0.8288, 0.7680, 1.8877, 0.2870, 0.0000],
+         [3.2989, 0.2885, -0.0090, 0.7600, 3.8814, 2.1603, 0.0000]])
+    expected_gt_labels = np.array([
+        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
+        0, 0, 0, 5, 5, 5
+    ])
     expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
     expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
     original_classes = scannet_dataset.CLASSES
@@ -131,23 +129,47 @@ def test_evaluate():
     results = []
     pred_boxes = dict()
     pred_boxes['boxes_3d'] = DepthInstance3DBoxes(
-        torch.tensor(
-            [[-3.7146, -1.0654, 0.6052, 0.6298, 1.9906, 0.4429, 0.0000],
-             [-8.5576, -1.8178, 0.2046, 1.1263, 2.7851, 1.8632, 0.0000],
-             [-8.8859, -5.3550, 0.9772, 0.9093, 0.3098, 0.5662, 0.0000],
-             [-8.0989, -5.0358, 0.0372, 0.2746, 0.2057, 0.5532, 0.0000],
-             [-6.9733, 0.3352, -0.0296, 1.2265, 0.7187, 2.2613, 0.0000],
-             [-5.3636, -1.6047, 0.3701, 2.8043, 1.1057, 0.3171, 0.0000]]))
-    pred_boxes['labels_3d'] = torch.tensor([4, 11, 11, 10, 0, 3])
+        torch.tensor([[
+            1.4813e+00, 3.5207e+00, 1.5704e+00, 1.7445e+00, 2.3196e-01,
+            5.7235e-01, 0.0000e+00
+        ],
+                      [
+                          2.9040e+00, -3.4803e+00, 1.1911e+00, 6.6078e-01,
+                          1.7072e-01, 6.7154e-01, 0.0000e+00
+                      ],
+                      [
+                          1.1466e+00, 2.1987e+00, 9.2576e-03, 5.4184e-01,
+                          2.5346e+00, 1.2145e+00, 0.0000e+00
+                      ],
+                      [
+                          2.9168e+00, 2.5016e+00, 8.2875e-01, 6.1697e-01,
+                          1.8428e+00, 2.8697e-01, 0.0000e+00
+                      ],
+                      [
+                          -3.3114e+00, -1.3351e-02, -8.9524e-03, 4.4082e-01,
+                          3.8582e+00, 2.1603e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.0135e+00, -3.4857e+00, 9.3848e-01, 1.9911e+00,
+                          2.1603e-01, 1.2767e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.1945e+00, -3.1402e+00, -3.8165e-02, 1.4801e+00,
+                          6.8676e-01, 1.0586e+00, 0.0000e+00
+                      ],
+                      [
+                          -2.7553e+00, 2.4055e+00, -2.9972e-02, 1.4764e+00,
+                          1.4927e+00, 2.3380e+00, 0.0000e+00
+                      ]]))
+    pred_boxes['labels_3d'] = torch.tensor([6, 6, 4, 9, 11, 11])
     pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0, 1.0, 1.0, 0.5])
     results.append(pred_boxes)
     metric = [0.25, 0.5]
     ret_dict = scannet_dataset.evaluate(results, metric)
-    assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01
+    assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01
+    assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01
     assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01
-    assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01
 
     # test evaluate with pipeline
     class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
@@ -163,19 +185,11 @@
             use_dim=[0, 1, 2]),
         dict(
             type='LoadAnnotations3D',
-            with_bbox_3d=False,
-            with_label_3d=False,
-            with_mask_3d=True,
-            with_seg_3d=True),
-        dict(
-            type='PointSegClassMapping',
-            valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
-                           34, 36, 39)),
-        dict(
-            type='GlobalAlignment',
-            rotation_axis=2,
-            ignore_index=len(class_names),
-            extract_bbox=True),
+            with_bbox_3d=True,
+            with_label_3d=True,
+            with_mask_3d=False,
+            with_seg_3d=False),
+        dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='DefaultFormatBundle3D',
             class_names=class_names,
@@ -185,11 +199,10 @@
     ]
     ret_dict = scannet_dataset.evaluate(
         results, metric, pipeline=eval_pipeline)
-    assert abs(ret_dict['table_AP_0.25'] - 0.5) < 0.01
+    assert abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01
+    assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01
+    assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01
     assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['desk_AP_0.25'] - 1.0) < 0.01
-    assert abs(ret_dict['cabinet_AP_0.25'] - 0.25) < 0.01
-    assert abs(ret_dict['sofa_AP_0.25'] - 1.0) < 0.01
 
 
 def test_show():
@@ -254,19 +267,11 @@
         use_dim=[0, 1, 2]),
     dict(
         type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
-        with_mask_3d=True,
-        with_seg_3d=True),
-    dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
-                       34, 36, 39)),
-    dict(
-        type='GlobalAlignment',
-        rotation_axis=2,
-        ignore_index=len(class_names),
-        extract_bbox=True),
+        with_bbox_3d=True,
+        with_label_3d=True,
+        with_mask_3d=False,
+        with_seg_3d=False),
+    dict(type='GlobalAlignment', rotation_axis=2),
     dict(
         type='DefaultFormatBundle3D',
         class_names=class_names,
diff --git a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
index 8f672a2f65..5e64d7e6c4 100644
--- a/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
+++ b/tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py
@@ -3,8 +3,7 @@
 import pytest
 import torch
 
-from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes,
-                          DepthInstance3DBoxes, LiDARInstance3DBoxes)
+from mmdet3d.core import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
 from mmdet3d.core.points import DepthPoints, LiDARPoints
 from mmdet3d.datasets import (BackgroundPointsFilter, GlobalAlignment,
                               ObjectNoise, ObjectSample, PointShuffle,
@@ -225,49 +224,22 @@ def test_points_range_filter():
 
 def test_global_alignment():
     np.random.seed(0)
-    valid_cat_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
-                     36, 39)
-    ignore_index = len(valid_cat_ids)
-    cat_ids2class = np.ones((41, ), dtype=np.int) * ignore_index
-    for class_id, cat_id in enumerate(valid_cat_ids):
-        cat_ids2class[cat_id] = class_id
-
-    global_alignment = GlobalAlignment(
-        rotation_axis=2, ignore_index=ignore_index, extract_bbox=True)
+    global_alignment = GlobalAlignment(rotation_axis=2)
 
     points = np.fromfile('tests/data/scannet/points/scene0000_00.bin',
                          np.float32).reshape(-1, 6)
-    sem_mask = np.fromfile('tests/data/scannet/semantic_mask/scene0000_00.bin',
-                           np.long)
-    ins_mask = np.fromfile('tests/data/scannet/instance_mask/scene0000_00.bin',
-                           np.long)
     annos = mmcv.load('tests/data/scannet/scannet_infos.pkl')
     info = annos[0]
-    gt_bboxes_3d = info['annos']['gt_boxes_upright_depth']
     axis_align_matrix = info['annos']['axis_align_matrix']
-    gt_labels_3d = info['annos']['class']
 
     depth_points = DepthPoints(points.copy(), points_dim=6)
-    depth_bboxes = DepthInstance3DBoxes(
-        gt_bboxes_3d,
-        box_dim=gt_bboxes_3d.shape[-1],
-        with_yaw=False,
-        origin=(0.5, 0.5, 0.5))
-    sem_mask = cat_ids2class[sem_mask]
 
     input_dict = dict(
         points=depth_points.clone(),
-        gt_bboxes_3d=depth_bboxes,
-        bbox3d_fields=['gt_bboxes_3d'],
-        gt_labels_3d=gt_labels_3d,
-        ann_info=dict(axis_align_matrix=axis_align_matrix),
-        pts_instance_mask=ins_mask,
-        pts_semantic_mask=sem_mask)
+        ann_info=dict(axis_align_matrix=axis_align_matrix))
 
     input_dict = global_alignment(input_dict)
     trans_depth_points = input_dict['points']
-    trans_depth_bboxes = input_dict['gt_bboxes_3d']
-    trans_bbox_labels = input_dict['gt_labels_3d']
 
     # construct expected transformed points by affine transformation
     pts = np.ones((points.shape[0], 4))
@@ -275,60 +247,11 @@
     trans_pts = np.dot(pts, axis_align_matrix.T)
     expected_points = np.concatenate([trans_pts[:, :3], points[:, 3:]],
                                      axis=1)
 
-    expected_bbox_labels = np.array(
-        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
-    expected_depth_bboxes = np.array(
-        [[
-            -3.714606, -1.0654305, 0.6051854, 0.6297655, 1.9905674, 0.44288868,
-            0.
-        ],
-         [
-             -8.557551, -1.8178326, 0.20456636, 1.1263373, 2.7851129,
-             1.8631845, 0.
-         ],
-         [
-             -8.885854, -5.354957, 0.97720087, 0.9093195, 0.30981588, 0.566175,
-             0.
-         ],
-         [
-             -8.098918, -5.0357704, 0.03724962, 0.27458152, 0.20566699,
-             0.5532104, 0.
-         ],
-         [
-             -6.9733434, 0.33523083, -0.02958763, 1.2264912, 0.7187278,
-             2.2613325, 0.
-         ],
-         [
-             -5.36362, -1.6046655, 0.37014085, 2.8042943, 1.1057366,
-             0.31707314, 0.
-         ], [-2.6299255, -2.3314357, 1.4469249, 0., 0., 0., 0.],
-         [-5.201888, -1.014641, 0.11020403, 0., 0., 0., 0.],
-         [
-             -3.5216672, -6.8292904, 0.26571387, 0.13945593, 0.12182455,
-             0.02463818, 0.
-         ],
-         [
-             -6.4834313, -5.4506774, 0.13558027, 1.4790803, 0.6031074,
-             0.60305846, 0.
-         ],
-         [
-             -9.338867, -4.616579, 0.6112565, 0.17650154, 0.988079, 0.16838372,
-             0.
-         ], [-2.0639155, -1.245964, 0.30754995, 0., 0., 0., 0.],
-         [-2.002855, -1.9495802, 2.2899528, 0., 0., 0., 0.],
-         [-2.1240144, -3.751592, 0.92695427, 0., 0., 0., 0.],
-         [-3.6406162, -5.1366153, 0.25374442, 0., 0., 0., 0.]])
-
     assert np.allclose(
         trans_depth_points.tensor.numpy(), expected_points, atol=1e-6)
-    assert np.all(trans_bbox_labels == expected_bbox_labels)
-    assert np.allclose(
-        trans_depth_bboxes.tensor.numpy(), expected_depth_bboxes, atol=1e-6)
 
     repr_str = repr(global_alignment)
-    expected_repr_str = 'GlobalAlignment(rotation_axis=2,' \
-                        f' ignore_index={ignore_index},' \
-                        f' extract_bbox=True)'
+    expected_repr_str = 'GlobalAlignment(rotation_axis=2)'
     assert repr_str == expected_repr_str
diff --git a/tests/test_data/test_pipelines/test_indoor_pipeline.py b/tests/test_data/test_pipelines/test_indoor_pipeline.py
index 5563dcd073..6e705e85f9 100644
--- a/tests/test_data/test_pipelines/test_indoor_pipeline.py
+++ b/tests/test_data/test_pipelines/test_indoor_pipeline.py
@@ -23,19 +23,15 @@ def test_scannet_pipeline():
             use_dim=[0, 1, 2]),
         dict(
             type='LoadAnnotations3D',
-            with_bbox_3d=False,
-            with_label_3d=False,
+            with_bbox_3d=True,
+            with_label_3d=True,
             with_mask_3d=True,
             with_seg_3d=True),
+        dict(type='GlobalAlignment', rotation_axis=2),
         dict(
             type='PointSegClassMapping',
             valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
                            33, 34, 36, 39)),
-        dict(
-            type='GlobalAlignment',
-            rotation_axis=2,
-            ignore_index=len(class_names),
-            extract_bbox=True),
         dict(type='IndoorPointSample', num_points=5),
         dict(
             type='RandomFlip3D',
@@ -97,13 +93,15 @@
         [6.8790e+00, 1.5086e+00, -9.3154e-02, 6.3816e-03],
         [4.8253e+00, 2.6668e-01, 1.4917e+00, 1.5912e+00]])
     expected_gt_bboxes_3d = torch.tensor(
-        [[3.6132, 1.3705, 0.6052, 0.7930, 2.0360, 0.4429, 0.0000],
-         [8.3769, 2.5228, 0.2046, 1.3539, 2.8691, 1.8632, 0.0000],
-         [8.4100, 6.0750, 0.9772, 0.9319, 0.3843, 0.5662, 0.0000],
-         [7.6524, 5.6915, 0.0372, 0.2907, 0.2278, 0.5532, 0.0000],
-         [6.9771, 0.2455, -0.0296, 1.2820, 0.8182, 2.2613, 0.0000]])
-    expected_gt_labels_3d = np.array(
-        [4, 11, 11, 10, 0, 3, 12, 4, 14, 1, 0, 0, 0, 5, 5]).astype(np.long)
+        [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
+         [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
+         [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
+         [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
+         [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
+    expected_gt_labels_3d = np.array([
+        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
+        0, 0, 0, 5, 5, 5
+    ])
     expected_pts_semantic_mask = np.array([0, 18, 18, 18, 18])
     expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
     assert torch.allclose(points, expected_points, 1e-2)