open-mmlab · Tai-Wang · Nov 23, 2022 · Nov 3, 2022 · Nov 4, 2022 · Nov 8, 2022
diff --git a/configs/_base_/datasets/s3dis-3d.py b/configs/_base_/datasets/s3dis-3d.py
@@ -0,0 +1,110 @@
+# dataset settings shared by the train/val/test dataloaders below
+dataset_type = 'S3DISDataset'
+data_root = 'data/s3dis/'
+
+metainfo = dict(classes=('table', 'chair', 'sofa', 'bookcase', 'board'))
+train_area = [1, 2, 3, 4, 6]  # one info file per area is used for training
+test_area = 5  # area whose info file is used for val/test
+
+train_pipeline = [  # pipeline passed to every training dataset below
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='DEPTH',
+        shift_height=False,
+        use_color=True,
+        load_dim=6,  # presumably xyz + rgb per point - TODO confirm
+        use_dim=[0, 1, 2, 3, 4, 5]),  # keep all six loaded dimensions
+    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+    dict(type='PointSample', num_points=40000),
+    dict(
+        type='RandomFlip3D',
+        sync_2d=False,
+        flip_ratio_bev_horizontal=0.5,
+        flip_ratio_bev_vertical=0.5),
+    dict(
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.087266, 0.087266],  # ~5 degrees if radians - confirm
+        scale_ratio_range=[1.0, 1.0]),  # equal bounds, i.e. a fixed ratio of 1
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+]
+test_pipeline = [  # pipeline passed to the val and test datasets below
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='DEPTH',
+        shift_height=False,
+        use_color=True,
+        load_dim=6,  # same loading setup as train_pipeline
+        use_dim=[0, 1, 2, 3, 4, 5]),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],  # zero-width range
+                scale_ratio_range=[1., 1.],  # equal bounds, fixed ratio of 1
+                translation_std=[0, 0, 0]),
+            dict(
+                type='RandomFlip3D',  # likely gated by flip=False - confirm
+                sync_2d=False,
+                flip_ratio_bev_horizontal=0.5,
+                flip_ratio_bev_vertical=0.5),
+            dict(type='PointSample', num_points=40000),
+        ]),
+    dict(type='Pack3DDetInputs', keys=['points'])  # no gt keys packed here
+]
+
+train_dataloader = dict(  # Repeat(Concat(one S3DIS dataset per train area))
+    batch_size=8,
+    num_workers=4,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=5,
+        dataset=dict(
+            type='ConcatDataset',  # one dataset per train_area entry
+            datasets=[
+                dict(
+                    type=dataset_type,
+                    data_root=data_root,
+                    ann_file=f's3dis_infos_Area_{i}.pkl',  # i from train_area
+                    pipeline=train_pipeline,
+                    filter_empty_gt=False,
+                    metainfo=metainfo,
+                    box_type_3d='Depth') for i in train_area
+            ])))
+
+val_dataloader = dict(  # validates on test_area, which is not in train_area
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file=f's3dis_infos_Area_{test_area}.pkl',  # info file of test_area
+        pipeline=test_pipeline,
+        metainfo=metainfo,
+        test_mode=True,
+        box_type_3d='Depth'))
+test_dataloader = dict(  # same area, pipeline and settings as val_dataloader
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file=f's3dis_infos_Area_{test_area}.pkl',  # same file as val
+        pipeline=test_pipeline,
+        metainfo=metainfo,
+        test_mode=True,
+        box_type_3d='Depth'))
+val_evaluator = dict(type='IndoorMetric')
+test_evaluator = val_evaluator  # test reuses the validation evaluator config
+
+vis_backends = [dict(type='LocalVisBackend')]
+visualizer = dict(
+    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py b/configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py
@@ -0,0 +1,111 @@
+_base_ = [
+    '../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
+    '../_base_/datasets/s3dis-3d.py'
+]
+dataset_type = 'S3DISDataset'  # re-declared: the dataset dicts below use it
+data_root = 'data/s3dis/'
+
+metainfo = dict(classes=('table', 'chair', 'sofa', 'bookcase', 'board'))
+train_area = [1, 2, 3, 4, 6]
+test_area = 5
+n_points = 100000  # num_points for PointSample in both pipelines below
+
+model = dict(bbox_head=dict(num_classes=5))  # one per metainfo class
+
+train_pipeline = [  # pipeline passed to every training dataset below
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='DEPTH',
+        shift_height=False,
+        use_color=True,
+        load_dim=6,  # presumably xyz + rgb per point - TODO confirm
+        use_dim=[0, 1, 2, 3, 4, 5]),  # keep all six loaded dimensions
+    dict(type='LoadAnnotations3D'),  # no flags given: transform defaults apply
+    dict(type='PointSample', num_points=n_points),
+    dict(
+        type='RandomFlip3D',
+        sync_2d=False,
+        flip_ratio_bev_horizontal=0.5,
+        flip_ratio_bev_vertical=0.5),
+    dict(
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.087266, 0.087266],  # ~5 degrees if radians - confirm
+        scale_ratio_range=[0.9, 1.1],
+        translation_std=[.1, .1, .1],
+        shift_height=False),
+    dict(type='NormalizePointsColor', color_mean=None),
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+]
+test_pipeline = [  # passed to val_dataloader (and so test_dataloader) below
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='DEPTH',
+        shift_height=False,
+        use_color=True,
+        load_dim=6,  # same loading setup as train_pipeline
+        use_dim=[0, 1, 2, 3, 4, 5]),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],  # zero-width range
+                scale_ratio_range=[1., 1.],  # equal bounds, fixed ratio of 1
+                translation_std=[0, 0, 0]),
+            dict(
+                type='RandomFlip3D',  # likely gated by flip=False - confirm
+                sync_2d=False,
+                flip_ratio_bev_horizontal=0.5,
+                flip_ratio_bev_vertical=0.5),
+            dict(type='PointSample', num_points=n_points)
+        ]),
+    dict(type='Pack3DDetInputs', keys=['points'])  # no gt keys packed here
+]
+
+train_dataloader = dict(  # Repeat(Concat(one S3DIS dataset per train area))
+    batch_size=8,
+    dataset=dict(
+        type='RepeatDataset',
+        times=13,
+        dataset=dict(
+            type='ConcatDataset',  # one dataset per train_area entry
+            datasets=[
+                dict(
+                    type=dataset_type,
+                    data_root=data_root,
+                    ann_file=f's3dis_infos_Area_{i}.pkl',  # i from train_area
+                    pipeline=train_pipeline,
+                    filter_empty_gt=True,
+                    metainfo=metainfo,
+                    box_type_3d='Depth') for i in train_area
+            ])))
+val_dataloader = dict(  # only ann_file and pipeline are set in this file
+    dataset=dict(
+        ann_file=f's3dis_infos_Area_{test_area}.pkl', pipeline=test_pipeline))
+test_dataloader = val_dataloader  # test reuses the val settings above
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
+    clip_grad=dict(max_norm=10, norm_type=2))
+
+# learning rate
+param_scheduler = dict(
+    type='MultiStepLR',
+    begin=0,
+    end=12,  # same value as max_epochs in train_cfg
+    by_epoch=True,
+    milestones=[8, 11],
+    gamma=0.1)  # presumably the lr factor applied at each milestone - confirm
+
+custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
+
+# training schedule for 1x (val_interval equals max_epochs: both are 12)
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
diff --git a/mmdet3d/datasets/convert_utils.py b/mmdet3d/datasets/convert_utils.py
@@ -1,9 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import copy
-from collections import OrderedDict
 from typing import List, Optional, Tuple, Union
 
 import numpy as np
+from nuscenes import NuScenes
 from nuscenes.utils.geometry_utils import view_points
 from pyquaternion import Quaternion
 from shapely.geometry import MultiPoint, box
@@ -53,19 +53,20 @@
 }
 
 
-def get_nuscenes_2d_boxes(nusc, sample_data_token: str,
-                          visibilities: List[str]):
-    """Get the 2d / mono3d annotation records for a given `sample_data_token of
-    nuscenes dataset.
+def get_nuscenes_2d_boxes(nusc: NuScenes, sample_data_token: str,
+                          visibilities: List[str]) -> List[dict]:
+    """Get the 2d / mono3d annotation records for a given `sample_data_token`
+    of nuscenes dataset.
 
     Args:
+        nusc (:obj:`NuScenes`): NuScenes class.
         sample_data_token (str): Sample data token belonging to a camera
             keyframe.
-        visibilities (list[str]): Visibility filter.
+        visibilities (List[str]): Visibility filter.
 
     Return:
-        list[dict]: List of 2d annotation record that belongs to the input
-            `sample_data_token`.
+        List[dict]: List of 2d annotation record that belongs to the input
+        `sample_data_token`.
     """
 
     # Get the sample data and the sample corresponding to that sample data.
@@ -190,7 +191,7 @@ def get_kitti_style_2d_boxes(info: dict,
                              occluded: Tuple[int] = (0, 1, 2, 3),
                              annos: Optional[dict] = None,
                              mono3d: bool = True,
-                             dataset: str = 'kitti'):
+                             dataset: str = 'kitti') -> List[dict]:
     """Get the 2d / mono3d annotation records for a given info.
 
     This function is used to get 2D/Mono3D annotations when loading annotations
@@ -202,7 +203,7 @@ def get_kitti_style_2d_boxes(info: dict,
             belong to. In KITTI, typically only CAM 2 will be used,
             and in Waymo, multi cameras could be used.
             Defaults to 2.
-        occluded (tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
+        occluded (Tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
             0 = fully visible, 1 = partly occluded, 2 = largely occluded,
             3 = unknown, -1 = DontCare.
             Defaults to (0, 1, 2, 3).
@@ -213,8 +214,8 @@ def get_kitti_style_2d_boxes(info: dict,
             Defaults to 'kitti'.
 
     Return:
-        list[dict]: List of 2d / mono3d annotation record that
-            belongs to the input camera id.
+        List[dict]: List of 2d / mono3d annotation record that
+        belongs to the input camera id.
     """
     # Get calibration information
     camera_intrinsic = info['calib'][f'P{cam_idx}']
@@ -336,20 +337,20 @@ def convert_annos(info: dict, cam_idx: int) -> dict:
 
 
 def post_process_coords(
-    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
-) -> Union[Tuple[float, float, float, float], None]:
+    corner_coords: List[int], imsize: Tuple[int] = (1600, 900)
+) -> Union[Tuple[float], None]:
     """Get the intersection of the convex hull of the reprojected bbox corners
     and the image canvas, return None if no intersection.
 
     Args:
-        corner_coords (list[int]): Corner coordinates of reprojected
+        corner_coords (List[int]): Corner coordinates of reprojected
             bounding box.
-        imsize (tuple[int]): Size of the image canvas.
+        imsize (Tuple[int]): Size of the image canvas.
             Defaults to (1600, 900).
 
     Return:
-        tuple[float]: Intersection of the convex hull of the 2D box
-            corners and the image canvas.
+        Tuple[float] or None: Intersection of the convex hull of the 2D box
+        corners and the image canvas.
     """
     polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
     img_canvas = box(0, 0, imsize[0], imsize[1])
@@ -370,7 +371,7 @@ def post_process_coords(
 
 
 def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
-                    dataset: str) -> OrderedDict:
+                    dataset: str) -> dict:
     """Generate one 2D annotation record given various information on top of
     the 2D bounding box coordinates.
 
@@ -387,7 +388,7 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
 
             - bbox_label (int): 2d box label id
             - bbox_label_3d (int): 3d box label id
-            - bbox (list[float]): left x, top y, right x, bottom y of 2d box
+            - bbox (List[float]): left x, top y, right x, bottom y of 2d box
             - bbox_3d_isvalid (bool): whether the box is valid
     """