Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Support FCAF3D on S3DIS dataset in dev-1.x branch #1984

Merged
merged 22 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions configs/_base_/datasets/s3dis-3d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Dataset settings for 3D object detection on S3DIS.
dataset_type = 'S3DISDataset'
data_root = 'data/s3dis/'

# Object categories used for detection.
metainfo = dict(classes=('table', 'chair', 'sofa', 'bookcase', 'board'))
# Areas 1-4 and 6 form the training split; Area 5 is held out and used for
# both validation and testing below.
train_area = [1, 2, 3, 4, 6]
test_area = 5

# Training pipeline: load xyz + rgb points and 3D boxes, subsample, then apply
# random flips and a small random rotation.
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='PointSample', num_points=40000),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        # NOTE(review): presumably radians (0.087266 rad is about 5 degrees)
        # - confirm against the GlobalRotScaleTrans documentation.
        rot_range=[-0.087266, 0.087266],
        scale_ratio_range=[1.0, 1.0]),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
# Test pipeline: same point loading, wrapped in the test-time augmentation
# container with identity rotation / scale / translation settings.
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
                flip_ratio_bev_vertical=0.5),
            dict(type='PointSample', num_points=40000),
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

# One dataset per training area, concatenated and then repeated 5 times.
train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type='ConcatDataset',
            datasets=[
                dict(
                    type=dataset_type,
                    data_root=data_root,
                    ann_file=f's3dis_infos_Area_{i}.pkl',
                    pipeline=train_pipeline,
                    filter_empty_gt=False,
                    metainfo=metainfo,
                    box_type_3d='Depth') for i in train_area
            ])))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        # Must follow the same `Area_{n}` naming as the train / val info
        # files; the underscore was previously missing here.
        ann_file=f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
# Validation and testing share one evaluator configuration.
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
111 changes: 111 additions & 0 deletions configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# FCAF3D on S3DIS (5 classes): model, runtime and dataset defaults come from
# the base configs; the settings below override them.
_base_ = [
    '../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
    '../_base_/datasets/s3dis-3d.py'
]
# Plain Python names used by the pipelines and dataloaders defined below.
dataset_type = 'S3DISDataset'
data_root = 'data/s3dis/'

metainfo = dict(classes=('table', 'chair', 'sofa', 'bookcase', 'board'))
# Areas 1-4 and 6 are used for training; Area 5 for validation / testing.
train_area = [1, 2, 3, 4, 6]
test_area = 5
# Number of points sampled per scene, shared by the train and test pipelines.
n_points = 100000

# The detection head predicts the five classes listed in `metainfo`.
model = dict(bbox_head=dict(num_classes=5))

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(type='LoadAnnotations3D'),
    dict(type='PointSample', num_points=n_points),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.087266, 0.087266],
        scale_ratio_range=[0.9, 1.1],
        translation_std=[.1, .1, .1],
        shift_height=False),
    dict(type='NormalizePointsColor', color_mean=None),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
                flip_ratio_bev_vertical=0.5),
            dict(type='PointSample', num_points=n_points),
            # Apply the same colour normalization as the training pipeline so
            # that evaluation inputs match what the model was trained on.
            dict(type='NormalizePointsColor', color_mean=None)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

# One dataset per training area, concatenated and then repeated 13 times.
train_dataloader = dict(
    batch_size=8,
    dataset=dict(
        type='RepeatDataset',
        times=13,
        dataset=dict(
            type='ConcatDataset',
            datasets=[
                dict(
                    type=dataset_type,
                    data_root=data_root,
                    ann_file=f's3dis_infos_Area_{i}.pkl',
                    pipeline=train_pipeline,
                    filter_empty_gt=True,
                    metainfo=metainfo,
                    box_type_3d='Depth') for i in train_area
            ])))
# Only the annotation file and pipeline are overridden for evaluation; the
# test dataloader reuses the very same override dict.
val_dataloader = dict(
    dataset=dict(
        ann_file=f's3dis_infos_Area_{test_area}.pkl', pipeline=test_pipeline))
test_dataloader = val_dataloader

optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
    clip_grad=dict(max_norm=10, norm_type=2))

# learning rate: step decay by `gamma` at the listed epoch milestones
param_scheduler = dict(
    type='MultiStepLR',
    begin=0,
    end=12,
    by_epoch=True,
    milestones=[8, 11],
    gamma=0.1)

custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]

# training schedule for 1x (val_interval equals max_epochs)
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
41 changes: 21 additions & 20 deletions mmdet3d/datasets/convert_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from collections import OrderedDict
from typing import List, Optional, Tuple, Union

import numpy as np
from nuscenes import NuScenes
from nuscenes.utils.geometry_utils import view_points
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box
Expand Down Expand Up @@ -53,19 +53,20 @@
}


def get_nuscenes_2d_boxes(nusc, sample_data_token: str,
visibilities: List[str]):
"""Get the 2d / mono3d annotation records for a given `sample_data_token of
nuscenes dataset.
def get_nuscenes_2d_boxes(nusc: NuScenes, sample_data_token: str,
visibilities: List[str]) -> List[dict]:
"""Get the 2d / mono3d annotation records for a given `sample_data_token`
of nuscenes dataset.

Args:
nusc (:obj:`NuScenes`): NuScenes class.
sample_data_token (str): Sample data token belonging to a camera
keyframe.
visibilities (list[str]): Visibility filter.
visibilities (List[str]): Visibility filter.

Return:
list[dict]: List of 2d annotation record that belongs to the input
`sample_data_token`.
List[dict]: List of 2d annotation record that belongs to the input
`sample_data_token`.
"""

# Get the sample data and the sample corresponding to that sample data.
Expand Down Expand Up @@ -190,7 +191,7 @@ def get_kitti_style_2d_boxes(info: dict,
occluded: Tuple[int] = (0, 1, 2, 3),
annos: Optional[dict] = None,
mono3d: bool = True,
dataset: str = 'kitti'):
dataset: str = 'kitti') -> List[dict]:
"""Get the 2d / mono3d annotation records for a given info.

This function is used to get 2D/Mono3D annotations when loading annotations
Expand All @@ -202,7 +203,7 @@ def get_kitti_style_2d_boxes(info: dict,
belong to. In KITTI, typically only CAM 2 will be used,
and in Waymo, multi cameras could be used.
Defaults to 2.
occluded (tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
occluded (Tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
0 = fully visible, 1 = partly occluded, 2 = largely occluded,
3 = unknown, -1 = DontCare.
Defaults to (0, 1, 2, 3).
Expand All @@ -213,8 +214,8 @@ def get_kitti_style_2d_boxes(info: dict,
Defaults to 'kitti'.

Return:
list[dict]: List of 2d / mono3d annotation record that
belongs to the input camera id.
List[dict]: List of 2d / mono3d annotation record that
belongs to the input camera id.
"""
# Get calibration information
camera_intrinsic = info['calib'][f'P{cam_idx}']
Expand Down Expand Up @@ -336,20 +337,20 @@ def convert_annos(info: dict, cam_idx: int) -> dict:


def post_process_coords(
corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
corner_coords: List[int], imsize: Tuple[int] = (1600, 900)
) -> Union[Tuple[float], None]:
"""Get the intersection of the convex hull of the reprojected bbox corners
and the image canvas, return None if no intersection.

Args:
corner_coords (list[int]): Corner coordinates of reprojected
corner_coords (List[int]): Corner coordinates of reprojected
bounding box.
imsize (tuple[int]): Size of the image canvas.
imsize (Tuple[int]): Size of the image canvas.
Defaults to (1600, 900).

Return:
tuple[float]: Intersection of the convex hull of the 2D box
corners and the image canvas.
Tuple[float] or None: Intersection of the convex hull of the 2D box
corners and the image canvas.
"""
polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
img_canvas = box(0, 0, imsize[0], imsize[1])
Expand All @@ -370,7 +371,7 @@ def post_process_coords(


def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
dataset: str) -> OrderedDict:
dataset: str) -> dict:
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.

Expand All @@ -387,7 +388,7 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,

- bbox_label (int): 2d box label id
- bbox_label_3d (int): 3d box label id
- bbox (list[float]): left x, top y, right x, bottom y of 2d box
- bbox (List[float]): left x, top y, right x, bottom y of 2d box
- bbox_3d_isvalid (bool): whether the box is valid
"""

Expand Down
Loading