open-mmlab · ZwwWayne · Apr 23, 2021 · Apr 13, 2021 · Apr 14, 2021 · Apr 14, 2021
diff --git a/configs/_base_/datasets/nus-mono3d.py b/configs/_base_/datasets/nus-mono3d.py
@@ -0,0 +1,89 @@
+dataset_type = 'NuScenesMonoDataset'
+data_root = 'data/nuscenes/'  # prepended to ann_file; also the img_prefix
+class_names = [  # 10 classes; the order presumably fixes label ids - confirm
+ 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
+ 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
+]
+# Input modality for the nuScenes dataset; only the camera is enabled here.
+# The submission format requires the information in input_modality.
+input_modality = dict(
+ use_lidar=False,
+ use_camera=True,
+ use_radar=False,
+ use_map=False,
+ use_external=False)
+img_norm_cfg = dict(  # unpacked into the 'Normalize' pipeline steps below
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [  # referenced by data.train below
+ dict(type='LoadImageFromFileMono3D'),
+ dict(
+ type='LoadAnnotations3D',
+ with_bbox=True,
+ with_label=True,
+ with_attr_label=True,
+ with_bbox_3d=True,
+ with_label_3d=True,
+ with_bbox_depth=True),
+ dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+ dict(type='Normalize', **img_norm_cfg),  # mean/std/to_rgb from above
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle3D', class_names=class_names),
+ dict(
+ type='Collect3D',
+ keys=[  # presumably the fields handed to the model - confirm
+ 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
+ 'gt_labels_3d', 'centers2d', 'depths'
+ ]),
+]
+test_pipeline = [  # referenced by both data.val and data.test below
+ dict(type='LoadImageFromFileMono3D'),
+ dict(
+ type='MultiScaleFlipAug',
+ scale_factor=1.0,
+ flip=False,  # presumably makes RandomFlip3D a no-op - confirm
+ transforms=[
+ dict(type='RandomFlip3D'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(
+ type='DefaultFormatBundle3D',
+ class_names=class_names,
+ with_label=False),
+ dict(type='Collect3D', keys=['img']),  # image key only
+ ])
+]
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(  # train annotations; the only entry with test_mode=False
+ type=dataset_type,
+ data_root=data_root,
+ ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json',
+ img_prefix=data_root,
+ classes=class_names,
+ pipeline=train_pipeline,
+ modality=input_modality,
+ test_mode=False,
+ box_type_3d='Camera'),
+ val=dict(  # val annotations, run through test_pipeline
+ type=dataset_type,
+ data_root=data_root,
+ ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
+ img_prefix=data_root,
+ classes=class_names,
+ pipeline=test_pipeline,
+ modality=input_modality,
+ test_mode=True,
+ box_type_3d='Camera'),
+ test=dict(  # NOTE: same val annotation file and settings as `val`
+ type=dataset_type,
+ data_root=data_root,
+ ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
+ img_prefix=data_root,
+ classes=class_names,
+ pipeline=test_pipeline,
+ modality=input_modality,
+ test_mode=True,
+ box_type_3d='Camera'))
+evaluation = dict(interval=2)  # presumably evaluate every 2 epochs - confirm
diff --git a/configs/_base_/models/fcos3d.py b/configs/_base_/models/fcos3d.py
@@ -0,0 +1,75 @@
+model = dict(  # FCOSMono3D: ResNet-101 backbone, FPN neck, FCOSMono3DHead
+ type='FCOSMono3D',
+ pretrained='open-mmlab://detectron2/resnet101_caffe',
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=False),
+ norm_eval=True,
+ style='caffe'),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],  # four entries, as in out_indices
+ out_channels=256,
+ start_level=1,
+ add_extra_convs=True,
+ extra_convs_on_inputs=False, # use P5
+ num_outs=5,
+ relu_before_extra_convs=True),
+ bbox_head=dict(
+ type='FCOSMono3DHead',
+ num_classes=10,  # same count as class_names in nus-mono3d.py
+ in_channels=256,  # equals the FPN out_channels above
+ stacked_convs=2,
+ feat_channels=256,
+ use_direction_classifier=True,
+ diff_rad_by_sin=True,
+ pred_attrs=True,
+ pred_velo=True,
+ dir_offset=0.7854, # pi/4
+ strides=[8, 16, 32, 64, 128],  # five strides, same count as num_outs
+ group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo
+ cls_branch=(256, ),
+ reg_branch=(  # five entries, one per group in group_reg_dims
+ (256, ), # offset
+ (256, ), # depth
+ (256, ), # size
+ (256, ), # rot
+ () # velo
+ ),
+ dir_branch=(256, ),
+ attr_branch=(256, ),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+ loss_dir=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_attr=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ norm_on_bbox=True,
+ centerness_on_reg=True,
+ center_sampling=True,
+ conv_bias=True,
+ dcn_on_last_conv=True),
+ train_cfg=dict(  # code_weight has 9 entries = sum(group_reg_dims)
+ allowed_border=0,
+ code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ use_rotate_nms=True,
+ nms_across_levels=False,
+ nms_pre=1000,
+ nms_thr=0.8,
+ score_thr=0.05,
+ min_bbox_size=0,
+ max_per_img=200))
diff --git a/configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py b/configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py
@@ -0,0 +1,75 @@
+_base_ = [  # base files: dataset, model, schedule and runtime settings
+ '../_base_/datasets/nus-mono3d.py', '../_base_/models/fcos3d.py',
+ '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'
+]
+# model settings: DCNv2 in the backbone (last two stage_with_dcn flags)
+model = dict(
+ backbone=dict(
+ dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
+ stage_with_dcn=(False, False, True, True)))
+
+class_names = [  # same ten names as in nus-mono3d.py
+ 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
+ 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
+]
+img_norm_cfg = dict(  # vs base: mean reversed, unit std, to_rgb=False
+ mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+train_pipeline = [  # same steps as the base, with the img_norm_cfg above
+ dict(type='LoadImageFromFileMono3D'),
+ dict(
+ type='LoadAnnotations3D',
+ with_bbox=True,
+ with_label=True,
+ with_attr_label=True,
+ with_bbox_3d=True,
+ with_label_3d=True,
+ with_bbox_depth=True),
+ dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+ dict(type='Normalize', **img_norm_cfg),  # picks up the local values
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle3D', class_names=class_names),
+ dict(
+ type='Collect3D',
+ keys=[  # presumably the fields handed to the model - confirm
+ 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
+ 'gt_labels_3d', 'centers2d', 'depths'
+ ]),
+]
+test_pipeline = [  # same steps as the base, with the img_norm_cfg above
+ dict(type='LoadImageFromFileMono3D'),
+ dict(
+ type='MultiScaleFlipAug',
+ scale_factor=1.0,
+ flip=False,  # presumably makes RandomFlip3D a no-op - confirm
+ transforms=[
+ dict(type='RandomFlip3D'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(
+ type='DefaultFormatBundle3D',
+ class_names=class_names,
+ with_label=False),
+ dict(type='Collect3D', keys=['img']),  # image key only
+ ])
+]
+data = dict(  # other dataset keys presumably come from _base_ - confirm
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(pipeline=train_pipeline),
+ val=dict(pipeline=test_pipeline),  # val and test share test_pipeline
+ test=dict(pipeline=test_pipeline))
+# optimizer: only lr and paramwise_cfg are set in this file
+optimizer = dict(
+ lr=0.002, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
+optimizer_config = dict(  # _delete_: presumably drops inherited keys
+ _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy: step schedule with a linear warmup over 500 iterations
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=1.0 / 3,
+ step=[8, 11])  # presumably epoch indices (total_epochs = 12) - confirm
+total_epochs = 12
+evaluation = dict(interval=2)  # same value as in nus-mono3d.py
diff --git a/mmdet3d/models/dense_heads/__init__.py b/mmdet3d/models/dense_heads/__init__.py
@@ -1,6 +1,9 @@
 from .anchor3d_head import Anchor3DHead
+from .anchor_free_mono3d_head import AnchorFreeMono3DHead
 from .base_conv_bbox_head import BaseConvBboxHead
+from .base_mono3d_dense_head import BaseMono3DDenseHead
 from .centerpoint_head import CenterHead
+from .fcos_mono3d_head import FCOSMono3DHead
 from .free_anchor3d_head import FreeAnchor3DHead
 from .parta2_rpn_head import PartA2RPNHead
 from .shape_aware_head import ShapeAwareHead
@@ -9,5 +12,6 @@
 
 __all__ = [
  'Anchor3DHead', 'FreeAnchor3DHead', 'PartA2RPNHead', 'VoteHead',
- 'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead'
+ 'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
+ 'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead'
 ]