[Feature] Support ImVoteNet complete model (#352)

* Added image loading in SUNRGB-D dataset (#195) * image loading * format and docstring fix * removed irrelevant files * removed irrelevant files * load image only if modality is pc+img * added modality like nuscenes * Added imvotenet image branch pretrain (#217) * image loading * naive commit * format and docstring fix * removed irrelevant files * removed irrelevant files * load image only if modality is pc+img * added modality like nuscenes * pretrain_2d_model * finetune sunrgbd * finetune sunrgbd * deleted unused code * fixed a bug * resolve conflict * update config file * fix docstring and configs * integrated vote fusion * coords transform and unit test * Update docstring * refactor and add unit test * fix bug caused by mmcv upgrade; delete pdb breakpoint * add point fusion unittest * remove unused file * fix typos * updates * add assertion info * update * add unittest * add vote fusion unittest * add vote fusion unittest * [Refactor] VoteNet refactor (#322) * votenet refactor * remove file * minor update * docstring * initial update of imvotenet * [Feature] Support vote fusion (#297) * integrated vote fusion * coords transform and unit test * Update docstring * refactor and add unit test * add point fusion unittest * remove unused file * updates * add assertion info * update * add unittest * add vote fusion unittest * add vote fusion unittest * minor update * docstring * change np ops to torch * refactor test * update * refactor of image mlp and np random ops to torch * add docstring * add config and mod dataset * fix bugs * add_comments * fix bugs * fix_bug * fix bug * fix bug * fix bug * fix bug * final fix * fix bug * ? 
* add docstring * move train/test cfg * change img mlp default param * rename config * minor mod * change config name * move train/test cfg * some fixes and 2d utils * fix config name * fix config override issue * config simplify & reformat * explicitly set eval mode->override train() * add fix_img_branch to config * remove set_img_branch_eval_mode * temporary fix, change calibs to calib * more docstring and view/reshape, expand/repeat change * complete imvotenet docstring * fix docstring * add config and some minor fixes * rename config Co-authored-by: ZwwWayne <wayne.zw@outlook.com>
open-mmlab · Mar 24, 2021 · 4eed122 · 4eed122
1 parent 097b66e
commit 4eed122
Show file tree

Hide file tree

Showing 27 changed files with 2,456 additions and 78 deletions.
diff --git a/configs/_base_/models/imvotenet_image.py b/configs/_base_/models/imvotenet_image.py
@@ -0,0 +1,108 @@
+model = dict(  # ImVoteNet image-branch config: backbone, neck, RPN/RoI heads, train/test cfg
+ type='ImVoteNet',
+ img_backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),  # one index per stage (num_stages=4)
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=False),
+ norm_eval=True,
+ style='caffe'),
+ img_neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],  # four entries, one per backbone out_indices entry
+ out_channels=256,
+ num_outs=5),  # five outputs, same count as the RPN anchor strides below
+ img_rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,  # equals img_neck out_channels
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ img_roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),  # first four RPN anchor strides; 64 is not listed here
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,  # equals bbox_roi_extractor out_channels
+ fc_out_channels=1024,
+ roi_feat_size=7,  # equals the RoIAlign output_size above
+ num_classes=10,  # NOTE(review): presumably the 10 SUN RGB-D classes - confirm against the dataset config
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),  # differs from the RPN coder, which uses 1.0 for all four
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+
+ # model training and testing settings
+ train_cfg=dict(
+ img_rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ img_rpn_proposal=dict(
+ nms_across_levels=False,
+ nms_pre=2000,  # test_cfg.img_rpn sets nms_pre=1000 instead
+ nms_post=1000,
+ max_num=1000,
+ nms_thr=0.7,
+ min_bbox_size=0),
+ img_rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,  # differs from the img_rpn assigner (True)
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),  # differs from the img_rpn sampler (False)
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ img_rpn=dict(
+ nms_across_levels=False,
+ nms_pre=1000,
+ nms_post=1000,
+ max_num=1000,
+ nms_thr=0.7,
+ min_bbox_size=0),
+ img_rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)))
diff --git a/configs/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py b/configs/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py
@@ -0,0 +1,58 @@
+_base_ = [  # base config files: SUN RGB-D dataset, default runtime, ImVoteNet image model
+ '../_base_/datasets/sunrgbd-3d-10class.py', '../_base_/default_runtime.py',
+ '../_base_/models/imvotenet_image.py'
+]
+
+# use caffe img_norm; shared by the Normalize step of both pipelines below
+img_norm_cfg = dict(
+ mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)  # NOTE(review): with to_rgb=False the mean is presumably in BGR order - confirm
+
+train_pipeline = [  # 2D-only training transforms: load image and boxes, resize, flip, normalize, pad, collect
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='Resize',
+ img_scale=[(1333, 480), (1333, 504), (1333, 528), (1333, 552),
+ (1333, 576), (1333, 600)],  # six (1333, h) options, h from 480 to 600 in steps of 24
+ multiscale_mode='value',  # NOTE(review): presumably picks one listed scale per sample - confirm
+ keep_ratio=True),
+ dict(type='RandomFlip', flip_ratio=0.5),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),  # image plus 2D box targets only
+]
+test_pipeline = [  # test-time transforms: a single scale with flipping disabled
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 600),  # single scale; equals the largest option in train_pipeline
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),  # NOTE(review): kept although flip=False above; presumably driven by the wrapper - confirm
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),  # no ground-truth keys are collected at test time
+ ])
+]
+
+data = dict(  # overrides the pipelines (and loader sizes) of the base dataset config
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ train=dict(times=1, dataset=dict(pipeline=train_pipeline)),  # NOTE(review): `times` + nested `dataset` suggest a repeat-style wrapper in the base config - confirm
+ val=dict(pipeline=test_pipeline),  # val and test share test_pipeline
+ test=dict(pipeline=test_pipeline))
+
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)  # grad_clip is explicitly unset
+lr_config = dict(
+ policy='step',
+ warmup='linear',
+ warmup_iters=500,
+ warmup_ratio=0.001,
+ step=[6])  # NOTE(review): presumably an epoch index (6 of total_epochs=8) - confirm the unit
+total_epochs = 8
+# Initial weights: the COCO Mask R-CNN R50 caffe FPN checkpoint named in the URL. NOTE(review): the URL is plain http - consider https if the host serves it.
+load_from = 'http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth' # noqa