From 9687825ee675cd1ccf3e803e87b5073d3d829996 Mon Sep 17 00:00:00 2001 From: THU17cyz Date: Fri, 6 Nov 2020 16:53:04 +0800 Subject: [PATCH 1/6] image loading --- configs/imvotenet/imvotenet_temp.py | 117 ++++++++++++++++++ mmdet3d/datasets/sunrgbd_dataset.py | 37 ++++++ mmdet3d/models/detectors/__init__.py | 3 +- mmdet3d/models/detectors/imvotenet.py | 132 +++++++++++++++++++++ tools/data_converter/sunrgbd_data_utils.py | 3 +- 5 files changed, 289 insertions(+), 3 deletions(-) create mode 100644 configs/imvotenet/imvotenet_temp.py create mode 100644 mmdet3d/models/detectors/imvotenet.py diff --git a/configs/imvotenet/imvotenet_temp.py b/configs/imvotenet/imvotenet_temp.py new file mode 100644 index 0000000000..cc7b6e0703 --- /dev/null +++ b/configs/imvotenet/imvotenet_temp.py @@ -0,0 +1,117 @@ +_base_ = [ + '../_base_/models/votenet.py', '../_base_/schedules/schedule_3x.py', + '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='ImVoteNet', + bbox_head=dict( + num_classes=10, + bbox_coder=dict( + type='PartialBinBasedBBoxCoder', + num_sizes=10, + num_dir_bins=12, + with_rot=True, + mean_sizes=[[2.114256, 1.620300, 0.927272], + [0.791118, 1.279516, 0.718182], + [0.923508, 1.867419, 0.845495], + [0.591958, 0.552978, 0.827272], + [0.699104, 0.454178, 0.75625], + [0.69519, 1.346299, 0.736364], + [0.528526, 1.002642, 1.172878], + [0.500618, 0.632163, 0.683424], + [0.404671, 1.071108, 1.688889], + [0.76584, 1.398258, 0.472728]]), + )) +dataset_type = 'SUNRGBDDataset' +data_root = 'data/sunrgbd/' +class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', + 'night_stand', 'bookshelf', 'bathtub') +train_pipeline = [ + dict( + type='LoadPointsFromFile', + shift_height=True, + load_dim=6, + use_dim=[0, 1, 2]), + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations3D'), + dict( + type='RandomFlip3D', + sync_2d=False, + flip_ratio_bev_horizontal=0.5, + ), + dict( + type='GlobalRotScaleTrans', + rot_range=[-0.523599, 0.523599], + scale_ratio_range=[0.85, 1.15], + shift_height=True), + dict(type='IndoorPointSample', num_points=20000), + dict(type='DefaultFormatBundle3D', class_names=class_names), + dict( + type='Collect3D', + keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d']) +] +test_pipeline = [ + dict( + type='LoadPointsFromFile', + shift_height=True, + load_dim=6, + use_dim=[0, 1, 2]), + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug3D', + img_scale=(1333, 800), + pts_scale_ratio=1, + flip=False, + transforms=[ + dict( + type='GlobalRotScaleTrans', + rot_range=[0, 0], + scale_ratio_range=[1., 1.], + translation_std=[0, 0, 0]), + dict( + type='RandomFlip3D', + sync_2d=False, + flip_ratio_bev_horizontal=0.5, + ), + dict(type='IndoorPointSample', num_points=20000), + dict( + type='DefaultFormatBundle3D', + class_names=class_names, + with_label=False), + dict(type='Collect3D', keys=['points', 'img']) + ]) +] + +data = dict( + samples_per_gpu=16, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=data_root + 'sunrgbd_infos_train.pkl', + pipeline=train_pipeline, + classes=class_names, + filter_empty_gt=False, + # we use box_type_3d='LiDAR' in kitti and nuscenes dataset + # and box_type_3d='Depth' in sunrgbd and scannet dataset. + box_type_3d='Depth')), + val=dict( + type=dataset_type, + data_root=data_root, + ann_file=data_root + 'sunrgbd_infos_val.pkl', + pipeline=test_pipeline, + classes=class_names, + test_mode=True, + box_type_3d='Depth'), + test=dict( + type=dataset_type, + data_root=data_root, + ann_file=data_root + 'sunrgbd_infos_val.pkl', + pipeline=test_pipeline, + classes=class_names, + test_mode=True, + box_type_3d='Depth')) diff --git a/mmdet3d/datasets/sunrgbd_dataset.py b/mmdet3d/datasets/sunrgbd_dataset.py index 1e3db4acbb..01d63260a6 100644 --- a/mmdet3d/datasets/sunrgbd_dataset.py +++ b/mmdet3d/datasets/sunrgbd_dataset.py @@ -60,6 +60,43 @@ def __init__(self, filter_empty_gt=filter_empty_gt, test_mode=test_mode) + def get_data_info(self, index): + """Get data info according to the given index. + + Args: + index (int): Index of the sample data to get. + + Returns: + dict: Data information that will be passed to the data \ + preprocessing pipelines. It includes the following keys: + + - sample_idx (str): Sample index. + - pts_filename (str): Filename of point clouds. + - file_name (str): Filename of point clouds. + - ann_info (dict): Annotation info. + """ + info = self.data_infos[index] + assert info['point_cloud']['lidar_idx'] == info['image']['image_idx'] + sample_idx = info['point_cloud']['lidar_idx'] + pts_filename = osp.join(self.data_root, info['pts_path']) + img_filename = osp.join(self.data_root, info['image']['image_path']) + calib = info['calib'] + + input_dict = dict( + sample_idx=sample_idx, + pts_filename=pts_filename, + img_prefix=None, + img_info=dict(filename=img_filename), + file_name=pts_filename, + calib=calib) + + if not self.test_mode: + annos = self.get_ann_info(index) + input_dict['ann_info'] = annos + if self.filter_empty_gt and len(annos['gt_bboxes_3d']) == 0: + return None + return input_dict + def get_ann_info(self, index): """Get annotation info according to the given index. diff --git a/mmdet3d/models/detectors/__init__.py b/mmdet3d/models/detectors/__init__.py index 1ee43a9a20..5a4ea38ce0 100644 --- a/mmdet3d/models/detectors/__init__.py +++ b/mmdet3d/models/detectors/__init__.py @@ -2,6 +2,7 @@ from .centerpoint import CenterPoint from .dynamic_voxelnet import DynamicVoxelNet from .h3dnet import H3DNet +from .imvotenet import ImVoteNet from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN from .mvx_two_stage import MVXTwoStageDetector from .parta2 import PartA2 @@ -12,5 +13,5 @@ __all__ = [ 'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector', 'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet', - 'CenterPoint', 'SSD3DNet' + 'CenterPoint', 'SSD3DNet', 'ImVoteNet' ] diff --git a/mmdet3d/models/detectors/imvotenet.py b/mmdet3d/models/detectors/imvotenet.py new file mode 100644 index 0000000000..aee87943fb --- /dev/null +++ b/mmdet3d/models/detectors/imvotenet.py @@ -0,0 +1,132 @@ +import torch + +from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d +from mmdet.models import DETECTORS +from .single_stage import SingleStage3DDetector + + +@DETECTORS.register_module() +class ImVoteNet(SingleStage3DDetector): + """ImVoteNet model. + + https://arxiv.org/pdf/2001.10692.pdf + """ + + def __init__(self, + backbone, + bbox_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None): + super(ImVoteNet, self).__init__( + backbone=backbone, + bbox_head=bbox_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained) + + def extract_img_feat(self, img, img_metas): + """Extract features of images.""" + print(img, img_metas) # test + assert False + if self.with_img_backbone and img is not None: + input_shape = img.shape[-2:] + # update real input shape of each single img + for img_meta in img_metas: + img_meta.update(input_shape=input_shape) + + if img.dim() == 5 and img.size(0) == 1: + img.squeeze_() + elif img.dim() == 5 and img.size(0) > 1: + B, N, C, H, W = img.size() + img = img.view(B * N, C, H, W) + img_feats = self.img_backbone(img) + else: + return None + if self.with_img_neck: + img_feats = self.img_neck(img_feats) + return img_feats + + def forward_train(self, + points, + img_metas, + gt_bboxes_3d, + gt_labels_3d, + img, + pts_semantic_mask=None, + pts_instance_mask=None, + gt_bboxes_ignore=None): + """Forward of training. + + Args: + points (list[torch.Tensor]): Points of each batch. + img_metas (list): Image metas. + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch. + gt_labels_3d (list[torch.Tensor]): gt class labels of each batch. + pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic + label of each batch. + pts_instance_mask (None | list[torch.Tensor]): point-wise instance + label of each batch. + gt_bboxes_ignore (None | list[torch.Tensor]): Specify + which bounding. + + Returns: + dict: Losses. + """ + points_cat = torch.stack(points) + + x = self.extract_feat(points_cat) + bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod) + loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask, + pts_instance_mask, img_metas) + losses = self.bbox_head.loss( + bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) + return losses + + def simple_test(self, points, img_metas, img=None, rescale=False): + """Forward of testing. + + Args: + points (list[torch.Tensor]): Points of each sample. + img_metas (list): Image metas. + rescale (bool): Whether to rescale results. + + Returns: + list: Predicted 3d boxes. + """ + self.extract_img_feat(img, img_metas) + points_cat = torch.stack(points) + + x = self.extract_feat(points_cat) + bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod) + bbox_list = self.bbox_head.get_bboxes( + points_cat, bbox_preds, img_metas, rescale=rescale) + bbox_results = [ + bbox3d2result(bboxes, scores, labels) + for bboxes, scores, labels in bbox_list + ] + return bbox_results + + def aug_test(self, points, img_metas, img=None, rescale=False): + """Test with augmentation.""" + self.extract_img_feat(img, img_metas) + points_cat = [torch.stack(pts) for pts in points] + feats = self.extract_feats(points_cat, img_metas) + + # only support aug_test for one sample + aug_bboxes = [] + for x, pts_cat, img_meta in zip(feats, points_cat, img_metas): + bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod) + bbox_list = self.bbox_head.get_bboxes( + pts_cat, bbox_preds, img_meta, rescale=rescale) + bbox_list = [ + dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels) + for bboxes, scores, labels in bbox_list + ] + aug_bboxes.append(bbox_list[0]) + + # after merging, bboxes will be rescaled to the original image size + merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas, + self.bbox_head.test_cfg) + + return [merged_bboxes] diff --git a/tools/data_converter/sunrgbd_data_utils.py b/tools/data_converter/sunrgbd_data_utils.py index 65187b8444..4e502f475b 100644 --- a/tools/data_converter/sunrgbd_data_utils.py +++ b/tools/data_converter/sunrgbd_data_utils.py @@ -155,8 +155,7 @@ def process_single_scene(sample_idx): osp.join(self.root_dir, 'points', f'{sample_idx:06d}.bin')) info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin') - img_name = osp.join(self.image_dir, f'{sample_idx:06d}') - img_path = osp.join(self.image_dir, img_name) + img_path = osp.join('image', f'{sample_idx:06d}.jpg') image_info = { 'image_idx': sample_idx, 'image_shape': self.get_image_shape(sample_idx), From 8b967f8fd561fb4bedafc124d5af38fd40a53da7 Mon Sep 17 00:00:00 2001 From: THU17cyz Date: Tue, 10 Nov 2020 16:25:30 +0800 Subject: [PATCH 2/6] format and docstring fix --- ...emp.py => imvotenet_sunrgbd-3d-10class.py} | 0 mmdet3d/models/detectors/imvotenet.py | 33 +++++++------------ mmdet3d/models/detectors/votenet.py | 5 +-- 3 files changed, 13 insertions(+), 25 deletions(-) rename configs/imvotenet/{imvotenet_temp.py => imvotenet_sunrgbd-3d-10class.py} (100%) diff --git a/configs/imvotenet/imvotenet_temp.py b/configs/imvotenet/imvotenet_sunrgbd-3d-10class.py similarity index 100% rename from configs/imvotenet/imvotenet_temp.py rename to configs/imvotenet/imvotenet_sunrgbd-3d-10class.py diff --git a/mmdet3d/models/detectors/imvotenet.py b/mmdet3d/models/detectors/imvotenet.py index aee87943fb..120309bb4e 100644 --- a/mmdet3d/models/detectors/imvotenet.py +++ b/mmdet3d/models/detectors/imvotenet.py @@ -7,9 +7,7 @@ @DETECTORS.register_module() class ImVoteNet(SingleStage3DDetector): - """ImVoteNet model. - - https://arxiv.org/pdf/2001.10692.pdf + r"""ImVoteNet ``_ for 3D detection. """ def __init__(self, @@ -27,24 +25,17 @@ def __init__(self, def extract_img_feat(self, img, img_metas): """Extract features of images.""" - print(img, img_metas) # test - assert False - if self.with_img_backbone and img is not None: - input_shape = img.shape[-2:] - # update real input shape of each single img - for img_meta in img_metas: - img_meta.update(input_shape=input_shape) - - if img.dim() == 5 and img.size(0) == 1: - img.squeeze_() - elif img.dim() == 5 and img.size(0) > 1: - B, N, C, H, W = img.size() - img = img.view(B * N, C, H, W) - img_feats = self.img_backbone(img) - else: - return None - if self.with_img_neck: - img_feats = self.img_neck(img_feats) + input_shape = img.shape[-2:] + # update real input shape of each single img + for img_meta in img_metas: + img_meta.update(input_shape=input_shape) + + if img.dim() == 5 and img.size(0) == 1: + img.squeeze_() + elif img.dim() == 5 and img.size(0) > 1: + B, N, C, H, W = img.size() + img = img.view(B * N, C, H, W) + img_feats = self.img_backbone(img) return img_feats def forward_train(self, diff --git a/mmdet3d/models/detectors/votenet.py b/mmdet3d/models/detectors/votenet.py index d1e676736f..fd26ea6732 100644 --- a/mmdet3d/models/detectors/votenet.py +++ b/mmdet3d/models/detectors/votenet.py @@ -7,10 +7,7 @@ @DETECTORS.register_module() class VoteNet(SingleStage3DDetector): - """VoteNet model. - - https://arxiv.org/pdf/1904.09664.pdf - """ + r"""VoteNet ``_ for 3D detection.""" def __init__(self, backbone, From d4039122a554e015e21267bb98e7d81960b84358 Mon Sep 17 00:00:00 2001 From: THU17cyz Date: Tue, 10 Nov 2020 17:15:32 +0800 Subject: [PATCH 3/6] removed irrelevant files --- .../imvotenet/imvotenet_sunrgbd-3d-10class.py | 117 ----------------- mmdet3d/models/detectors/imvotenet.py | 123 ------------------ 2 files changed, 240 deletions(-) delete mode 100644 configs/imvotenet/imvotenet_sunrgbd-3d-10class.py delete mode 100644 mmdet3d/models/detectors/imvotenet.py diff --git a/configs/imvotenet/imvotenet_sunrgbd-3d-10class.py b/configs/imvotenet/imvotenet_sunrgbd-3d-10class.py deleted file mode 100644 index cc7b6e0703..0000000000 --- a/configs/imvotenet/imvotenet_sunrgbd-3d-10class.py +++ /dev/null @@ -1,117 +0,0 @@ -_base_ = [ - '../_base_/models/votenet.py', '../_base_/schedules/schedule_3x.py', - '../_base_/default_runtime.py' -] -# model settings -model = dict( - type='ImVoteNet', - bbox_head=dict( - num_classes=10, - bbox_coder=dict( - type='PartialBinBasedBBoxCoder', - num_sizes=10, - num_dir_bins=12, - with_rot=True, - mean_sizes=[[2.114256, 1.620300, 0.927272], - [0.791118, 1.279516, 0.718182], - [0.923508, 1.867419, 0.845495], - [0.591958, 0.552978, 0.827272], - [0.699104, 0.454178, 0.75625], - [0.69519, 1.346299, 0.736364], - [0.528526, 1.002642, 1.172878], - [0.500618, 0.632163, 0.683424], - [0.404671, 1.071108, 1.688889], - [0.76584, 1.398258, 0.472728]]), - )) -dataset_type = 'SUNRGBDDataset' -data_root = 'data/sunrgbd/' -class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', - 'night_stand', 'bookshelf', 'bathtub') -train_pipeline = [ - dict( - type='LoadPointsFromFile', - shift_height=True, - load_dim=6, - use_dim=[0, 1, 2]), - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations3D'), - dict( - type='RandomFlip3D', - sync_2d=False, - flip_ratio_bev_horizontal=0.5, - ), - dict( - type='GlobalRotScaleTrans', - rot_range=[-0.523599, 0.523599], - scale_ratio_range=[0.85, 1.15], - shift_height=True), - dict(type='IndoorPointSample', num_points=20000), - dict(type='DefaultFormatBundle3D', class_names=class_names), - dict( - type='Collect3D', - keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d']) -] -test_pipeline = [ - dict( - type='LoadPointsFromFile', - shift_height=True, - load_dim=6, - use_dim=[0, 1, 2]), - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug3D', - img_scale=(1333, 800), - pts_scale_ratio=1, - flip=False, - transforms=[ - dict( - type='GlobalRotScaleTrans', - rot_range=[0, 0], - scale_ratio_range=[1., 1.], - translation_std=[0, 0, 0]), - dict( - type='RandomFlip3D', - sync_2d=False, - flip_ratio_bev_horizontal=0.5, - ), - dict(type='IndoorPointSample', num_points=20000), - dict( - type='DefaultFormatBundle3D', - class_names=class_names, - with_label=False), - dict(type='Collect3D', keys=['points', 'img']) - ]) -] - -data = dict( - samples_per_gpu=16, - workers_per_gpu=4, - train=dict( - type='RepeatDataset', - times=5, - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file=data_root + 'sunrgbd_infos_train.pkl', - pipeline=train_pipeline, - classes=class_names, - filter_empty_gt=False, - # we use box_type_3d='LiDAR' in kitti and nuscenes dataset - # and box_type_3d='Depth' in sunrgbd and scannet dataset. - box_type_3d='Depth')), - val=dict( - type=dataset_type, - data_root=data_root, - ann_file=data_root + 'sunrgbd_infos_val.pkl', - pipeline=test_pipeline, - classes=class_names, - test_mode=True, - box_type_3d='Depth'), - test=dict( - type=dataset_type, - data_root=data_root, - ann_file=data_root + 'sunrgbd_infos_val.pkl', - pipeline=test_pipeline, - classes=class_names, - test_mode=True, - box_type_3d='Depth')) diff --git a/mmdet3d/models/detectors/imvotenet.py b/mmdet3d/models/detectors/imvotenet.py deleted file mode 100644 index 120309bb4e..0000000000 --- a/mmdet3d/models/detectors/imvotenet.py +++ /dev/null @@ -1,123 +0,0 @@ -import torch - -from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d -from mmdet.models import DETECTORS -from .single_stage import SingleStage3DDetector - - -@DETECTORS.register_module() -class ImVoteNet(SingleStage3DDetector): - r"""ImVoteNet ``_ for 3D detection. - """ - - def __init__(self, - backbone, - bbox_head=None, - train_cfg=None, - test_cfg=None, - pretrained=None): - super(ImVoteNet, self).__init__( - backbone=backbone, - bbox_head=bbox_head, - train_cfg=train_cfg, - test_cfg=test_cfg, - pretrained=pretrained) - - def extract_img_feat(self, img, img_metas): - """Extract features of images.""" - input_shape = img.shape[-2:] - # update real input shape of each single img - for img_meta in img_metas: - img_meta.update(input_shape=input_shape) - - if img.dim() == 5 and img.size(0) == 1: - img.squeeze_() - elif img.dim() == 5 and img.size(0) > 1: - B, N, C, H, W = img.size() - img = img.view(B * N, C, H, W) - img_feats = self.img_backbone(img) - return img_feats - - def forward_train(self, - points, - img_metas, - gt_bboxes_3d, - gt_labels_3d, - img, - pts_semantic_mask=None, - pts_instance_mask=None, - gt_bboxes_ignore=None): - """Forward of training. - - Args: - points (list[torch.Tensor]): Points of each batch. - img_metas (list): Image metas. - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch. - gt_labels_3d (list[torch.Tensor]): gt class labels of each batch. - pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic - label of each batch. - pts_instance_mask (None | list[torch.Tensor]): point-wise instance - label of each batch. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify - which bounding. - - Returns: - dict: Losses. - """ - points_cat = torch.stack(points) - - x = self.extract_feat(points_cat) - bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod) - loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask, - pts_instance_mask, img_metas) - losses = self.bbox_head.loss( - bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) - return losses - - def simple_test(self, points, img_metas, img=None, rescale=False): - """Forward of testing. - - Args: - points (list[torch.Tensor]): Points of each sample. - img_metas (list): Image metas. - rescale (bool): Whether to rescale results. - - Returns: - list: Predicted 3d boxes. - """ - self.extract_img_feat(img, img_metas) - points_cat = torch.stack(points) - - x = self.extract_feat(points_cat) - bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod) - bbox_list = self.bbox_head.get_bboxes( - points_cat, bbox_preds, img_metas, rescale=rescale) - bbox_results = [ - bbox3d2result(bboxes, scores, labels) - for bboxes, scores, labels in bbox_list - ] - return bbox_results - - def aug_test(self, points, img_metas, img=None, rescale=False): - """Test with augmentation.""" - self.extract_img_feat(img, img_metas) - points_cat = [torch.stack(pts) for pts in points] - feats = self.extract_feats(points_cat, img_metas) - - # only support aug_test for one sample - aug_bboxes = [] - for x, pts_cat, img_meta in zip(feats, points_cat, img_metas): - bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod) - bbox_list = self.bbox_head.get_bboxes( - pts_cat, bbox_preds, img_meta, rescale=rescale) - bbox_list = [ - dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels) - for bboxes, scores, labels in bbox_list - ] - aug_bboxes.append(bbox_list[0]) - - # after merging, bboxes will be rescaled to the original image size - merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas, - self.bbox_head.test_cfg) - - return [merged_bboxes] From ecabc6911a82f32553e058089aca3fc35452985c Mon Sep 17 00:00:00 2001 From: THU17cyz Date: Tue, 10 Nov 2020 17:17:27 +0800 Subject: [PATCH 4/6] removed irrelevant files --- mmdet3d/models/detectors/__init__.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/mmdet3d/models/detectors/__init__.py b/mmdet3d/models/detectors/__init__.py index 5a4ea38ce0..01d8d6d0fb 100644 --- a/mmdet3d/models/detectors/__init__.py +++ b/mmdet3d/models/detectors/__init__.py @@ -2,7 +2,6 @@ from .centerpoint import CenterPoint from .dynamic_voxelnet import DynamicVoxelNet from .h3dnet import H3DNet -from .imvotenet import ImVoteNet from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN from .mvx_two_stage import MVXTwoStageDetector from .parta2 import PartA2 @@ -11,7 +10,15 @@ from .voxelnet import VoxelNet __all__ = [ - 'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector', - 'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet', - 'CenterPoint', 'SSD3DNet', 'ImVoteNet' + 'Base3DDetector', + 'VoxelNet', + 'DynamicVoxelNet', + 'MVXTwoStageDetector', + 'DynamicMVXFasterRCNN', + 'MVXFasterRCNN', + 'PartA2', + 'VoteNet', + 'H3DNet', + 'CenterPoint', + 'SSD3DNet', ] From 6ab1f1dc589349a2c8bbd8a7f548251f757b97b3 Mon Sep 17 00:00:00 2001 From: THU17cyz Date: Fri, 13 Nov 2020 15:20:36 +0800 Subject: [PATCH 5/6] load image only if modality is pc+img --- mmdet3d/datasets/sunrgbd_dataset.py | 18 +++++++++++------- mmdet3d/models/detectors/votenet.py | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/mmdet3d/datasets/sunrgbd_dataset.py b/mmdet3d/datasets/sunrgbd_dataset.py index 01d63260a6..8b57a97ae0 100644 --- a/mmdet3d/datasets/sunrgbd_dataset.py +++ b/mmdet3d/datasets/sunrgbd_dataset.py @@ -46,7 +46,7 @@ def __init__(self, ann_file, pipeline=None, classes=None, - modality=None, + modality='pconly', box_type_3d='Depth', filter_empty_gt=True, test_mode=False): @@ -59,6 +59,7 @@ def __init__(self, box_type_3d=box_type_3d, filter_empty_gt=filter_empty_gt, test_mode=test_mode) + assert modality in ['pconly', 'pc+img'] def get_data_info(self, index): """Get data info according to the given index. @@ -79,16 +80,19 @@ def get_data_info(self, index): assert info['point_cloud']['lidar_idx'] == info['image']['image_idx'] sample_idx = info['point_cloud']['lidar_idx'] pts_filename = osp.join(self.data_root, info['pts_path']) - img_filename = osp.join(self.data_root, info['image']['image_path']) - calib = info['calib'] input_dict = dict( sample_idx=sample_idx, pts_filename=pts_filename, - img_prefix=None, - img_info=dict(filename=img_filename), - file_name=pts_filename, - calib=calib) + file_name=pts_filename) + + if self.modality == 'pc+img': + img_filename = osp.join(self.data_root, + info['image']['image_path']) + calib = info['calib'] + input_dict['img_prefix'] = None + input_dict['img_info'] = dict(filename=img_filename) + input_dict['calib'] = calib if not self.test_mode: annos = self.get_ann_info(index) diff --git a/mmdet3d/models/detectors/votenet.py b/mmdet3d/models/detectors/votenet.py index fd26ea6732..6c389d5bc8 100644 --- a/mmdet3d/models/detectors/votenet.py +++ b/mmdet3d/models/detectors/votenet.py @@ -7,7 +7,7 @@ @DETECTORS.register_module() class VoteNet(SingleStage3DDetector): - r"""VoteNet ``_ for 3D detection.""" + r"""`VoteNet `_ for 3D detection.""" def __init__(self, backbone, From 43b0b1fbc49e88f64fc711eb2ac2cb1e25ab152a Mon Sep 17 00:00:00 2001 From: THU17cyz Date: Wed, 25 Nov 2020 13:57:51 +0800 Subject: [PATCH 6/6] added modality like nuscenes --- mmdet3d/datasets/sunrgbd_dataset.py | 32 ++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/mmdet3d/datasets/sunrgbd_dataset.py b/mmdet3d/datasets/sunrgbd_dataset.py index 8b57a97ae0..4bfd9ad161 100644 --- a/mmdet3d/datasets/sunrgbd_dataset.py +++ b/mmdet3d/datasets/sunrgbd_dataset.py @@ -46,7 +46,7 @@ def __init__(self, ann_file, pipeline=None, classes=None, - modality='pconly', + modality=None, box_type_3d='Depth', filter_empty_gt=True, test_mode=False): @@ -59,7 +59,12 @@ def __init__(self, box_type_3d=box_type_3d, filter_empty_gt=filter_empty_gt, test_mode=test_mode) - assert modality in ['pconly', 'pc+img'] + if self.modality is None: + self.modality = dict( + use_camera=True, + use_lidar=True, + ) + assert self.modality['use_camera'] or self.modality['use_lidar'] def get_data_info(self, index): """Get data info according to the given index. @@ -72,26 +77,29 @@ def get_data_info(self, index): preprocessing pipelines. It includes the following keys: - sample_idx (str): Sample index. - - pts_filename (str): Filename of point clouds. - - file_name (str): Filename of point clouds. + - pts_filename (str, optional): Filename of point clouds. + - file_name (str, optional): Filename of point clouds. + - img_prefix (str | None, optional): Prefix of image files. + - img_info (dict, optional): Image info. + - calib (dict, optional): Camera calibration info. - ann_info (dict): Annotation info. """ info = self.data_infos[index] - assert info['point_cloud']['lidar_idx'] == info['image']['image_idx'] sample_idx = info['point_cloud']['lidar_idx'] - pts_filename = osp.join(self.data_root, info['pts_path']) + assert info['point_cloud']['lidar_idx'] == info['image']['image_idx'] + input_dict = dict(sample_idx=sample_idx) - input_dict = dict( - sample_idx=sample_idx, - pts_filename=pts_filename, - file_name=pts_filename) + if self.modality['use_lidar']: + pts_filename = osp.join(self.data_root, info['pts_path']) + input_dict['pts_filename'] = pts_filename + input_dict['file_name'] = pts_filename - if self.modality == 'pc+img': + if self.modality['use_camera']: img_filename = osp.join(self.data_root, info['image']['image_path']) - calib = info['calib'] input_dict['img_prefix'] = None input_dict['img_info'] = dict(filename=img_filename) + calib = info['calib'] input_dict['calib'] = calib if not self.test_mode: