Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Support monocular 3D detection on nuScenes #392

Merged
merged 25 commits into from
Apr 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6f65d86
Support nuscenes mono3d json info generation
Tai-Wang Mar 29, 2021
9f6cc7b
Support nuscenes mono3d dataset class
Tai-Wang Mar 29, 2021
9a2f8bb
Support attribute and bbox2d prediction in bbox3dnms and bbox3d2result
Tai-Wang Mar 29, 2021
f3671d8
Rename dataset class and add comments to 'attrs'
Tai-Wang Mar 30, 2021
ee1704f
Merge branch 'master' into nus_mono3d
Tai-Wang Mar 30, 2021
6d15d0c
Merge branch 'master' into nus_mono3d
Tai-Wang Mar 31, 2021
ff3b974
Merge branch 'master' into nus_mono3d
Tai-Wang Mar 31, 2021
169eddf
Merge branch 'master' into nus_mono3d
Tai-Wang Apr 5, 2021
239489a
Support mono3d related pipelines
Tai-Wang Apr 5, 2021
990d3d3
Fix unittest for loading 3D annotations
Tai-Wang Apr 5, 2021
e9ca4e7
Add unit test for nuscenes mono3d dataset
Tai-Wang Apr 6, 2021
1a6f26e
Rename the sample result file
Tai-Wang Apr 6, 2021
5be6b25
Upload sample data for mono3d unit test
Tai-Wang Apr 6, 2021
ba3a8ab
Upload sample data for mono3d unit test
Tai-Wang Apr 6, 2021
7bed5c3
Upload sample image for unit test
Tai-Wang Apr 6, 2021
0a0d2e3
Delete tests/data/nuscenes/samples/LIDAR_TOP/CAM_BACK_LEFT directory
Tai-Wang Apr 6, 2021
bbbbc6f
Add files via upload
Tai-Wang Apr 6, 2021
e3c3277
Remove unnecessary 'f'
Tai-Wang Apr 6, 2021
93d3816
Remove unnecessary \ in arguments
Tai-Wang Apr 6, 2021
a89a54c
Remove check for pycocotools version because it has been done in the …
Tai-Wang Apr 6, 2021
95db129
Remove unnecessary comma, add TODO and change init of attrs in format…
Tai-Wang Apr 6, 2021
e8d336d
Merge RandomFlip3D and RandomFlipMono3D
Tai-Wang Apr 6, 2021
0fe44f9
Add pytest to check whether cuda is available in the unit test
Tai-Wang Apr 6, 2021
3d3d413
Add visualization TODO
Tai-Wang Apr 7, 2021
cd1e015
Remove useless init in loading mono3d images
Tai-Wang Apr 7, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion mmdet3d/core/bbox/box_np_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,21 +335,40 @@ def rotation_points_single_angle(points, angle, axis=0):
return points @ rot_mat_T, rot_mat_T


def points_cam2img(points_3d, proj_mat, with_depth=False):
    """Project points in camera coordinates to image coordinates.

    Args:
        points_3d (np.ndarray): Points in shape (N, 3). Only the last axis
            is indexed, so extra leading dimensions are carried through.
        proj_mat (np.ndarray): Transformation matrix between coordinates.
            Must be 2-dimensional with shape (3, 3), (3, 4) or (4, 4).
        with_depth (bool): Whether to keep depth in the output.
            Defaults to False.

    Returns:
        np.ndarray: Points in image coordinates with shape [N, 2]. When
            ``with_depth`` is True the shape is [N, 3] and the last channel
            is the depth the image coordinates were divided by.
    """
    # Shape of the column of ones used to make the points homogeneous.
    points_shape = list(points_3d.shape)
    points_shape[-1] = 1

    assert len(proj_mat.shape) == 2, 'The dimension of the projection'\
        f' matrix should be 2 instead of {len(proj_mat.shape)}.'
    d1, d2 = proj_mat.shape[:2]
    assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or (
        d1 == 4 and d2 == 4), 'The shape of the projection matrix'\
        f' ({d1}*{d2}) is not supported.'
    if d1 == 3:
        # Embed a 3x3 / 3x4 matrix into a 4x4 identity so that the same
        # homogeneous multiplication works for every supported shape.
        proj_mat_expanded = np.eye(4, dtype=proj_mat.dtype)
        proj_mat_expanded[:d1, :d2] = proj_mat
        proj_mat = proj_mat_expanded

    points_4 = np.concatenate([points_3d, np.ones(points_shape)], axis=-1)
    point_2d = points_4 @ proj_mat.T
    # Third channel of the projected points; kept as a slice so that it
    # broadcasts against the first two channels in the division below.
    depth = point_2d[..., 2:3]
    point_2d_res = point_2d[..., :2] / depth

    if with_depth:
        return np.concatenate([point_2d_res, depth], axis=-1)

    return point_2d_res


Expand Down
6 changes: 3 additions & 3 deletions mmdet3d/core/bbox/structures/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,11 @@ def points_cam2img(points_3d, proj_mat):
points_num = list(points_3d.shape)[:-1]

points_shape = np.concatenate([points_num, [1]], axis=0).tolist()
assert len(proj_mat.shape) == 2, f'The dimension of the projection'\
f'matrix should be 2 instead of {len(proj_mat.shape)}.'
assert len(proj_mat.shape) == 2, 'The dimension of the projection'\
f' matrix should be 2 instead of {len(proj_mat.shape)}.'
d1, d2 = proj_mat.shape[:2]
assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or (
d1 == 4 and d2 == 4), f'The shape of the projection matrix'\
d1 == 4 and d2 == 4), 'The shape of the projection matrix'\
f' ({d1}*{d2}) is not supported.'
if d1 == 3:
proj_mat_expanded = torch.eye(
Expand Down
12 changes: 10 additions & 2 deletions mmdet3d/core/bbox/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,30 @@ def bbox3d2roi(bbox_list):
return rois


def bbox3d2result(bboxes, scores, labels, attrs=None):
    """Collect detection results into a dict of CPU tensors.

    Args:
        bboxes (torch.Tensor): Bounding boxes with shape of (n, 5).
            NOTE(review): moved with ``.to('cpu')`` rather than ``.cpu()``,
            so any object exposing ``.to`` is accepted - confirm the
            expected box type and shape against the callers.
        scores (torch.Tensor): Scores with shape of (n, ).
        labels (torch.Tensor): Labels with shape of (n, ).
        attrs (torch.Tensor, optional): Attributes with shape of (n, ).
            Defaults to None.

    Returns:
        dict[str, torch.Tensor]: Bounding box results in cpu mode.

            - boxes_3d (torch.Tensor): 3D boxes.
            - scores_3d (torch.Tensor): Prediction scores.
            - labels_3d (torch.Tensor): Box labels.
            - attrs_3d (torch.Tensor, optional): Box attributes. Only
              present when ``attrs`` is not None.
    """
    result_dict = dict(
        boxes_3d=bboxes.to('cpu'),
        scores_3d=scores.cpu(),
        labels_3d=labels.cpu())

    # The attribute entry is optional so that callers which do not predict
    # attributes keep receiving exactly the three original keys.
    if attrs is not None:
        result_dict['attrs_3d'] = attrs.cpu()

    return result_dict
46 changes: 42 additions & 4 deletions mmdet3d/core/post_processing/box3d_nms.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ def box3d_multiclass_nms(mlvl_bboxes,
score_thr,
max_num,
cfg,
mlvl_dir_scores=None):
mlvl_dir_scores=None,
mlvl_attr_scores=None,
mlvl_bboxes2d=None):
"""Multi-class nms for 3D boxes.

Args:
Expand All @@ -27,10 +29,15 @@ def box3d_multiclass_nms(mlvl_bboxes,
cfg (dict): Configuration dict of NMS.
mlvl_dir_scores (torch.Tensor, optional): Multi-level scores
of direction classifier. Defaults to None.
mlvl_attr_scores (torch.Tensor, optional): Multi-level scores
of attribute classifier. Defaults to None.
mlvl_bboxes2d (torch.Tensor, optional): Multi-level 2D bounding
boxes. Defaults to None.

Returns:
tuple[torch.Tensor]: Return results after nms, including 3D \
bounding boxes, scores, labels and direction scores.
bounding boxes, scores, labels, direction scores, attribute \
scores (optional) and 2D bounding boxes (optional).
"""
# do multi class nms
# the fg class id range: [0, num_classes-1]
Expand All @@ -39,6 +46,8 @@ def box3d_multiclass_nms(mlvl_bboxes,
scores = []
labels = []
dir_scores = []
attr_scores = []
bboxes2d = []
for i in range(0, num_classes):
# get bboxes and scores of this class
cls_inds = mlvl_scores[:, i] > score_thr
Expand All @@ -65,13 +74,23 @@ def box3d_multiclass_nms(mlvl_bboxes,
if mlvl_dir_scores is not None:
_mlvl_dir_scores = mlvl_dir_scores[cls_inds]
dir_scores.append(_mlvl_dir_scores[selected])
if mlvl_attr_scores is not None:
_mlvl_attr_scores = mlvl_attr_scores[cls_inds]
attr_scores.append(_mlvl_attr_scores[selected])
if mlvl_bboxes2d is not None:
_mlvl_bboxes2d = mlvl_bboxes2d[cls_inds]
bboxes2d.append(_mlvl_bboxes2d[selected])

if bboxes:
bboxes = torch.cat(bboxes, dim=0)
scores = torch.cat(scores, dim=0)
labels = torch.cat(labels, dim=0)
if mlvl_dir_scores is not None:
dir_scores = torch.cat(dir_scores, dim=0)
if mlvl_attr_scores is not None:
attr_scores = torch.cat(attr_scores, dim=0)
if mlvl_bboxes2d is not None:
bboxes2d = torch.cat(bboxes2d, dim=0)
if bboxes.shape[0] > max_num:
_, inds = scores.sort(descending=True)
inds = inds[:max_num]
Expand All @@ -80,12 +99,31 @@ def box3d_multiclass_nms(mlvl_bboxes,
scores = scores[inds]
if mlvl_dir_scores is not None:
dir_scores = dir_scores[inds]
if mlvl_attr_scores is not None:
attr_scores = attr_scores[inds]
if mlvl_bboxes2d is not None:
bboxes2d = bboxes2d[inds]
else:
bboxes = mlvl_scores.new_zeros((0, mlvl_bboxes.size(-1)))
scores = mlvl_scores.new_zeros((0, ))
labels = mlvl_scores.new_zeros((0, ), dtype=torch.long)
dir_scores = mlvl_scores.new_zeros((0, ))
return bboxes, scores, labels, dir_scores
if mlvl_dir_scores is not None:
dir_scores = mlvl_scores.new_zeros((0, ))
if mlvl_attr_scores is not None:
attr_scores = mlvl_scores.new_zeros((0, ))
if mlvl_bboxes2d is not None:
bboxes2d = mlvl_scores.new_zeros((0, 4))

results = (bboxes, scores, labels)

if mlvl_dir_scores is not None:
results = results + (dir_scores, )
if mlvl_attr_scores is not None:
results = results + (attr_scores, )
if mlvl_bboxes2d is not None:
results = results + (bboxes2d, )

return results


def aligned_3d_nms(boxes, scores, classes, thresh):
Expand Down
7 changes: 4 additions & 3 deletions mmdet3d/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .kitti_dataset import KittiDataset
from .lyft_dataset import LyftDataset
from .nuscenes_dataset import NuScenesDataset
from .nuscenes_mono_dataset import NuScenesMonoDataset
from .pipelines import (BackgroundPointsFilter, GlobalRotScaleTrans,
IndoorPointSample, LoadAnnotations3D,
LoadPointsFromFile, LoadPointsFromMultiSweeps,
Expand All @@ -18,9 +19,9 @@
__all__ = [
'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
'build_dataloader', 'RepeatFactorDataset', 'DATASETS', 'build_dataset',
'CocoDataset', 'NuScenesDataset', 'LyftDataset', 'ObjectSample',
'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', 'PointShuffle',
'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
'CocoDataset', 'NuScenesDataset', 'NuScenesMonoDataset', 'LyftDataset',
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
'LoadPointsFromFile', 'NormalizePointsColor', 'IndoorPointSample',
'LoadAnnotations3D', 'SUNRGBDDataset', 'ScanNetDataset',
'SemanticKITTIDataset', 'Custom3DDataset', 'LoadPointsFromMultiSweeps',
Expand Down
Loading