From eb5a66ec8418d50fd8d1dc5d91b8cf481e9d9a73 Mon Sep 17 00:00:00 2001 From: yinchimaoliang Date: Fri, 31 Jul 2020 15:10:36 +0800 Subject: [PATCH 01/14] Add core unittests (#30) * Add merge_aug_bboxes_3d unittest * Add voxel_generator unittest * Change test_merge_augs * Add clean_data unittest * Finish eval_class unittest * Add kitti_eval unittest * Add do_eval unittest * Add gpu judgement for do_eval * Change test_kitti_eval and test_voxel_generator * Change to isclose * Add unittests for bbox transform * Add unittests for bbox transform * Add unittests for bbox transform * Add decode * Add pred_split unittest * Change allclose to eq --- tests/{test_anchor.py => test_anchors.py} | 36 ++- tests/test_box3d.py | 47 +++- tests/test_box_coders.py | 217 +++++++++++++++++ tests/{test_dataset => }/test_indoor_eval.py | 11 +- tests/test_kitti_eval.py | 242 +++++++++++++++++++ tests/test_merge_augs.py | 61 +++++ tests/test_voxel_generator.py | 22 ++ 7 files changed, 628 insertions(+), 8 deletions(-) rename tests/{test_anchor.py => test_anchors.py} (79%) create mode 100644 tests/test_box_coders.py rename tests/{test_dataset => }/test_indoor_eval.py (94%) create mode 100644 tests/test_kitti_eval.py create mode 100644 tests/test_merge_augs.py create mode 100644 tests/test_voxel_generator.py diff --git a/tests/test_anchor.py b/tests/test_anchors.py similarity index 79% rename from tests/test_anchor.py rename to tests/test_anchors.py index b53aca9c96..abfc8150c0 100644 --- a/tests/test_anchor.py +++ b/tests/test_anchors.py @@ -6,9 +6,43 @@ """ import torch +from mmdet3d.core.anchor import build_anchor_generator + + +def test_anchor_3d_range_generator(): + if torch.cuda.is_available(): + device = 'cuda' + else: + device = 'cpu' + anchor_generator_cfg = dict( + type='Anchor3DRangeGenerator', + ranges=[ + [0, -39.68, -0.6, 70.4, 39.68, -0.6], + [0, -39.68, -0.6, 70.4, 39.68, -0.6], + [0, -39.68, -1.78, 70.4, 39.68, -1.78], + ], + sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], + rotations=[0, 1.57], + reshape_out=False) + + anchor_generator = build_anchor_generator(anchor_generator_cfg) + repr_str = repr(anchor_generator) + expected_repr_str = 'Anchor3DRangeGenerator(anchor_range=' \ + '[[0, -39.68, -0.6, 70.4, 39.68, -0.6], ' \ + '[0, -39.68, -0.6, 70.4, 39.68, -0.6], ' \ + '[0, -39.68, -1.78, 70.4, 39.68, -1.78]],' \ + '\nscales=[1],\nsizes=[[0.6, 0.8, 1.73], ' \ + '[0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],' \ + '\nrotations=[0, 1.57],\nreshape_out=False,' \ + '\nsize_per_range=True)' + assert repr_str == expected_repr_str + featmap_size = (256, 256) + mr_anchors = anchor_generator.single_level_grid_anchors( + featmap_size, 1.1, device=device) + assert mr_anchors.shape == torch.Size([1, 256, 256, 3, 2, 7]) + def test_aligned_anchor_generator(): - from mmdet3d.core.anchor import build_anchor_generator if torch.cuda.is_available(): device = 'cuda' else: diff --git a/tests/test_box3d.py b/tests/test_box3d.py index 5a3dd47e02..4369ec8123 100644 --- a/tests/test_box3d.py +++ b/tests/test_box3d.py @@ -5,13 +5,58 @@ from mmdet3d.core.bbox import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes, DepthInstance3DBoxes, - LiDARInstance3DBoxes) + LiDARInstance3DBoxes, bbox3d2roi, + bbox3d_mapping_back) from mmdet3d.core.bbox.structures.utils import (get_box_type, limit_period, points_cam2img, rotation_3d_in_axis, xywhr2xyxyr) +def test_bbox3d_mapping_back(): + bboxes = BaseInstance3DBoxes( + [[ + -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 2.06200000e+00, + 4.40900000e+00, 1.54800000e+00, 
-1.48801203e+00 + ], + [ + -2.66751588e+01, 5.59499564e+00, -9.14345860e-01, 3.43000000e-01, + 4.58000000e-01, 7.82000000e-01, -4.62759755e+00 + ], + [ + -5.80979675e+00, 3.54092357e+01, 2.00889888e-01, 2.39600000e+00, + 3.96900000e+00, 1.73200000e+00, -4.65203216e+00 + ], + [ + -3.13086877e+01, 1.09007628e+00, -1.94612112e-01, 1.94400000e+00, + 3.85700000e+00, 1.72300000e+00, -2.81427027e+00 + ]]) + new_bboxes = bbox3d_mapping_back(bboxes, 1.1, True, True) + expected_new_bboxes = torch.tensor( + [[-4.7657, 36.3827, 0.2705, 1.8745, 4.0082, 1.4073, -1.4880], + [-24.2501, 5.0864, -0.8312, 0.3118, 0.4164, 0.7109, -4.6276], + [-5.2816, 32.1902, 0.1826, 2.1782, 3.6082, 1.5745, -4.6520], + [-28.4624, 0.9910, -0.1769, 1.7673, 3.5064, 1.5664, -2.8143]]) + assert torch.allclose(new_bboxes.tensor, expected_new_bboxes, atol=1e-4) + + +def test_bbox3d2roi(): + bbox_0 = torch.tensor( + [[-5.2422, 4.0020, 2.9757, 2.0620, 4.4090, 1.5480, -1.4880], + [-5.8097, 3.5409, 2.0088, 2.3960, 3.9690, 1.7320, -4.6520]]) + bbox_1 = torch.tensor( + [[-2.6675, 5.5949, -9.1434, 3.4300, 4.5800, 7.8200, -4.6275], + [-3.1308, 1.0900, -1.9461, 1.9440, 3.8570, 1.7230, -2.8142]]) + bbox_list = [bbox_0, bbox_1] + rois = bbox3d2roi(bbox_list) + expected_rois = torch.tensor( + [[0.0000, -5.2422, 4.0020, 2.9757, 2.0620, 4.4090, 1.5480, -1.4880], + [0.0000, -5.8097, 3.5409, 2.0088, 2.3960, 3.9690, 1.7320, -4.6520], + [1.0000, -2.6675, 5.5949, -9.1434, 3.4300, 4.5800, 7.8200, -4.6275], + [1.0000, -3.1308, 1.0900, -1.9461, 1.9440, 3.8570, 1.7230, -2.8142]]) + assert torch.all(torch.eq(rois, expected_rois)) + + def test_base_boxes3d(): # test empty initialization empty_boxes = [] diff --git a/tests/test_box_coders.py b/tests/test_box_coders.py new file mode 100644 index 0000000000..17904b6d06 --- /dev/null +++ b/tests/test_box_coders.py @@ -0,0 +1,217 @@ +import torch + +from mmdet3d.core.bbox import DepthInstance3DBoxes +from mmdet.core import build_bbox_coder + + +def test_partial_bin_based_box_coder(): + box_coder_cfg = dict( + type='PartialBinBasedBBoxCoder', + num_sizes=10, + num_dir_bins=12, + with_rot=True, + mean_sizes=[[2.114256, 1.620300, 0.927272], + [0.791118, 1.279516, 0.718182], + [0.923508, 1.867419, 0.845495], + [0.591958, 0.552978, 0.827272], + [0.699104, 0.454178, 0.75625], + [0.69519, 1.346299, 0.736364], + [0.528526, 1.002642, 1.172878], + [0.500618, 0.632163, 0.683424], + [0.404671, 1.071108, 1.688889], + [0.76584, 1.398258, 0.472728]]) + box_coder = build_bbox_coder(box_coder_cfg) + + # test eocode + gt_bboxes = DepthInstance3DBoxes( + [[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486], + [2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030], + [-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]]) + + gt_labels = torch.tensor([0, 1, 2]) + center_target, size_class_target, size_res_target, dir_class_target, \ + dir_res_target = box_coder.encode(gt_bboxes, gt_labels) + expected_center_target = torch.tensor([[0.8308, 4.1168, -0.2413], + [2.3002, 4.8149, -0.7687], + [-1.1477, 1.8090, -0.1444]]) + expected_size_class_target = torch.tensor([0, 1, 2]) + expected_size_res_target = torch.tensor([[0.1350, 0.2241, 0.9972], + [-0.2193, -0.4166, 0.2328], + [-0.2270, -0.3401, 1.2108]]) + expected_dir_class_target = torch.tensor([3, 3, 0]) + expected_dir_res_target = torch.tensor([0.0778, 0.0322, 0.0552]) + assert torch.allclose(center_target, expected_center_target, atol=1e-4) + assert torch.all(size_class_target == expected_size_class_target) + assert torch.allclose(size_res_target, 
expected_size_res_target, atol=1e-4) + assert torch.all(dir_class_target == expected_dir_class_target) + assert torch.allclose(dir_res_target, expected_dir_res_target, atol=1e-4) + + # test decode + center = torch.tensor([[[0.8014, 3.4134, + -0.6133], [2.6375, 8.4191, 2.0438], + [4.2017, 5.2504, + -0.7851], [-1.0088, 5.4107, 1.6293], + [1.4837, 4.0268, 0.6222]]]) + + size_class = torch.tensor([[[ + -1.0061, -2.2788, 1.1322, -4.4380, -11.0526, -2.8113, -2.0642, -7.5886, + -4.8627, -5.0437 + ], + [ + -2.2058, -0.3527, -1.9976, 0.8815, -2.7980, + -1.9053, -0.5097, -2.0232, -1.4242, -4.1192 + ], + [ + -1.4783, -0.1009, -1.1537, 0.3052, -4.3147, + -2.6529, 0.2729, -0.3755, -2.6479, -3.7548 + ], + [ + -6.1809, -3.5024, -8.3273, 1.1252, -4.3315, + -7.8288, -4.6091, -5.8153, 0.7480, -10.1396 + ], + [ + -9.0424, -3.7883, -6.0788, -1.8855, + -10.2493, -9.7164, -1.0658, -4.1713, + 1.1173, -10.6204 + ]]]) + + size_res = torch.tensor([[[[-9.8976e-02, -5.2152e-01, -7.6421e-02], + [1.4593e-01, 5.6099e-01, 8.9421e-02], + [5.1481e-02, 3.9280e-01, 1.2705e-01], + [3.6869e-01, 7.0558e-01, 1.4647e-01], + [4.7683e-01, 3.3644e-01, 2.3481e-01], + [8.7346e-02, 8.4987e-01, 3.3265e-01], + [2.1393e-01, 8.5585e-01, 9.8948e-02], + [7.8530e-02, 5.9694e-02, -8.7211e-02], + [1.8551e-01, 1.1308e+00, -5.1864e-01], + [3.6485e-01, 7.3757e-01, 1.5264e-01]], + [[-9.5593e-01, -5.0455e-01, 1.9554e-01], + [-1.0870e-01, 1.8025e-01, 1.0228e-01], + [-8.2882e-02, -4.3771e-01, 9.2135e-02], + [-4.0840e-02, -5.9841e-02, 1.1982e-01], + [7.3448e-02, 5.2045e-02, 1.7301e-01], + [-4.0440e-02, 4.9532e-02, 1.1266e-01], + [3.5857e-02, 1.3564e-02, 1.0212e-01], + [-1.0407e-01, -5.9321e-02, 9.2622e-02], + [7.4691e-03, 9.3080e-02, -4.4077e-01], + [-6.0121e-02, -1.3381e-01, -6.8083e-02]], + [[-9.3970e-01, -9.7823e-01, -5.1075e-02], + [-1.2843e-01, -1.8381e-01, 7.1327e-02], + [-1.2247e-01, -8.1115e-01, 3.6495e-02], + [4.9154e-02, -4.5440e-02, 8.9520e-02], + [1.5653e-01, 3.5990e-02, 1.6414e-01], + [-5.9621e-02, 4.9357e-03, 1.4264e-01], + [8.5235e-04, -1.0030e-01, -3.0712e-02], + [-3.7255e-02, 2.8996e-02, 5.5545e-02], + [3.9298e-02, -4.7420e-02, -4.9147e-01], + [-1.1548e-01, -1.5895e-01, -3.9155e-02]], + [[-1.8725e+00, -7.4102e-01, 1.0524e+00], + [-3.3210e-01, 4.7828e-02, -3.2666e-02], + [-2.7949e-01, 5.5541e-02, -1.0059e-01], + [-8.5533e-02, 1.4870e-01, -1.6709e-01], + [3.8283e-01, 2.6609e-01, 2.1361e-01], + [-4.2156e-01, 3.2455e-01, 6.7309e-01], + [-2.4336e-02, -8.3366e-02, 3.9913e-01], + [8.2142e-03, 4.8323e-02, -1.5247e-01], + [-4.8142e-02, -3.0074e-01, -1.6829e-01], + [1.3274e-01, -2.3825e-01, -1.8127e-01]], + [[-1.2576e+00, -6.1550e-01, 7.9430e-01], + [-4.7222e-01, 1.5634e+00, -5.9460e-02], + [-3.5367e-01, 1.3616e+00, -1.6421e-01], + [-1.6611e-02, 2.4231e-01, -9.6188e-02], + [5.4486e-01, 4.6833e-01, 5.1151e-01], + [-6.1755e-01, 1.0292e+00, 1.2458e+00], + [-6.8152e-02, 2.4786e-01, 9.5088e-01], + [-4.8745e-02, 1.5134e-01, -9.9962e-02], + [2.4485e-03, -7.5991e-02, 1.3545e-01], + [4.1608e-01, -1.2093e-01, -3.1643e-01]]]]) + + dir_class = torch.tensor([[[ + -1.0230, -5.1965, -5.2195, 2.4030, -2.7661, -7.3399, -1.1640, -4.0630, + -5.2940, 0.8245, -3.1869, -6.1743 + ], + [ + -1.9503, -1.6940, -0.8716, -1.1494, -0.8196, + 0.2862, -0.2921, -0.7894, -0.2481, -0.9916, + -1.4304, -1.2466 + ], + [ + -1.7435, -1.2043, -0.1265, 0.5083, -0.0717, + -0.9560, -1.6171, -2.6463, -2.3863, -2.1358, + -1.8812, -2.3117 + ], + [ + -1.9282, 0.3792, -1.8426, -1.4587, -0.8582, + -3.4639, -3.2133, -3.7867, -7.6781, -6.4459, + -6.2455, -5.4797 + ], + [ + -3.1869, 0.4456, 
-0.5824, 0.9994, -1.0554, + -8.4232, -7.7019, -7.1382, -10.2724, + -7.8229, -8.1860, -8.6194 + ]]]) + + dir_res = torch.tensor( + [[[ + 1.1022e-01, -2.3750e-01, 2.0381e-01, 1.2177e-01, -2.8501e-01, + 1.5351e-01, 1.2218e-01, -2.0677e-01, 1.4468e-01, 1.1593e-01, + -2.6864e-01, 1.1290e-01 + ], + [ + -1.5788e-02, 4.1538e-02, -2.2857e-04, -1.4011e-02, 4.2560e-02, + -3.1186e-03, -5.0343e-02, 6.8110e-03, -2.6728e-02, -3.2781e-02, + 3.6889e-02, -1.5609e-03 + ], + [ + 1.9004e-02, 5.7105e-03, 6.0329e-02, 1.3074e-02, -2.5546e-02, + -1.1456e-02, -3.2484e-02, -3.3487e-02, 1.6609e-03, 1.7095e-02, + 1.2647e-05, 2.4814e-02 + ], + [ + 1.4482e-01, -6.3083e-02, 5.8307e-02, 9.1396e-02, -8.4571e-02, + 4.5890e-02, 5.6243e-02, -1.2448e-01, -9.5244e-02, 4.5746e-02, + -1.7390e-02, 9.0267e-02 + ], + [ + 1.8065e-01, -2.0078e-02, 8.5401e-02, 1.0784e-01, -1.2495e-01, + 2.2796e-02, 1.1310e-01, -8.4364e-02, -1.1904e-01, 6.1180e-02, + -1.8109e-02, 1.1229e-01 + ]]]) + bbox_out = dict( + center=center, + size_class=size_class, + size_res=size_res, + dir_class=dir_class, + dir_res=dir_res) + + bbox3d = box_coder.decode(bbox_out) + expected_bbox3d = torch.tensor( + [[[0.8014, 3.4134, -0.6133, 0.9750, 2.2602, 0.9725, 1.6926], + [2.6375, 8.4191, 2.0438, 0.5511, 0.4931, 0.9471, 2.6149], + [4.2017, 5.2504, -0.7851, 0.6411, 0.5075, 0.9168, 1.5839], + [-1.0088, 5.4107, 1.6293, 0.5064, 0.7017, 0.6602, 0.4605], + [1.4837, 4.0268, 0.6222, 0.4071, 0.9951, 1.8243, 1.6786]]]) + assert torch.allclose(bbox3d, expected_bbox3d, atol=1e-4) + + # test split_pred + box_preds = torch.rand(2, 79, 256) + base_xyz = torch.rand(2, 256, 3) + results = box_coder.split_pred(box_preds, base_xyz) + obj_scores = results['obj_scores'] + center = results['center'] + dir_class = results['dir_class'] + dir_res_norm = results['dir_res_norm'] + dir_res = results['dir_res'] + size_class = results['size_class'] + size_res_norm = results['size_res_norm'] + size_res = results['size_res'] + sem_scores = results['sem_scores'] + assert obj_scores.shape == torch.Size([2, 256, 2]) + assert center.shape == torch.Size([2, 256, 3]) + assert dir_class.shape == torch.Size([2, 256, 12]) + assert dir_res_norm.shape == torch.Size([2, 256, 12]) + assert dir_res.shape == torch.Size([2, 256, 12]) + assert size_class.shape == torch.Size([2, 256, 10]) + assert size_res_norm.shape == torch.Size([2, 256, 10, 3]) + assert size_res.shape == torch.Size([2, 256, 10, 3]) + assert sem_scores.shape == torch.Size([2, 256, 10]) diff --git a/tests/test_dataset/test_indoor_eval.py b/tests/test_indoor_eval.py similarity index 94% rename from tests/test_dataset/test_indoor_eval.py rename to tests/test_indoor_eval.py index 129d7d9dec..2bd9a25767 100644 --- a/tests/test_dataset/test_indoor_eval.py +++ b/tests/test_indoor_eval.py @@ -123,16 +123,15 @@ def test_indoor_eval(): box_type_3d=DepthInstance3DBoxes, box_mode_3d=Box3DMode.DEPTH) - assert abs(ret_value['cabinet_AP_0.25'] - 0.666667) < 1e-3 - assert abs(ret_value['bed_AP_0.25'] - 1.0) < 1e-3 - assert abs(ret_value['chair_AP_0.25'] - 0.5) < 1e-3 - assert abs(ret_value['mAP_0.25'] - 0.708333) < 1e-3 - assert abs(ret_value['mAR_0.25'] - 0.833333) < 1e-3 + assert np.isclose(ret_value['cabinet_AP_0.25'], 0.666667) + assert np.isclose(ret_value['bed_AP_0.25'], 1.0) + assert np.isclose(ret_value['chair_AP_0.25'], 0.5) + assert np.isclose(ret_value['mAP_0.25'], 0.708333) + assert np.isclose(ret_value['mAR_0.25'], 0.833333) def test_average_precision(): ap = average_precision( np.array([[0.25, 0.5, 0.75], [0.25, 0.5, 0.75]]), np.array([[1., 1., 1.], 
[1., 1., 1.]]), '11points') - print(ap[0]) assert abs(ap[0] - 0.06611571) < 0.001 diff --git a/tests/test_kitti_eval.py b/tests/test_kitti_eval.py new file mode 100644 index 0000000000..e671cabd48 --- /dev/null +++ b/tests/test_kitti_eval.py @@ -0,0 +1,242 @@ +import numpy as np +import pytest +import torch + +from mmdet3d.core.evaluation.kitti_utils.eval import (do_eval, eval_class, + kitti_eval) + + +def test_do_eval(): + if not torch.cuda.is_available(): + pytest.skip('test requires GPU and CUDA') + gt_name = np.array( + ['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car', 'DontCare', 'DontCare']) + gt_truncated = np.array([0., 0., 0., -1., -1., -1., -1.]) + gt_occluded = np.array([0, 0, 3, -1, -1, -1, -1]) + gt_alpha = np.array([-1.57, 1.85, -1.65, -10., -10., -10., -10.]) + gt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134], + [676.21954, 165.70988, 691.63745, 193.83748], + [389.4093, 182.48041, 421.49072, 202.13422], + [232.0577, 186.16724, 301.94623, 217.4024], + [758.6537, 172.98509, 816.32434, 212.76743], + [532.37, 176.35, 542.68, 185.27], + [559.62, 175.83, 575.4, 183.15]]) + gt_dimensions = np.array([[12.34, 2.85, 2.63], [3.69, 1.67, 1.87], + [2.02, 1.86, 0.6], [-1., -1., -1.], + [-1., -1., -1.], [-1., -1., -1.], + [-1., -1., -1.]]) + gt_location = np.array([[4.700e-01, 1.490e+00, 6.944e+01], + [-1.653e+01, 2.390e+00, 5.849e+01], + [4.590e+00, 1.320e+00, 4.584e+01], + [-1.000e+03, -1.000e+03, -1.000e+03], + [-1.000e+03, -1.000e+03, -1.000e+03], + [-1.000e+03, -1.000e+03, -1.000e+03], + [-1.000e+03, -1.000e+03, -1.000e+03]]) + gt_rotation_y = [-1.56, 1.57, -1.55, -10., -10., -10., -10.] + gt_anno = dict( + name=gt_name, + truncated=gt_truncated, + occluded=gt_occluded, + alpha=gt_alpha, + bbox=gt_bbox, + dimensions=gt_dimensions, + location=gt_location, + rotation_y=gt_rotation_y) + + dt_name = np.array(['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car']) + dt_truncated = np.array([0., 0., 0., 0., 0.]) + dt_occluded = np.array([0, 0, 0, 0, 0]) + dt_alpha = np.array([1.0744612, 1.2775835, 1.82563, 2.1145396, -1.7676563]) + dt_dimensions = np.array([[1.4441837, 1.7450154, 0.53160036], + [1.6501029, 1.7540325, 0.5162356], + [3.9313498, 1.4899347, 1.5655756], + [4.0111866, 1.5350999, 1.585221], + [3.7337692, 1.5117968, 1.5515774]]) + dt_location = np.array([[4.6671643, 1.285098, 45.836895], + [4.658241, 1.3088846, 45.85148], + [-16.598526, 2.298814, 58.618088], + [-18.629122, 2.2990575, 39.305355], + [7.0964046, 1.5178275, 29.32426]]) + dt_rotation_y = np.array( + [1.174933, 1.3778262, 1.550529, 1.6742425, -1.5330327]) + dt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134], + [676.21954, 165.70988, 691.63745, 193.83748], + [389.4093, 182.48041, 421.49072, 202.13422], + [232.0577, 186.16724, 301.94623, 217.4024], + [758.6537, 172.98509, 816.32434, 212.76743]]) + dt_score = np.array( + [0.18151495, 0.57920843, 0.27795696, 0.23100418, 0.21541929]) + dt_anno = dict( + name=dt_name, + truncated=dt_truncated, + occluded=dt_occluded, + alpha=dt_alpha, + bbox=dt_bbox, + dimensions=dt_dimensions, + location=dt_location, + rotation_y=dt_rotation_y, + score=dt_score) + current_classes = [1, 2, 0] + min_overlaps = np.array([[[0.5, 0.5, 0.7], [0.5, 0.5, 0.7], + [0.5, 0.5, 0.7]], + [[0.5, 0.5, 0.7], [0.25, 0.25, 0.5], + [0.25, 0.25, 0.5]]]) + eval_types = ['bbox', 'bev', '3d', 'aos'] + mAP_bbox, mAP_bev, mAP_3d, mAP_aos = do_eval([gt_anno], [dt_anno], + current_classes, min_overlaps, + eval_types) + expected_mAP_bbox = np.array([[[0., 0.], [9.09090909, 9.09090909], + [9.09090909, 
9.09090909]], + [[0., 0.], [9.09090909, 9.09090909], + [9.09090909, 9.09090909]], + [[0., 0.], [9.09090909, 9.09090909], + [9.09090909, 9.09090909]]]) + expected_mAP_bev = np.array([[[0., 0.], [0., 0.], [0., 0.]], + [[0., 0.], [0., 0.], [0., 0.]], + [[0., 0.], [0., 0.], [0., 0.]]]) + expected_mAP_3d = np.array([[[0., 0.], [0., 0.], [0., 0.]], + [[0., 0.], [0., 0.], [0., 0.]], + [[0., 0.], [0., 0.], [0., 0.]]]) + expected_mAP_aos = np.array([[[0., 0.], [0.55020816, 0.55020816], + [0.55020816, 0.55020816]], + [[0., 0.], [8.36633862, 8.36633862], + [8.36633862, 8.36633862]], + [[0., 0.], [8.63476893, 8.63476893], + [8.63476893, 8.63476893]]]) + assert np.allclose(mAP_bbox, expected_mAP_bbox) + assert np.allclose(mAP_bev, expected_mAP_bev) + assert np.allclose(mAP_3d, expected_mAP_3d) + assert np.allclose(mAP_aos, expected_mAP_aos) + + +def test_kitti_eval(): + if not torch.cuda.is_available(): + pytest.skip('test requires GPU and CUDA') + gt_name = np.array( + ['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car', 'DontCare', 'DontCare']) + gt_truncated = np.array([0., 0., 0., -1., -1., -1., -1.]) + gt_occluded = np.array([0, 0, 3, -1, -1, -1, -1]) + gt_alpha = np.array([-1.57, 1.85, -1.65, -10., -10., -10., -10.]) + gt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134], + [676.21954, 165.70988, 691.63745, 193.83748], + [389.4093, 182.48041, 421.49072, 202.13422], + [232.0577, 186.16724, 301.94623, 217.4024], + [758.6537, 172.98509, 816.32434, 212.76743], + [532.37, 176.35, 542.68, 185.27], + [559.62, 175.83, 575.4, 183.15]]) + gt_dimensions = np.array([[12.34, 2.85, 2.63], [3.69, 1.67, 1.87], + [2.02, 1.86, 0.6], [-1., -1., -1.], + [-1., -1., -1.], [-1., -1., -1.], + [-1., -1., -1.]]) + gt_location = np.array([[4.700e-01, 1.490e+00, 6.944e+01], + [-1.653e+01, 2.390e+00, 5.849e+01], + [4.590e+00, 1.320e+00, 4.584e+01], + [-1.000e+03, -1.000e+03, -1.000e+03], + [-1.000e+03, -1.000e+03, -1.000e+03], + [-1.000e+03, -1.000e+03, -1.000e+03], + [-1.000e+03, -1.000e+03, -1.000e+03]]) + gt_rotation_y = [-1.56, 1.57, -1.55, -10., -10., -10., -10.] 
+ gt_anno = dict( + name=gt_name, + truncated=gt_truncated, + occluded=gt_occluded, + alpha=gt_alpha, + bbox=gt_bbox, + dimensions=gt_dimensions, + location=gt_location, + rotation_y=gt_rotation_y) + + dt_name = np.array(['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car']) + dt_truncated = np.array([0., 0., 0., 0., 0.]) + dt_occluded = np.array([0, 0, 0, 0, 0]) + dt_alpha = np.array([1.0744612, 1.2775835, 1.82563, 2.1145396, -1.7676563]) + dt_dimensions = np.array([[1.4441837, 1.7450154, 0.53160036], + [1.6501029, 1.7540325, 0.5162356], + [3.9313498, 1.4899347, 1.5655756], + [4.0111866, 1.5350999, 1.585221], + [3.7337692, 1.5117968, 1.5515774]]) + dt_location = np.array([[4.6671643, 1.285098, 45.836895], + [4.658241, 1.3088846, 45.85148], + [-16.598526, 2.298814, 58.618088], + [-18.629122, 2.2990575, 39.305355], + [7.0964046, 1.5178275, 29.32426]]) + dt_rotation_y = np.array( + [1.174933, 1.3778262, 1.550529, 1.6742425, -1.5330327]) + dt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134], + [676.21954, 165.70988, 691.63745, 193.83748], + [389.4093, 182.48041, 421.49072, 202.13422], + [232.0577, 186.16724, 301.94623, 217.4024], + [758.6537, 172.98509, 816.32434, 212.76743]]) + dt_score = np.array( + [0.18151495, 0.57920843, 0.27795696, 0.23100418, 0.21541929]) + dt_anno = dict( + name=dt_name, + truncated=dt_truncated, + occluded=dt_occluded, + alpha=dt_alpha, + bbox=dt_bbox, + dimensions=dt_dimensions, + location=dt_location, + rotation_y=dt_rotation_y, + score=dt_score) + + current_classes = [1, 2, 0] + result, ret_dict = kitti_eval([gt_anno], [dt_anno], current_classes) + assert np.isclose(ret_dict['KITTI/Overall_2D_moderate'], 9.090909090909092) + assert np.isclose(ret_dict['KITTI/Overall_2D_hard'], 9.090909090909092) + + +def test_eval_class(): + gt_name = np.array( + ['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car', 'DontCare', 'DontCare']) + gt_truncated = np.array([0., 0., 0., -1., -1., -1., -1.]) + gt_occluded = np.array([0, 0, 3, -1, -1, -1, -1]) + gt_alpha = np.array([-1.57, 1.85, -1.65, -10., -10., -10., -10.]) + gt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134], + [676.21954, 165.70988, 691.63745, 193.83748], + [389.4093, 182.48041, 421.49072, 202.13422], + [232.0577, 186.16724, 301.94623, 217.4024], + [758.6537, 172.98509, 816.32434, 212.76743], + [532.37, 176.35, 542.68, 185.27], + [559.62, 175.83, 575.4, 183.15]]) + gt_anno = dict( + name=gt_name, + truncated=gt_truncated, + occluded=gt_occluded, + alpha=gt_alpha, + bbox=gt_bbox) + + dt_name = np.array(['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car']) + dt_truncated = np.array([0., 0., 0., 0., 0.]) + dt_occluded = np.array([0, 0, 0, 0, 0]) + dt_alpha = np.array([1.0744612, 1.2775835, 1.82563, 2.1145396, -1.7676563]) + dt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134], + [676.21954, 165.70988, 691.63745, 193.83748], + [389.4093, 182.48041, 421.49072, 202.13422], + [232.0577, 186.16724, 301.94623, 217.4024], + [758.6537, 172.98509, 816.32434, 212.76743]]) + dt_score = np.array( + [0.18151495, 0.57920843, 0.27795696, 0.23100418, 0.21541929]) + dt_anno = dict( + name=dt_name, + truncated=dt_truncated, + occluded=dt_occluded, + alpha=dt_alpha, + bbox=dt_bbox, + score=dt_score) + current_classes = [1, 2, 0] + difficultys = [0, 1, 2] + metric = 0 + min_overlaps = np.array([[[0.5, 0.5, 0.7], [0.5, 0.5, 0.7], + [0.5, 0.5, 0.7]], + [[0.5, 0.5, 0.7], [0.25, 0.25, 0.5], + [0.25, 0.25, 0.5]]]) + + ret_dict = eval_class([gt_anno], [dt_anno], current_classes, difficultys, + metric, min_overlaps, True, 1) + 
recall_sum = np.sum(ret_dict['recall']) + precision_sum = np.sum(ret_dict['precision']) + orientation_sum = np.sum(ret_dict['orientation']) + assert np.isclose(recall_sum, 16) + assert np.isclose(precision_sum, 16) + assert np.isclose(orientation_sum, 10.252829201850309) diff --git a/tests/test_merge_augs.py b/tests/test_merge_augs.py new file mode 100644 index 0000000000..555fe283db --- /dev/null +++ b/tests/test_merge_augs.py @@ -0,0 +1,61 @@ +import mmcv +import pytest +import torch + +from mmdet3d.core import merge_aug_bboxes_3d +from mmdet3d.core.bbox import DepthInstance3DBoxes + + +def test_merge_aug_bboxes_3d(): + if not torch.cuda.is_available(): + pytest.skip('test requires GPU and torch+cuda') + img_meta_0 = dict( + pcd_horizontal_flip=False, + pcd_vertical_flip=True, + pcd_scale_factor=1.0) + img_meta_1 = dict( + pcd_horizontal_flip=True, + pcd_vertical_flip=False, + pcd_scale_factor=1.0) + img_meta_2 = dict( + pcd_horizontal_flip=False, + pcd_vertical_flip=False, + pcd_scale_factor=0.5) + img_metas = [[img_meta_0], [img_meta_1], [img_meta_2]] + boxes_3d = DepthInstance3DBoxes( + torch.tensor( + [[1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.6956], + [2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.6500], + [-1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 0.1022]], + device='cuda')) + labels_3d = torch.tensor([0, 7, 6]) + scores_3d = torch.tensor([0.5, 1.0, 1.0]) + aug_result = dict( + boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d) + aug_results = [aug_result, aug_result, aug_result] + test_cfg = mmcv.ConfigDict( + use_rotate_nms=True, + nms_across_levels=False, + nms_thr=0.01, + score_thr=0.1, + min_bbox_size=0, + nms_pre=100, + max_num=50) + results = merge_aug_bboxes_3d(aug_results, img_metas, test_cfg) + expected_boxes_3d = torch.tensor( + [[-1.0864, -1.9045, -1.2000, 0.7128, 1.5631, 2.1045, -0.1022], + [1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 3.0394], + [-2.1728, 3.8090, -2.4000, 1.4256, 3.1262, 4.2090, 0.1022], + [2.5831, -4.8117, -1.2733, 0.5852, 0.8832, 0.9733, -1.6500], + [-2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.4916], + [5.1662, 9.6234, -2.5466, 1.1704, 1.7664, 1.9466, 1.6500], + [1.0473, -4.1687, -1.2317, 2.3021, 1.8876, 1.9696, -1.6956], + [-1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.4460], + [2.0946, 8.3374, -2.4634, 4.6042, 3.7752, 3.9392, 1.6956]]) + expected_scores_3d = torch.tensor([ + 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.5000, 0.5000, 0.5000 + ]) + expected_labels_3d = torch.tensor([6, 6, 6, 7, 7, 7, 0, 0, 0]) + assert torch.allclose(results['boxes_3d'].tensor, expected_boxes_3d) + assert torch.allclose(results['scores_3d'], expected_scores_3d) + assert torch.all(results['labels_3d'] == expected_labels_3d) diff --git a/tests/test_voxel_generator.py b/tests/test_voxel_generator.py new file mode 100644 index 0000000000..a49caece5c --- /dev/null +++ b/tests/test_voxel_generator.py @@ -0,0 +1,22 @@ +import numpy as np + +from mmdet3d.core.voxel.voxel_generator import VoxelGenerator + + +def test_voxel_generator(): + np.random.seed(0) + voxel_size = [0.5, 0.5, 0.5] + point_cloud_range = [0, -40, -3, 70.4, 40, 1] + max_num_points = 1000 + self = VoxelGenerator(voxel_size, point_cloud_range, max_num_points) + points = np.random.rand(1000, 4) + voxels = self.generate(points) + coors, voxels, num_points_per_voxel = voxels + expected_voxels = np.array([[7, 81, 1], [6, 81, 0], [7, 80, 1], [6, 81, 1], + [7, 81, 0], [6, 80, 1], [7, 80, 0], [6, 80, + 0]]) + expected_num_points_per_voxel = np.array( + [120, 
121, 127, 134, 115, 127, 125, 131])
+    assert np.all(voxels == expected_voxels)
+    assert coors.shape == (8, 1000, 4)
+    assert np.all(num_points_per_voxel == expected_num_points_per_voxel)

From 65264596671b9372f4be55efd6cc51920bc987b5 Mon Sep 17 00:00:00 2001
From: yinchimaoliang
Date: Sun, 2 Aug 2020 20:45:31 +0800
Subject: [PATCH 02/14] Add ply doc (#41)

* Add ply convert

* Change example.

* Change shell to python.
---
 docs/getting_started.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index ea4591a708..da8143708d 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -213,7 +213,29 @@ Examples:
 ```shell
 python demo/pcd_demo.py demo/kitti_000008.bin configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py checkpoints/hv_second_secfpn_6x8_80e_kitti-3d-car_20200620_230238-393f000c.pth
 ```
+If you want to input a `ply` file, you can use the following function to convert it to the `bin` format. Then you can use the converted `bin` file to run the demo.
+Note that you need to install `pandas` and `plyfile` before using this script. This function can also be used to preprocess `ply` data for training.
+```python
+import numpy as np
+import pandas as pd
+from plyfile import PlyData
+
+def convert_ply(input_path, output_path):
+    plydata = PlyData.read(input_path)  # read file
+    data = plydata.elements[0].data  # read data
+    data_pd = pd.DataFrame(data)  # convert to DataFrame
+    data_np = np.zeros(data_pd.shape, dtype=np.float)  # initialize array to store data
+    property_names = data[0].dtype.names  # read names of properties
+    for i, name in enumerate(
+            property_names):  # read data by property
+        data_np[:, i] = data_pd[name]
+    data_np.astype(np.float32).tofile(output_path)
+```
+Examples:
+```python
+convert_ply('./test.ply', './test.bin')
+```
 
 ### High-level APIs for testing point clouds

From d865e2cefdc92f98a27358da598ac1d835857fff Mon Sep 17 00:00:00 2001
From: Wenwei Zhang <40779233+ZwwWayne@users.noreply.github.com>
Date: Sat, 8 Aug 2020 01:37:21 +0800
Subject: [PATCH 03/14] Rename CosineAnealing to CosineAnnealing (#57)

---
 .../dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py        | 2 +-
 .../dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py | 2 +-
 docs/config.md                                                | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/configs/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py b/configs/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
index be9ffc0a8b..8add406000 100644
--- a/configs/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
+++ b/configs/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
@@ -27,7 +27,7 @@
     weight_decay=0.001)
 lr_config = dict(
     _delete_=True,
-    policy='CosineAnealing',
+    policy='CosineAnnealing',
     warmup='linear',
     warmup_iters=1000,
     warmup_ratio=1.0 / 10,
diff --git a/configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py b/configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py
index 31b1fe72cd..c7cc6001cf 100644
--- a/configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py
+++ b/configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py
@@ -228,7 +228,7 @@
 # max_norm=10 is better for SECOND
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 lr_config = dict(
-    policy='CosineAnealing',
+    policy='CosineAnnealing',
     warmup='linear',
     warmup_iters=1000,
     warmup_ratio=1.0 / 10,
diff --git a/docs/config.md b/docs/config.md
index d4c5cb00cd..b83b4f8db4 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -324,7 +324,7 @@ optimizer_config = dict( # Config used to build the optimizer hook, refer to ht
         max_norm=10,  # max norm of the gradients
         norm_type=2))  # Type of the used p-norm. Can be 'inf' for infinity norm.
 lr_config = dict(  # Learning rate scheduler config used to register LrUpdater hook
-    policy='step',  # The policy of scheduler, also support CosineAnealing, Cyclic, etc. Refer to details of supported LrUpdater from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9.
+    policy='step',  # The policy of scheduler, also support CosineAnnealing, Cyclic, etc. Refer to details of supported LrUpdater from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9.
     warmup=None,  # The warmup policy, also support `exp` and `constant`.
     step=[24, 32])  # Steps to decay the learning rate
 checkpoint_config = dict(  # Config to set the checkpoint hook, Refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for implementation.

From 5f7b31cc26884f320f5fb07e7b168afe526382dc Mon Sep 17 00:00:00 2001
From: twang <30491025+Tai-Wang@users.noreply.github.com>
Date: Sat, 8 Aug 2020 01:38:16 +0800
Subject: [PATCH 04/14] Add fuse_conv_bn from mmdet/tools (#51)

---
 tools/fuse_conv_bn.py | 67 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 tools/fuse_conv_bn.py

diff --git a/tools/fuse_conv_bn.py b/tools/fuse_conv_bn.py
new file mode 100644
index 0000000000..c00219b7b9
--- /dev/null
+++ b/tools/fuse_conv_bn.py
@@ -0,0 +1,67 @@
+import argparse
+
+import torch
+import torch.nn as nn
+from mmcv.runner import save_checkpoint
+
+from mmdet.apis import init_detector
+
+
+def fuse_conv_bn(conv, bn):
+    """During inference, the functionality of batch norm layers is turned off
+    but only the mean and var along channels are used, which exposes the
+    chance to fuse it with the preceding conv layers to save computations and
+    simplify network structures."""
+    conv_w = conv.weight
+    conv_b = conv.bias if conv.bias is not None else torch.zeros_like(
+        bn.running_mean)
+
+    factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
+    conv.weight = nn.Parameter(conv_w *
+                               factor.reshape([conv.out_channels, 1, 1, 1]))
+    conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)
+    return conv
+
+
+def fuse_module(m):
+    last_conv = None
+    last_conv_name = None
+
+    for name, child in m.named_children():
+        if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)):
+            if last_conv is None:  # only fuse BN that is after Conv
+                continue
+            fused_conv = fuse_conv_bn(last_conv, child)
+            m._modules[last_conv_name] = fused_conv
+            # To reduce changes, set BN as Identity instead of deleting it.
+ m._modules[name] = nn.Identity() + last_conv = None + elif isinstance(child, nn.Conv2d): + last_conv = child + last_conv_name = name + else: + fuse_module(child) + return m + + +def parse_args(): + parser = argparse.ArgumentParser( + description='fuse Conv and BN layers in a model') + parser.add_argument('config', help='config file path') + parser.add_argument('checkpoint', help='checkpoint file path') + parser.add_argument('out', help='output path of the converted model') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + # build the model from a config file and a checkpoint file + model = init_detector(args.config, args.checkpoint) + # fuse conv and bn layers of the model + fused_model = fuse_module(model) + save_checkpoint(fused_model, args.out) + + +if __name__ == '__main__': + main() From 27d0001e873b3102a828a27e1372873fcf81ed7e Mon Sep 17 00:00:00 2001 From: yinchimaoliang Date: Wed, 19 Aug 2020 00:28:33 +0800 Subject: [PATCH 05/14] Add modules before mg_head in centerpoint (#46) * Add centerpoint_rpn and scn, change pillar encoder and voxel_encoder * Move test_voxel_encoders. * Change names, add docstring. * Reconstruct centerpoint_rpn. * Add centerpoint_rpn. * Change SECONDFPN, delete centerpoint_fpn * Remove SparseBasicBlock. * Change SpMiddleResNetFHD to SparseEncoder. * Finish SparseEncoder unittest. * Change test_hard_simple_VFE. * Change option, add legacy. * Change docstring, change legacy. * Fix legacy bug. * Change unittest, change docstring. * Change docstring. --- .../models/middle_encoders/sparse_encoder.py | 54 ++++++++++++++++--- mmdet3d/models/necks/second_fpn.py | 45 +++++++++++----- .../models/voxel_encoders/pillar_encoder.py | 32 +++++++---- .../models/voxel_encoders/voxel_encoder.py | 8 ++- tests/test_middle_encoders.py | 26 +++++++++ tests/test_necks.py | 45 ++++++++++++++++ tests/test_voxel_encoders.py | 33 ++++++++++++ 7 files changed, 212 insertions(+), 31 deletions(-) create mode 100644 tests/test_middle_encoders.py create mode 100644 tests/test_necks.py create mode 100644 tests/test_voxel_encoders.py diff --git a/mmdet3d/models/middle_encoders/sparse_encoder.py b/mmdet3d/models/middle_encoders/sparse_encoder.py index 7f372b60ff..00462aad01 100644 --- a/mmdet3d/models/middle_encoders/sparse_encoder.py +++ b/mmdet3d/models/middle_encoders/sparse_encoder.py @@ -1,6 +1,6 @@ from torch import nn as nn -from mmdet3d.ops import make_sparse_convmodule +from mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule from mmdet3d.ops import spconv as spconv from ..registry import MIDDLE_ENCODERS @@ -12,12 +12,19 @@ class SparseEncoder(nn.Module): Args: in_channels (int): The number of input channels. sparse_shape (list[int]): The sparse shape of input tensor. - norm_cfg (dict): Config of normalization layer. + order (list[str]): Order of conv module. Defaults to ('conv', + 'norm', 'act'). + norm_cfg (dict): Config of normalization layer. Defaults to + dict(type='BN1d', eps=1e-3, momentum=0.01). base_channels (int): Out channels for conv_input layer. + Defaults to 16. output_channels (int): Out channels for conv_out layer. + Defaults to 128. encoder_channels (tuple[tuple[int]]): Convolutional channels of each encode block. encoder_paddings (tuple[tuple[int]]): Paddings of each encode block. + Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)). + block_type (str): Type of the block to use. Defaults to 'conv_module'. 
""" def __init__(self, @@ -30,8 +37,10 @@ def __init__(self, encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)), encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, - 1))): + 1)), + block_type='conv_module'): super().__init__() + assert block_type in ['conv_module', 'basicblock'] self.sparse_shape = sparse_shape self.in_channels = in_channels self.order = order @@ -66,7 +75,10 @@ def __init__(self, conv_type='SubMConv3d') encoder_out_channels = self.make_encoder_layers( - make_sparse_convmodule, norm_cfg, self.base_channels) + make_sparse_convmodule, + norm_cfg, + self.base_channels, + block_type=block_type) self.conv_out = make_sparse_convmodule( encoder_out_channels, @@ -111,17 +123,27 @@ def forward(self, voxel_features, coors, batch_size): return spatial_features - def make_encoder_layers(self, make_block, norm_cfg, in_channels): + def make_encoder_layers(self, + make_block, + norm_cfg, + in_channels, + block_type='conv_module', + conv_cfg=dict(type='SubMConv3d')): """make encoder layers using sparse convs. Args: make_block (method): A bounded function to build blocks. norm_cfg (dict[str]): Config of normalization layer. in_channels (int): The number of encoder input channels. + block_type (str): Type of the block to use. Defaults to + 'conv_module'. + conv_cfg (dict): Config of conv layer. Defaults to + dict(type='SubMConv3d'). Returns: int: The number of encoder output channels. """ + assert block_type in ['conv_module', 'basicblock'] self.encoder_layers = spconv.SparseSequential() for i, blocks in enumerate(self.encoder_channels): @@ -130,7 +152,7 @@ def make_encoder_layers(self, make_block, norm_cfg, in_channels): padding = tuple(self.encoder_paddings[i])[j] # each stage started with a spconv layer # except the first stage - if i != 0 and j == 0: + if i != 0 and j == 0 and block_type == 'conv_module': blocks_list.append( make_block( in_channels, @@ -141,6 +163,26 @@ def make_encoder_layers(self, make_block, norm_cfg, in_channels): padding=padding, indice_key=f'spconv{i + 1}', conv_type='SparseConv3d')) + elif block_type == 'basicblock': + if j == len(blocks) - 1 and i != len( + self.encoder_channels) - 1: + blocks_list.append( + make_block( + in_channels, + out_channels, + 3, + norm_cfg=norm_cfg, + stride=2, + padding=padding, + indice_key=f'spconv{i + 1}', + conv_type='SparseConv3d')) + else: + blocks_list.append( + SparseBasicBlock( + out_channels, + out_channels, + norm_cfg=norm_cfg, + conv_cfg=conv_cfg)) else: blocks_list.append( make_block( diff --git a/mmdet3d/models/necks/second_fpn.py b/mmdet3d/models/necks/second_fpn.py index 2b2a404840..ec9d40ee5c 100644 --- a/mmdet3d/models/necks/second_fpn.py +++ b/mmdet3d/models/necks/second_fpn.py @@ -1,6 +1,7 @@ +import numpy as np import torch -from mmcv.cnn import (build_norm_layer, build_upsample_layer, constant_init, - is_norm, kaiming_init) +from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer, + constant_init, is_norm, kaiming_init) from torch import nn as nn from mmdet.models import NECKS @@ -11,11 +12,14 @@ class SECONDFPN(nn.Module): """FPN used in SECOND/PointPillars/PartA2/MVXNet. Args: - in_channels (list[int]): Input channels of multi-scale feature maps - out_channels (list[int]): Output channels of feature maps - upsample_strides (list[int]): Strides used to upsample the feature maps - norm_cfg (dict): Config dict of normalization layers - upsample_cfg (dict): Config dict of upsample layers + in_channels (list[int]): Input channels of multi-scale feature maps. 
+ out_channels (list[int]): Output channels of feature maps. + upsample_strides (list[int]): Strides used to upsample the + feature maps. + norm_cfg (dict): Config dict of normalization layers. + upsample_cfg (dict): Config dict of upsample layers. + conv_cfg (dict): Config dict of conv layers. + use_conv_for_no_stride (bool): Whether to use conv when stride is 1. """ def __init__(self, @@ -23,7 +27,9 @@ def __init__(self, out_channels=[256, 256, 256], upsample_strides=[1, 2, 4], norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), - upsample_cfg=dict(type='deconv', bias=False)): + upsample_cfg=dict(type='deconv', bias=False), + conv_cfg=dict(type='Conv2d', bias=False), + use_conv_for_no_stride=False): # if for GroupNorm, # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True) super(SECONDFPN, self).__init__() @@ -33,12 +39,23 @@ def __init__(self, deblocks = [] for i, out_channel in enumerate(out_channels): - upsample_layer = build_upsample_layer( - upsample_cfg, - in_channels=in_channels[i], - out_channels=out_channel, - kernel_size=upsample_strides[i], - stride=upsample_strides[i]) + stride = upsample_strides[i] + if stride > 1 or (stride == 1 and not use_conv_for_no_stride): + upsample_layer = build_upsample_layer( + upsample_cfg, + in_channels=in_channels[i], + out_channels=out_channel, + kernel_size=upsample_strides[i], + stride=upsample_strides[i]) + else: + stride = np.round(1 / stride).astype(np.int64) + upsample_layer = build_conv_layer( + conv_cfg, + in_channels=in_channels[i], + out_channels=out_channel, + kernel_size=stride, + stride=stride) + deblock = nn.Sequential(upsample_layer, build_norm_layer(norm_cfg, out_channel)[1], nn.ReLU(inplace=True)) diff --git a/mmdet3d/models/voxel_encoders/pillar_encoder.py b/mmdet3d/models/voxel_encoders/pillar_encoder.py index 58a28cd58a..3411260971 100644 --- a/mmdet3d/models/voxel_encoders/pillar_encoder.py +++ b/mmdet3d/models/voxel_encoders/pillar_encoder.py @@ -31,6 +31,8 @@ class PillarFeatureNet(nn.Module): Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01). mode (str, optional): The mode to gather point features. Options are 'max' or 'avg'. Defaults to 'max'. + legacy (bool): Whether to use the new behavior or + the original behavior. Defaults to True. """ def __init__(self, @@ -42,9 +44,11 @@ def __init__(self, voxel_size=(0.2, 0.2, 4), point_cloud_range=(0, -40, -3, 70.4, 40, 1), norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), - mode='max'): + mode='max', + legacy=True): super(PillarFeatureNet, self).__init__() assert len(feat_channels) > 0 + self.legacy = legacy if with_cluster_center: in_channels += 3 if with_voxel_center: @@ -89,7 +93,7 @@ def forward(self, features, num_points, coors): features (torch.Tensor): Point features or raw points in shape (N, M, C). num_points (torch.Tensor): Number of points in each pillar. - coors (torch.Tensor): Coordinates of each voxel + coors (torch.Tensor): Coordinates of each voxel. Returns: torch.Tensor: Features of pillars. 
@@ -104,14 +108,24 @@ def forward(self, features, num_points, coors): features_ls.append(f_cluster) # Find distance of x, y, and z from pillar center + dtype = features.dtype if self._with_voxel_center: - f_center = features[:, :, :2] - f_center[:, :, 0] = f_center[:, :, 0] - ( - coors[:, 3].type_as(features).unsqueeze(1) * self.vx + - self.x_offset) - f_center[:, :, 1] = f_center[:, :, 1] - ( - coors[:, 2].type_as(features).unsqueeze(1) * self.vy + - self.y_offset) + if not self.legacy: + f_center = torch.zeros_like(features[:, :, :2]) + f_center[:, :, 0] = features[:, :, 0] - ( + coors[:, 3].to(dtype).unsqueeze(1) * self.vx + + self.x_offset) + f_center[:, :, 1] = features[:, :, 1] - ( + coors[:, 2].to(dtype).unsqueeze(1) * self.vy + + self.y_offset) + else: + f_center = features[:, :, :2] + f_center[:, :, 0] = f_center[:, :, 0] - ( + coors[:, 3].type_as(features).unsqueeze(1) * self.vx + + self.x_offset) + f_center[:, :, 1] = f_center[:, :, 1] - ( + coors[:, 2].type_as(features).unsqueeze(1) * self.vy + + self.y_offset) features_ls.append(f_center) if self._with_distance: diff --git a/mmdet3d/models/voxel_encoders/voxel_encoder.py b/mmdet3d/models/voxel_encoders/voxel_encoder.py index 67a4ab8162..b647eb608f 100644 --- a/mmdet3d/models/voxel_encoders/voxel_encoder.py +++ b/mmdet3d/models/voxel_encoders/voxel_encoder.py @@ -13,10 +13,14 @@ class HardSimpleVFE(nn.Module): """Simple voxel feature encoder used in SECOND. It simply averages the values of points in a voxel. + + Args: + num_features (int): Number of features to use. Default: 4. """ - def __init__(self): + def __init__(self, num_features=4): super(HardSimpleVFE, self).__init__() + self.num_features = num_features def forward(self, features, num_points, coors): """Forward function. @@ -32,7 +36,7 @@ def forward(self, features, num_points, coors): Returns: torch.Tensor: Mean of points inside each voxel in shape (N, 3(4)) """ - points_mean = features[:, :, :4].sum( + points_mean = features[:, :, :self.num_features].sum( dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1) return points_mean.contiguous() diff --git a/tests/test_middle_encoders.py b/tests/test_middle_encoders.py new file mode 100644 index 0000000000..e3ab306eb4 --- /dev/null +++ b/tests/test_middle_encoders.py @@ -0,0 +1,26 @@ +import pytest +import torch + +from mmdet3d.models.builder import build_middle_encoder + + +def test_sparse_encoder(): + if not torch.cuda.is_available(): + pytest.skip('test requires GPU and torch+cuda') + sparse_encoder_cfg = dict( + type='SparseEncoder', + in_channels=5, + sparse_shape=[40, 1024, 1024], + order=('conv', 'norm', 'act'), + encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, + 128)), + encoder_paddings=((1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, + 1)), + option='basicblock') + + sparse_encoder = build_middle_encoder(sparse_encoder_cfg).cuda() + voxel_features = torch.rand([207842, 5]).cuda() + coors = torch.randint(0, 4, [207842, 4]).cuda() + + ret = sparse_encoder(voxel_features, coors, 4) + assert ret.shape == torch.Size([4, 256, 128, 128]) diff --git a/tests/test_necks.py b/tests/test_necks.py new file mode 100644 index 0000000000..7e924cadaa --- /dev/null +++ b/tests/test_necks.py @@ -0,0 +1,45 @@ +import torch + +from mmdet3d.models.builder import build_backbone, build_neck + + +def test_centerpoint_fpn(): + + second_cfg = dict( + type='SECOND', + in_channels=64, + out_channels=[64, 128, 256], + layer_nums=[3, 5, 5], + layer_strides=[2, 2, 2], + norm_cfg=dict(type='BN', eps=1e-3, 
momentum=0.01), + conv_cfg=dict(type='Conv2d', bias=False)) + + second = build_backbone(second_cfg) + + # centerpoint usage of fpn + centerpoint_fpn_cfg = dict( + type='SECONDFPN', + in_channels=[64, 128, 256], + out_channels=[128, 128, 128], + upsample_strides=[0.5, 1, 2], + norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), + upsample_cfg=dict(type='deconv', bias=False), + use_conv_for_no_stride=True) + + # original usage of fpn + fpn_cfg = dict( + type='SECONDFPN', + in_channels=[64, 128, 256], + upsample_strides=[1, 2, 4], + out_channels=[128, 128, 128]) + + second_fpn = build_neck(fpn_cfg) + + centerpoint_second_fpn = build_neck(centerpoint_fpn_cfg) + + input = torch.rand([4, 64, 512, 512]) + sec_output = second(input) + centerpoint_output = centerpoint_second_fpn(sec_output) + second_output = second_fpn(sec_output) + assert centerpoint_output[0].shape == torch.Size([4, 384, 128, 128]) + assert second_output[0].shape == torch.Size([4, 384, 256, 256]) diff --git a/tests/test_voxel_encoders.py b/tests/test_voxel_encoders.py new file mode 100644 index 0000000000..f7503a8c34 --- /dev/null +++ b/tests/test_voxel_encoders.py @@ -0,0 +1,33 @@ +import torch + +from mmdet3d.models.builder import build_voxel_encoder + + +def test_pillar_feature_net(): + pillar_feature_net_cfg = dict( + type='PillarFeatureNet', + in_channels=5, + feat_channels=[64], + with_distance=False, + voxel_size=(0.2, 0.2, 8), + point_cloud_range=(-51.2, -51.2, -5.0, 51.2, 51.2, 3.0), + norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01)) + + pillar_feature_net = build_voxel_encoder(pillar_feature_net_cfg) + + features = torch.rand([97297, 20, 5]) + num_voxels = torch.randint(1, 100, [97297]) + coors = torch.randint(0, 100, [97297, 4]) + + features = pillar_feature_net(features, num_voxels, coors) + assert features.shape == torch.Size([97297, 64]) + + +def test_hard_simple_VFE(): + hard_simple_VFE_cfg = dict(type='HardSimpleVFE', num_features=5) + hard_simple_VFE = build_voxel_encoder(hard_simple_VFE_cfg) + features = torch.rand([240000, 10, 5]) + num_voxels = torch.randint(1, 10, [240000]) + + outputs = hard_simple_VFE(features, num_voxels, None) + assert outputs.shape == torch.Size([240000, 5]) From 3e36ab833de21835358a9ec912a1863bf6066fbc Mon Sep 17 00:00:00 2001 From: encore-zhou Date: Wed, 19 Aug 2020 22:15:04 +0800 Subject: [PATCH 06/14] Feature h3dbackbone (#52) * add multi backbone * update multi backbone * update multi backbone * update multi backbone * modify multi backbone * modify multi_backbone params * modify docstring * update multi_backbone unittest * modify docstring --- mmdet3d/models/backbones/__init__.py | 3 +- mmdet3d/models/backbones/multi_backbone.py | 122 +++++++++++++++++++++ tests/test_backbones.py | 116 ++++++++++++++++++++ 3 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 mmdet3d/models/backbones/multi_backbone.py diff --git a/mmdet3d/models/backbones/__init__.py b/mmdet3d/models/backbones/__init__.py index 46c9b44aa3..23ce6e0410 100644 --- a/mmdet3d/models/backbones/__init__.py +++ b/mmdet3d/models/backbones/__init__.py @@ -1,9 +1,10 @@ from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt +from .multi_backbone import MultiBackbone from .nostem_regnet import NoStemRegNet from .pointnet2_sa_ssg import PointNet2SASSG from .second import SECOND __all__ = [ 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'NoStemRegNet', - 'SECOND', 'PointNet2SASSG' + 'SECOND', 'PointNet2SASSG', 'MultiBackbone' ] diff --git a/mmdet3d/models/backbones/multi_backbone.py 
b/mmdet3d/models/backbones/multi_backbone.py new file mode 100644 index 0000000000..d0559f90e0 --- /dev/null +++ b/mmdet3d/models/backbones/multi_backbone.py @@ -0,0 +1,122 @@ +import copy +import torch +from mmcv.cnn import ConvModule +from mmcv.runner import load_checkpoint +from torch import nn as nn + +from mmdet.models import BACKBONES, build_backbone + + +@BACKBONES.register_module() +class MultiBackbone(nn.Module): + """MultiBackbone with different configs. + + Args: + num_streams (int): The number of backbones. + backbones (list or dict): A list of backbone configs. + aggregation_mlp_channels (list[int]): Specify the mlp layers + for feature aggregation. + conv_cfg (dict): Config dict of convolutional layers. + norm_cfg (dict): Config dict of normalization layers. + act_cfg (dict): Config dict of activation layers. + suffixes (list): A list of suffixes to rename the return dict + for each backbone. + """ + + def __init__(self, + num_streams, + backbones, + aggregation_mlp_channels=None, + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01), + act_cfg=dict(type='ReLU'), + suffixes=('net0', 'net1'), + **kwargs): + super().__init__() + assert isinstance(backbones, dict) or isinstance(backbones, list) + if isinstance(backbones, dict): + backbones_list = [] + for ind in range(num_streams): + backbones_list.append(copy.deepcopy(backbones)) + backbones = backbones_list + + assert len(backbones) == num_streams + assert len(suffixes) == num_streams + + self.backbone_list = nn.ModuleList() + # Rename the ret_dict with different suffixs. + self.suffixes = suffixes + + out_channels = 0 + + for backbone_cfg in backbones: + out_channels += backbone_cfg['fp_channels'][-1][-1] + self.backbone_list.append(build_backbone(backbone_cfg)) + + # Feature aggregation layers + if aggregation_mlp_channels is None: + aggregation_mlp_channels = [ + out_channels, out_channels // 2, + out_channels // len(self.backbone_list) + ] + else: + aggregation_mlp_channels.insert(0, out_channels) + + self.aggregation_layers = nn.Sequential() + for i in range(len(aggregation_mlp_channels) - 1): + self.aggregation_layers.add_module( + f'layer{i}', + ConvModule( + aggregation_mlp_channels[i], + aggregation_mlp_channels[i + 1], + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=True)) + + def init_weights(self, pretrained=None): + """Initialize the weights of PointNet++ backbone.""" + # Do not initialize the conv layers + # to follow the original implementation + if isinstance(pretrained, str): + from mmdet3d.utils import get_root_logger + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + + def forward(self, points): + """Forward pass. + + Args: + points (torch.Tensor): point coordinates with features, + with shape (B, N, 3 + input_feature_dim). + + Returns: + dict[str, list[torch.Tensor]]: Outputs from multiple backbones. + + - fp_xyz[suffix] (list[torch.Tensor]): The coordinates of + each fp features. + - fp_features[suffix] (list[torch.Tensor]): The features + from each Feature Propagate Layers. + - fp_indices[suffix] (list[torch.Tensor]): Indices of the + input points. + - hd_feature (torch.Tensor): The aggregation feature + from multiple backbones. 
+ """ + ret = {} + fp_features = [] + for ind in range(len(self.backbone_list)): + cur_ret = self.backbone_list[ind](points) + cur_suffix = self.suffixes[ind] + fp_features.append(cur_ret['fp_features'][-1]) + if cur_suffix != '': + for k in cur_ret.keys(): + cur_ret[k + '_' + cur_suffix] = cur_ret.pop(k) + ret.update(cur_ret) + + # Combine the features here + hd_feature = torch.cat(fp_features, dim=1) + hd_feature = self.aggregation_layers(hd_feature) + ret['hd_feature'] = hd_feature + return ret diff --git a/tests/test_backbones.py b/tests/test_backbones.py index b353d7eedf..d69f5453fb 100644 --- a/tests/test_backbones.py +++ b/tests/test_backbones.py @@ -40,3 +40,119 @@ def test_pointnet2_sa_ssg(): assert fp_xyz[2].shape == torch.Size([1, 100, 3]) assert fp_features[2].shape == torch.Size([1, 16, 100]) assert fp_indices[2].shape == torch.Size([1, 100]) + + +def test_multi_backbone(): + if not torch.cuda.is_available(): + pytest.skip() + + # test list config + cfg_list = dict( + type='MultiBackbone', + num_streams=4, + suffixes=['net0', 'net1', 'net2', 'net3'], + backbones=[ + dict( + type='PointNet2SASSG', + in_channels=4, + num_points=(256, 128, 64, 32), + radius=(0.2, 0.4, 0.8, 1.2), + num_samples=(64, 32, 16, 16), + sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), + (128, 128, 256)), + fp_channels=((256, 256), (256, 256)), + norm_cfg=dict(type='BN2d'), + pool_mod='max'), + dict( + type='PointNet2SASSG', + in_channels=4, + num_points=(256, 128, 64, 32), + radius=(0.2, 0.4, 0.8, 1.2), + num_samples=(64, 32, 16, 16), + sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), + (128, 128, 256)), + fp_channels=((256, 256), (256, 256)), + norm_cfg=dict(type='BN2d'), + pool_mod='max'), + dict( + type='PointNet2SASSG', + in_channels=4, + num_points=(256, 128, 64, 32), + radius=(0.2, 0.4, 0.8, 1.2), + num_samples=(64, 32, 16, 16), + sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), + (128, 128, 256)), + fp_channels=((256, 256), (256, 256)), + norm_cfg=dict(type='BN2d'), + pool_mod='max'), + dict( + type='PointNet2SASSG', + in_channels=4, + num_points=(256, 128, 64, 32), + radius=(0.2, 0.4, 0.8, 1.2), + num_samples=(64, 32, 16, 16), + sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), + (128, 128, 256)), + fp_channels=((256, 256), (256, 256)), + norm_cfg=dict(type='BN2d'), + pool_mod='max') + ]) + + self = build_backbone(cfg_list) + self.cuda() + + assert len(self.backbone_list) == 4 + + xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', dtype=np.float32) + xyz = torch.from_numpy(xyz).view(1, -1, 6).cuda() # (B, N, 6) + # test forward + ret_dict = self(xyz[:, :, :4]) + + assert ret_dict['hd_feature'].shape == torch.Size([1, 256, 128]) + assert ret_dict['fp_xyz_net0'][-1].shape == torch.Size([1, 128, 3]) + assert ret_dict['fp_features_net0'][-1].shape == torch.Size([1, 256, 128]) + + # test dict config + cfg_dict = dict( + type='MultiBackbone', + num_streams=2, + suffixes=['net0', 'net1'], + aggregation_mlp_channels=[512, 128], + backbones=dict( + type='PointNet2SASSG', + in_channels=4, + num_points=(256, 128, 64, 32), + radius=(0.2, 0.4, 0.8, 1.2), + num_samples=(64, 32, 16, 16), + sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), + (128, 128, 256)), + fp_channels=((256, 256), (256, 256)), + norm_cfg=dict(type='BN2d'), + pool_mod='max')) + + self = build_backbone(cfg_dict) + self.cuda() + + assert len(self.backbone_list) == 2 + + # test forward + ret_dict = self(xyz[:, :, :4]) + + assert ret_dict['hd_feature'].shape == torch.Size([1, 
128, 128]) + assert ret_dict['fp_xyz_net0'][-1].shape == torch.Size([1, 128, 3]) + assert ret_dict['fp_features_net0'][-1].shape == torch.Size([1, 256, 128]) + + # Length of backbone configs list should be equal to num_streams + with pytest.raises(AssertionError): + cfg_list['num_streams'] = 3 + build_backbone(cfg_list) + + # Length of suffixes list should be equal to num_streams + with pytest.raises(AssertionError): + cfg_dict['suffixes'] = ['net0', 'net1', 'net2'] + build_backbone(cfg_dict) + + # Type of 'backbones' should be Dict or List[Dict]. + with pytest.raises(AssertionError): + cfg_dict['backbones'] = 'PointNet2SASSG' + build_backbone(cfg_dict) From 539897d6429f8237136116e7fc9be373e99dfa08 Mon Sep 17 00:00:00 2001 From: yinchimaoliang Date: Wed, 19 Aug 2020 22:17:42 +0800 Subject: [PATCH 07/14] load_points_from_mult_sweeps (#67) * Add unittest. * Change to 'defaults to'. * Add replace=False, remove T op, add num_sweeps=10 unittest. * Change lyft_infos.pkl. * Add nus unittest. * Add nus unittest. * Change nus_info.pkl. * Add test_mode. --- mmdet3d/datasets/pipelines/loading.py | 82 ++++++++++++++---- tests/data/lyft/lyft_infos.pkl | Bin 9957 -> 7300 bytes tests/data/nuscenes/nus_info.pkl | Bin 0 -> 26612 bytes ...+0800__LIDAR_TOP__1533201470948018.pcd.bin | Bin 0 -> 2000 bytes ...+0800__LIDAR_TOP__1533201470898274.pcd.bin | Bin 0 -> 2000 bytes tests/test_dataset/test_nuscene_dataset.py | 60 +++++++++++++ .../test_load_points_from_multi_sweeps.py | 67 ++++++++++++++ 7 files changed, 190 insertions(+), 19 deletions(-) create mode 100644 tests/data/nuscenes/nus_info.pkl create mode 100644 tests/data/nuscenes/samples/LIDAR_TOP/n015-2018-08-02-17-16-37+0800__LIDAR_TOP__1533201470948018.pcd.bin create mode 100644 tests/data/nuscenes/sweeps/LIDAR_TOP/n015-2018-08-02-17-16-37+0800__LIDAR_TOP__1533201470898274.pcd.bin create mode 100644 tests/test_dataset/test_nuscene_dataset.py create mode 100644 tests/test_pipeline/test_load_points_from_multi_sweeps.py diff --git a/mmdet3d/datasets/pipelines/loading.py b/mmdet3d/datasets/pipelines/loading.py index 881a64e9ec..9bcff8e990 100644 --- a/mmdet3d/datasets/pipelines/loading.py +++ b/mmdet3d/datasets/pipelines/loading.py @@ -71,21 +71,37 @@ class LoadPointsFromMultiSweeps(object): This is usually used for nuScenes dataset to utilize previous sweeps. Args: - sweeps_num (int): number of sweeps. Defaults to 10. - load_dim (int): dimension number of the loaded points. Defaults to 5. + sweeps_num (int): Number of sweeps. Defaults to 10. + load_dim (int): Dimension number of the loaded points. Defaults to 5. + use_dim (list[int]): Which dimension to use. Defaults to [0, 1, 2, 4]. file_client_args (dict): Config dict of file clients, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py for more details. Defaults to dict(backend='disk'). + pad_empty_sweeps (bool): Whether to repeat keyframe when + sweeps is empty. Defaults to False. + remove_close (bool): Whether to remove close points. + Defaults to False. + test_mode (bool): If test_model=True used for testing, it will not + randomly sample sweeps but select the nearest N frames. + Defaults to False. 
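+
+    Example:
+        An illustrative usage sketch (not part of this patch); each entry in
+        ``results['sweeps']`` is assumed to provide the keys read in
+        ``__call__``, i.e. 'data_path', 'timestamp',
+        'sensor2lidar_rotation' and 'sensor2lidar_translation'::
+
+            load_multi_sweeps = LoadPointsFromMultiSweeps(
+                sweeps_num=10,
+                use_dim=[0, 1, 2, 4],
+                pad_empty_sweeps=True,
+                remove_close=True)
+            results = load_multi_sweeps(results)
+            points = results['points']  # shape (N, len(use_dim))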
""" def __init__(self, sweeps_num=10, load_dim=5, - file_client_args=dict(backend='disk')): + use_dim=[0, 1, 2, 4], + file_client_args=dict(backend='disk'), + pad_empty_sweeps=False, + remove_close=False, + test_mode=False): self.load_dim = load_dim self.sweeps_num = sweeps_num + self.use_dim = use_dim self.file_client_args = file_client_args.copy() self.file_client = None + self.pad_empty_sweeps = pad_empty_sweeps + self.remove_close = remove_close + self.test_mode = test_mode def _load_points(self, pts_filename): """Private function to load point clouds data. @@ -109,6 +125,22 @@ def _load_points(self, pts_filename): points = np.fromfile(pts_filename, dtype=np.float32) return points + def _remove_close(self, points, radius=1.0): + """Removes point too close within a certain radius from origin. + + Args: + points (np.ndarray): Sweep points. + radius (float): Radius below which points are removed. + Defaults to 1.0. + + Returns: + np.ndarray: Points after removing. + """ + x_filt = np.abs(points[:, 0]) < radius + y_filt = np.abs(points[:, 1]) < radius + not_close = np.logical_not(np.logical_and(x_filt, y_filt)) + return points[not_close, :] + def __call__(self, results): """Call function to load multi-sweep point clouds from files. @@ -123,25 +155,37 @@ def __call__(self, results): - points (np.ndarray): Multi-sweep point cloud arrays. """ points = results['points'] - points[:, 3] /= 255 points[:, 4] = 0 sweep_points_list = [points] ts = results['timestamp'] - - for idx, sweep in enumerate(results['sweeps']): - if idx >= self.sweeps_num: - break - points_sweep = self._load_points(sweep['data_path']) - points_sweep = np.copy(points_sweep).reshape(-1, self.load_dim) - sweep_ts = sweep['timestamp'] / 1e6 - points_sweep[:, 3] /= 255 - points_sweep[:, :3] = points_sweep[:, :3] @ sweep[ - 'sensor2lidar_rotation'].T - points_sweep[:, :3] += sweep['sensor2lidar_translation'] - points_sweep[:, 4] = ts - sweep_ts - sweep_points_list.append(points_sweep) - - points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]] + if self.pad_empty_sweeps and len(results['sweeps']) == 0: + for i in range(self.sweeps_num): + if self.remove_close: + sweep_points_list.append(self._remove_close(points)) + else: + sweep_points_list.append(points) + else: + if len(results['sweeps']) <= self.sweeps_num: + choices = np.arange(len(results['sweeps'])) + elif self.test_mode: + choices = np.arange(self.sweeps_num) + else: + choices = np.random.choice( + len(results['sweeps']), self.sweeps_num, replace=False) + for idx in choices: + sweep = results['sweeps'][idx] + points_sweep = self._load_points(sweep['data_path']) + points_sweep = np.copy(points_sweep).reshape(-1, self.load_dim) + if self.remove_close: + points_sweep = self._remove_close(points_sweep) + sweep_ts = sweep['timestamp'] / 1e6 + points_sweep[:, :3] = points_sweep[:, :3] @ sweep[ + 'sensor2lidar_rotation'].T + points_sweep[:, :3] += sweep['sensor2lidar_translation'] + points_sweep[:, 4] = ts - sweep_ts + sweep_points_list.append(points_sweep) + + points = np.concatenate(sweep_points_list, axis=0)[:, self.use_dim] results['points'] = points return results diff --git a/tests/data/lyft/lyft_infos.pkl b/tests/data/lyft/lyft_infos.pkl index f1992ea38972169ee3e867d7fdd53ace7c1ebcd3..5fa2a41d135fa161668bd32ea9a9e8e2478c5c1c 100644 GIT binary patch delta 4293 zcmb7{d0bS-9l&=1QSk_ZC`jbS03wRFAih;lh%1Os(3OI~0=l3AGdwh!;I1Nv0xCQN zFFf$R0mTE9O@F_%Y11=JkF-tOv`w0xNz?R9+NQsGyRMr*yL_1U=DpwiuJ6q6&Agv^ zV2Ov#qu*nar?sT8q^h*6q{gSLQr!5k5R+b~)0tVQ$(yd*iAV_!etiG#3E%D 
z6=T#JJ&m4?EV`t!c1QJYpHgLaHI1BM@}Lo07PA+OI-|ac#mFo+#^|Z(Tgu|Hva(ix zPCuJ&E9+-$Lim?~%x+=dot)#G`z;~fc3!8vd2fu^0?z@F@FBD^u+N5`v6D*2Z3KH-Jk-NYysiyk@fHeZ zmdp|qHrs4chdig7=2(I)&l`0OEYU(C&Xw6bEyM;k-x7P!5@L!V5339Dw%>YXltEX7 zx9}t$DnpwDw)FAv1P@_7^h#`j(7aG)ixjrlTwBMMsBcaSROfij)+eb&UTY>MTP9ij zEdG|smVg4bR2}jP*QcoOcqLnx3A2*S46e*Bx5PG+VNQl|7%tAnF5IWNZ^DlBQJHvY zFW9AGdU6&9vp0b~mg9+6BC%tyRD=y%;NB8Pa(bYx3J2jZbU+i>=RiMR#z!geI8VX{ za20x>F9TY^+9crxxC{qC<=Qy5LPU}(vowXRG+SKhSta~Tmsy50Juf=a6HZOwUB}i@`r_48|KL%wWr8JJE%DFrUT>LHz;e`n|8RY-`j&s3euMg7 zy5FdVO-{C&gnOH0mgh?2mmF!_DbjeKunj;f_F_L=13TP>J8*GJAr8`nE6vAS3%~~Z z6`Dc&akvCq(7wD#nhr-O!zbZtD80xwt)_0T$Pf*`xF-y{YsF|9(kEu4H8c?K3f0(< zluJ^NOY_0b@Ew_flXVm8IMUi1MUZ!YeZ{6 zf^&KCa4`l?U^D406?)5LwnbrE&87)7tdg=>SYD~{^2(a>5@vZismq0_3Yl$lrDeOr zSS}5Mk%G`|OSeFafjZg`uhIfKXa#!=)Ek`7+{EAj-dVhfZ`km^=DRGgzV$uyAdHRBMj}Oxg zC#I#meRki+>8~zcne^nf>V0LFsRisM_2QId>uwRi9+|!D3g8t7*X`sUwBUfba2dAZ zb!lznP8eBHhvy|Y3McR(hwJ%SVBZT3*i&uB*0o^YvlvypA0p2L`*d0pZTJ9397JiI zb-@`pCdN~KUw~_+lBU&)m-6#QL)*GV5~~xj?vq9q0yP7!Yvpm|Ik4h-{PUFy4mu^y~joft&D_Fwl* zv$G02=SJg91hX;6w6kigG zFU#zT!mheeyJm^l(8R9G?1n4$n-1L@DZ`#{KMb$n6|)<+gqPPqeZ@;|*xaS6;mj5j z+$Gau(M1Vzw}j=}GP|R&J|Z{9Np8jRR3q!JV*|u&l{dRfy!zG1VB&R8@ETNi1rwqB z>gizO^+0_?^d71o1rx7FA{#?88+Q5p_!%!a1VcMEQDNwS>r!$O?8CFzjrX+T*9PkJ zL_Zj;1$!Z$fwQZy-8V^EjrCMXuPnokS}CdqTl}6@P8?YSO>?Pm(r78g5{V6P96GF| zH#di`DDht!df2y&hN1D;u#CKHapQ%@ zX+zN*0r$MzkbObY2p4c54G$G+%wh$zZwd3?mf3d{_FZBoYPMKyf|+Bny;;Y;r!BUT z>9pA1B6x4A?@cFo-xu(Hpe_y}Tt8F`LI~cE)E3cuTkQ)Wcs~}Y`H9Sa>WbhU2Rv;D z(c&`;&eNWS_CR2thF62|5nfm=${gG&%cITF@txUp{{$N5&gHsI65PX~*>KW;w+vw4 z4BZ*&_{3QxuQ0q_j^}AG6-PKTlRo;V(5fk(4;`^+!$vrtBe9=}Sbr|FUnuNdH-x_w ze*Q{kzjh*g&uz&Srq#hsPL)xY9f3DsfWE=nip!yCcNH4o3>mbu_l4r$$m|1!{q`xt z&9rsHQZ=y6~KXzjC`kAF}2;nlGhOU`u8{7!G%Rzq`*q_5YxzEbw zpM>oXW%iN6{_IvRHGuMq=@~if<2v>iji4=z2!28YKUOb?5y8I-f`3y#4I^fMS3|>z z;6K#NaO(Y2<>5r|U&8-SW%ilN|9?9On#undI5iCq@R#wX6g^R@N{$!h4o0xOa#!)h zR0$i$gu@^Vq{&hg?k|$z1{}F23$=JJc@Y$excTGmhG)IpO2Q zE6rj7S%gl!K@lE~uXf|QlRg0#!|6oO?*{e@k{fB0cGVw_kc`bhYUrny2lxij8s!s& z0rGF6X8Sp674EBk5)mFdNxbnh`H}SY^3q~Hw`^-wA+J*CfK*k<{lr_bvFHA3N@SFN zvTBZ8VGR&Z9cK$ZMch+VQ|T)6Kx!RPjc(*Y;*}QFXQZ8W=tPcMyLk{i_EX5<( z7KgSNJdFcIv`SB7r-W*z+1IoLMpcjXd&{hdIX5dId1@_W}IteG9{&^YlX|uZ@2gz!v!DA*g zO`(nF;jxBsXdlC={B;VY{=Rg&xQ{m$%tBifhi162w`8gThfO6B&{Biedt|p8E>x%Z z(AVOMl!52>OorR(v=Isy9@BB|77j^qu?uPA)rp7w(8i~UTo8a@Eltl;tMKX6nze7- zR;94Act=?c4-sO{%!Z14%4`^2WgbqgbAG-UA>Pa|c~Vw$6X)?r>S{+*9;Jy!YoeYO zaWpl~H+mVzD0~JzGmAstX&O{Q|HbsrRef}<8WX+E`U1Tc?#0km=CRZ&JdRrD7QA*x zp*A@RtCbo)Q^-1fj2HLh<1D($Jb_xLkF&)K$H}>TF`q*{hlh!p;9O16;h{J;TV1DT zK962$j?N(msXG5(e|$cT7I7@l%q*l<;fsVu$5Ddf;Kek_wKx_FSw|c(e2M5#AW3wU zc`~)mK$g;Nh45D-FOU)zT)86>gx`TwAQ++)<{G6{VOjr-yZX1+}Kp i6wb~vUL#gjs(3I?JV;EKlQ>J^#pRVejULw2Cj1Ym$+koQ literal 9957 zcmbW72V4|K-^UNJ1xv(&U?sL-gWbDrXWLVdH6n5B}R=sF?OR5mJmDGqp|S*XE~0``+1+|^TzvpZsu;ve`e?V+nN8&?j-+A zWD0XsGMP+CDN1TO@*9dOWFr4CM*t*^NOFm4Vw$MAQAI}=NYLbTEj_{|YGTBQv5FR9 z6o_!Arfcm*f@Bko|70R*wlE~gaU5eMNYYGO%}ltIl!7W54QZ*vWF{g{n9zgPHWv zvYeKVst!f(gk_?DFvmNPh`U+Z`D$<1d6pmv&cbk|ov-ew2u8KBX)>x|T!(S7sHP(j zcR(DOHbPD`nk>D(ca7^~%_Ju=k_=|d78nu6!ZKzT#fY?vq*yD#m?LYQCRS zN#H}57PUD(ZJ@xv<_C%21a_4jb^cyiP1Q~LQYCT3-`cM?6a|F|w)uTuxRhxq+}$!K zGB_=TXarYz!VY){Z!@0<7+q!0=^W z#JL}Uf#X-9Qf%Qwofk_AzKb8Iw^lDsX#VR&e%L`}n-%g`&Pjdx$^2}c_~wrnoxdN% z|G8o1)*iF7`5Wt}E>JPU4jSqxxxJV=;r2S)Q5O#;-oO;R^ONFwqwCpB^M2?UJMa{r zKR4b#C}lg3Y#Qo{F?D-uOct1zDGNuGVv6*|WEDkP|)InLpV{CFQ)DoQ`aMENYj>psr7hb@|g#KDRy;y z#;yVbQ#8i3C!l?c1Mp*X?xF|b3(pI>V?bS>ym!;QVnD(Ciw4xs{RcmwUSQfwQ*Zhs!UKjJ#*Zgre zugN04RYXjhGSkh!%wB7YslSL{(XL=>%{PE4Mnk=PFva5b;_Rrm2UEO(snb5!${L}e 
zHq(Ro<8r>fz~{%GOgg*$dmi=CkR4;{`_`C9g0xszk}1X1&leMCp$SS>T(nCe30PoQ zSWy&;qhtxzO3B3%B119?!7vs@W*Ei7F{~37Q-Y=$g>=a%!5341jETDty=wCTz!cH2 zLhtV}rg&#mPF$gZX#mEwQ%e{c)mh*t{uc1s{};H4dXD)u_&S>gm~P#z8pPi5VhZXm zmt%6^VH;>igS?mq8<=Ene^xSJ>Jv9v$V4;r_JbxK+0d8P|7>_^ck1vN{K1W}?>&1} z%>VS+PW!srOZCccHYP`PisI?QfD^3)m+}?N;jLx?Ml?i2LwzuPh}%oFqhTIQV(Hy9 zV$8b*u9h~_qz}=JU;KwC5Y5)ARBJe0>i}3FDM}X-uzbpRt zJTInzz~9R;CE;O9wxi)*Od||TrV$-``T-_!SVwnk-b}qkwhM)%NFQ^wJ(Nss5My+c}F?BX2$|PdkpO~A+C7d)#DHd3KWV7g$%ogz5EM|psaZX4Ros^TKT~3NKQ)sj=rZE`P z`z;PlS+)r(#L1bUbe@T%da?<_v1i}X`+Tc_Q5m> zw>R02rg$)A8JLb|2s3;B7-2JoZoa=UQ6Sxo-@CZCE zM79qmi^MW8=UG{&=yFm7!R;ZDdknX(y#n6f9YkWFcg;3-%U-i?ae; zjzKegG38)PP5RWkd1C@#;y3-$;`=jzDeC;e$D7(3m}X*3yVNQ9u{#BR%(T^&#w^Se z_{c|5SC6Anfa&_qEz|FQ<;C=Hecf_Qv+%IZwxc;-OrIE-dW?A>PXtUqzdP!qg-fUF z8BcOIENal7@16Fl{`2+I`IB?W0*qHA;8o<@$%^I9RO3psPKc!wi%d~V@!Ke``n#*N8rb< z-RdlPuu0hWxO&)!ow}q0rZZ30*9wgEVk%5;T8?Q29=4Tsw91QVwSnoDuPU-+kL`k?vQ_ta#YCk$zh0@(0~~ z5kESy#pQY!(;5w}^}+NRZf~6(<#{lDZeZG4IWP8FC~Y$>IdmlH?p;1@^WhuSC!gWb zdJTPnF>QEjOyEM$w6)BI*yw{vF@qZ}k!Fe$S(1h=sEc)iA3@5r!~rjwrD@7c(yUdM zoi5G-8&;=6N;C^7oeXTv(I#I^Ut&zvk1uyLc>tJtK8`*^&jw6`POXmI2F?IDn%JlqK-#TgJVdNe2k};AUY1{=H@wVEVOwlLj&Sy_hcYSIRMcg@^5HJKEyKwAH}0 z?Z~p&{aI$dyQ(Jq>mD8dRW z5DT}H*-AldbYfA zC}6pPX|I9l>ZqShdj&poZti0;a^Ta$t@r;4m`=5d-+sKri|ItQuyRcM@UZE2 zl<&p#gMlgX%J~ThFg2X+9)# zJJ?Kh=eim#d%$lk89SAKe3?gwHFQMEkd1>Jr#Lbl9nD0?N)L-7yT&F)+k40LL&r1G ziT{K#X{7j(obGer1y%^3<1UUSI4ecKoS2*@e}ql~#^avtFJXqsP=WP{l4!!>gV(=)53zNN?f@X&R{lY?dY6W3+D|jJiGsB_VtGw^>4T2 zDTzrooRl z>IXX{l)Q2c(a{ABUG&kyCEVU`c68aJg)4>@Oc}{PDTmMUCfojHNB{g6zh+Roys(Rh zb@Xoy6-bU6aEby96O&RjH7O-M$%(F(odI0K_O9E}4R8N{#}<3+8`^YuCuNomY8szg z&vQQ3!?J9izoxSMsYUF^Rg))q)64Qp;|Dw{)X*PNhT~mG@TxD-{(;b=AG(=|Zv7Jt z`X0G*77FGAuLf^}25)*a$cPF@Tb#54P7z6iQ|p5B7?Gw}*ug8X+n_A618KQ?mQz^{s{m?}Z8a zk-&pRuOl`K{Gn4*x1EUkPS6W?lTEt$g$jJ(?e0HOmj)Obyl-gGZ>UhA2sGHB^M%!u zAA$zEci4L%ZL%>R6&V^FJ9THSOVCF)UKTZBV7$PqgTI`(t>19a;G!Y3s}}6>YB198 zLb(QuF~bLT^w7)jk)gp^kEFyd$M5jHn~&PpE%O>bZ|QS&(~vlQ;^5( zF)WP_|4L^+)gMf`Yb0a49{tHQoGUSK&}}s{faAdPJvGy(R^E+G7aQ7 zzv;NdzMx#sv10eCTfK5r-+}7DSPg*Ku2#dwJSWU*b)1Ed7-QPhJMT`tr%&9xIAqwM zD}2h21G}u8G*-7yoi+7b-?6;yoWAK?=pa4vQD*0}Ak96t!fDc=EBfhn=7kHOUh?f) z;eGf4x>^G)YHCdg%LS^wi<^NS)PmTq20~b>RJAtFz-PmzixHo-yLFB?nGOwl`QaIT z&BG-ob;nU%tpg=AwJwC^l2z;BYG5x2V!K)&Lhls?<19R3`fm@tc~dHvZ}|9LS;=Fv zY^4*S$yAn~wn}k)`|v-@R}BG+n%V$DP;ga`g1dE%j8PjxTBh0v!hfP&i?4PGICX)O zOpkUOgDKE1w(HR@DS~1N7s1^2M-MGzGsxS&38=5&&-Ir1F>Cg3!z85?QjOxZfL(5&vw;%#hda!iyzWy2fyazsrXZIhjq0*l+e_02z|60 zfvbT%0%E&LLg>9BGtR==-Cg})mxHs)wR>Un;bz+^W_h(+mLIe3QPk1I|Ds(BSkzPs zLeMVuI`EQ@wVP;6zBFWHstknxM8K4Q0`6cG(hCn(>N2UB4c7d-75d*l?GX6GIo!!xc^3rz z(C5!KZCU{bEin0ZG)mj%4-vT?g>^x|!Ph!(>;DE%zR!*w-3T)@Oun5;+budcu;XJQ zU(i!cuiX=p_rc!e({-C#E?xlwo~IPIn49SpaCD8}ashXS>KJzyi0vwmjg<-5hO@AM zj|H?}d+FXCy}_-J7dLud)3?ZjvX+dB*Y{2{n^n_5e%oIW%WU^;`iF~q2(LlFH&Id2 zwVv1brpT`1TGn#XHf9WT;vF+RU~P6puxd2L`9(qR-t4D4h1>b5W@!8 zWwp3yi&a+eRvDglDTHdrwm`vsaW6r^;~!0#aHo}^pB;Gh;PiX_LBZE1-&nJLiJ{A*Zw6IkmD@4R%k^?w%}igN*@PA`)U1t%D#ZSx0qIkIn44eZM9 z=)e5jW;_L7ic}l^ZYa2aX}d)ShD18vNfq?uI?bf^rcAzvqkgwI_J3SQhYb^44i zybA96YFN2~2S9bq8$V~Zs{^sIG6fI9Sy;hg%Nra}$KT^y*5p$=w!5P5JC!n8tQpB4 zs`}n{|BABnE%pzyf0+}@H(KBF0#)pGv?XK`=mIu(d-tdqbUp*a@0RA;t)YKsm z`Y3oPZU%brA;fky5kilGhv5vYV4I0Jc4OWX-n53Dcx5a2x|PK(TiGMJDnbcOl_2y{ zuoG7UdoGCWstlp`iWHoM6`Xtb!pU}NxN-(-0eT$Vpz_r$4CEt>LVVG|>Ce|KfusGji_6-K(Pj($aFu?&TCJ~y-{L-+C` z7lMKf=jJ0VQA!;nr>i4zi>1%=)fD_21}qg~yPAfL8xp88T}3zx25iIZTeLO z_6NIuKG7yW?DC^wdsm#{Psd8N9M(7diUXlrRd*fRl@^sJFN}Q62mG~o^X+e5=xg1!fpONX!X*3MT|=&`1zLnxJ9af%2pdZ-${6weiB;NNh?qae1cqapN2ZVb+7=I{qh38Y0G 
z3xAEbG1PHJ$?-->e>Z*`BZi8p7*kbKCqSB%6S=|(bwv28@csXOQlX#*UpIVZ_~N>W z@Ezs(kJFRji*1kaq@{UcmnTk#7~8;gaar7l6!`jn;7AdV%0%2%!%=`&SxVolN=(zz z)sG>)Y-lFo-!L?jA-1biAoL7P7S1SrNhOI>r^3I|k(p)`oNg2>9htwqr=n&<4vbGQ zetG5p`lvIYfTrf)V&1-~qVbX#^lc`jm-THH{tbPb4Y6IF1EHsHpTN&#+y%VX_zxd- zF63+KJP4%>(a^?7S%Y>oQT-GWGu8PJI;ueRQL>s2uR^H{a6&blK!y`!dw3ZpDMihN J#Pp2t{{e+TgEs&G diff --git a/tests/data/nuscenes/nus_info.pkl b/tests/data/nuscenes/nus_info.pkl new file mode 100644 index 0000000000000000000000000000000000000000..31a2bb49420963582f89fc86cd14aafb1b16b92e GIT binary patch literal 26612 zcmeHw30PFwl|Sx^`@Zj};95l$6u7-|Ma8v5#Rajds1gu?_b3oiv`_&N#9dTCzztEr zUEHvebCOKb*(RB6Gf5_$o^&RYPI{YcolMg6yRQ}s?Y42gujiZp|4Vgq-+T4$JNLeO zf9KqD?>)cL**;ee2#)O(5D<`*YER3M+r`Q4gLCB$!Lfn(E+r|!ti-3A9f@+s*hGBc zuw^(hW+a#$<{7D(8CF}WEn`N8d1rbGj?0%WoM(!US+#mbszw_&EmWh8n5M!1q0_X6 zY1;5QuAghmq|@5>k;uMN}bkWp{YMCv2(!1Q!$!gNbgs@zGflOt`1EmiIuI|fDD z6C(7X8eL?#Asi=RHnYWMjkF{rB-jnMaJ?m5?h@M>*)n$7Z0Q+t*EqRbY)4#M%{w#X zz+Aa|aBMeRE||9>ez9rQ${4vvuH4i8op>GhOZJN0hCI_h-jc93}7mYR{Kgxa>J#XA&pYDS9Lk(8Dy_m7hY1V>4K2~YoSfi)^h zIv&$=@dtM=M@fI0`tx7^c=2IdZ2#5|D`^h*lLO=AL3r|K|K+O4`RTf=@~X#7HytKKl~ z8_WNYXj8k@k+c)Y1Q6xnN4nIn@31so=@dHA65!Teqo$0%$ThkQV^qrY0 zjwG|9n0E)pD>iFdYKB9}v^oOik?vf9sZauOR~{7|kS~w6##__S0U3d|RB;k0kBM!E zk}UFA6@a4YQ?>HA0(rbi4l0xlR`ayU&AYcI9rssilgj@l9Y1rgmbb+r+0rX&SJ3h?Se2QGML6jL` zk|Q1ROiOG}JOpKqPfB$tNvRo0R(Y1UpxL6JIVO3oubm}#QL%W2o9Mt|1Ks2zI%UMQ z`iaYtxO^NPpdzE^o7VXTZtBE$M$n^uzT4hx?jzrpWTU9wJd-@%Auq6G#`eaY7R5I$ zjam{TFU*w}1t<23O~Jj-v?}rHGi|pmYW7wZ2If&0salxDiG30mCVEVWInoji>ltRY zn)O<1f;lq6Zr50K31N1F)@%=#qY_~!mO$J7^pA_4jkvoRx_#<{hyB0$ag_AOch6)E zTlN)OBJ9CZ5~1|V zRHgsQPoL;<0;PYldVK#DrB8%%N2|)MGp%Ue2IX$LDVwVLL%Dzc+aDS>Zh>;o9=$bo z{Bc_%)Hx$lRp-PJ-tJy5x_gC5Ug_)ZRf$uCVx*#(sldS}Ch#4)&Nb5Jk&#`(sGe_- zYtas>oZOxtxpBx+D@5$TBsK4$b3QIww)dc78H$EH?|iaL}$D z5X=YC=@1{b(mg+6h zYNoxB2w0bH+%u-Z7$tpuQsQx4#knZg*Si%??itxj1$wdy^zWa1>;B(Vpa;y~f3r2v zcc?(0z2r#n8UX$AxsdoD_5jeczxKC5bKp&pUc-5w=SkD$>cbnuKhrGuR z>wAURT$8-d_xAhUf`wHN;hVFTC-5adqz|TI2NiNXWVTp!zkIOvmvmgvnV;nvA>k7~ z!m2w(W%5k&0f&4LNbibl0_pSTEr^y6<;wX2<6H6Nt1vDyw*}oIUn^Ye3}K;RDy|EF z>q8z~+W~8>&0vlUiwMtYqxlZOxZ!?%Fz2algVKh_~ivIYEh>90D?1+Ett zR`NF@R9v5V^a8F+M9&^I$)&!YJ?6%>jE0@Bfx&8oA%kNBkEAAsPMBA6sUp!yddB5_ z_YI*#5#;i6O-4#ARpK#@p-Oqr;T?%$<8FIT>|(!6>=rmCcq17^vJx& zRZx)2bb#;dgRMO-y8MJmKIxE8`N8_MsO1@xeAWZ&GCvz0upBA(#8%xyajJR1J2t$I?IQa`hj>I zpXB4z%z0er_id}JTnd-zSc+rBtE_mns8EebzT}WE1KYx*@?tw)HR4xt<*NeVVygF9 z09T)AGs<5w;d+BURHs7y8lZl~Yr+kdgh-tx91}&oJ|Q&RV%HihT8%A1V-C|>BW!Z5 zfckY{@BNdSC-UDSV8T29{Q2#VHv;O8%8_9OwJOx>kmHxZ%l;UjzC6nHOXrH+gYA8y zT+QG4arNmZQk3+pQU9auGhp`pS=L}dePYpCb`kq*|*$KPe&7TJ^72FU_2^n40NeJe`&%hIHd60K;jjmZsT)HGGdB3oYRb zDO4On7dJq0T=4oUI`8IFbA?R0v}Pn1^x|f!1R`$>0NgRjcOCLQKcwFmm3&~5A9|49 z3|e9Aug z$l*%QI`*~EA4j>q@Zo_kRen1Opila(E#-p~P=g`G~8-V!hu^ z-MQ9S|GJ9x8FQZ4a)n?1&c!b0y2D+0$3D=tk$`pMy0mRm2`1|8}b9_a4wzZ8+?TuBz}Z!wVX#4Tq49T!rXP-2EIUI8u^a` z4Bs)y-*w3U%tz`X9QXIbJ}H&V_Mv7+>UzQh^*(CpqL;kTJdMopkKtvZ=-MD4SoB< zNALXF*XdExU*$6$wEuS97TXU+v^?uuEBR%d{O@@1m)GWh`LBPFqFgy;<-dEk!W1Rt zf3xum)9?QZP`l9g>mS9uqay5AD#Gf2oBrL81j2T7dTT+DuP~dK`Td8Au%HNMQWg+) ztI-hqwXeXMhLi{Vs!tFQwjz4#wW?SZVG+Mj5vDp}+a2+iv}{|3{A=$t@Ey_AzcIW^8@OZ) zJ2%pkR6g0ICq`!17FhBF{2+LXn9>zf%_4dT1Kr%mIE$-R(NQW(-t#72LQAGqr1j*qMu&<|#UFw(0LprYKXA&I*2#-K=cwggc zRboGU^-xNogP2Dq59-8DZ7Zg;zB$Mt!nU15%qrwAv)0Hd($}XNj6;#lj>J}K;MMXA@4iES& zG;mHJ)`~8t&*5tl<@3!o^k@{0cceW7s43?F zjBaq<5;sxPlwn(7u4WEFIfS^Jw_c4%CMw`{CIhUkxy--aw^dFp90}0VwAYG z-38qJL`>*Rd30t06?46xI4anhGYg&bB#@n!y;u;*pGM_FjdVW;Zb=az-4tTvM{|b4 z#@^*yqIJg7Z6y({)4(N?z)coCr>bFmOxl#-Mm*O_TwtQSZZUMcA3F!oW2)gxDfS`8 
zsnJ{%o;WYWxY(YF*=bpj00kPiq~$-jKpq9cka1k1bEyLBfIru@lA4tY0WP#wQy73= zfd($>2H2hz_PS;=-Qm1ZfI0BwuiI!|e+Bhn;n53-g8rQ!w;HN*3&Jh*blSM7{pPVV zn`(!0V^TUA#^vpCzAQTJ(ONH-`WRci&qIlh_3h6OLo!5v1$9Mdm9Dn|sQe&G#ys{g zmn=a2>oC*NP+^p4k4f6DR6k-9Kk;kbzD5tgL@~mK_$%sU*pT`lfb6&)qMpnrIEOR% zX(&5~^TFA4d`kz>4hyL`#bf4Nl1_Z+ULWIf;CG(TtoxE?d>Ykn8zHbgVGifFj~|LJ z=kd*qEmSfvnw_2S24{9~!>)EH#bfetd?tBemD11XR|uTN>@du|E>YeJa&7EGr8&d6 zVj`Y>+<_{rbb}5q#6<}ooUw?WQ8ipbSB8qKy;y^mJgtBi_nGDuP?c6MK56vi;Z(K@ zHg6i|FGvrh!ddKGzN$m67DG0Wx|{1JipE48N4mk;S{z#VLMmUGvVXgfTnj^akZ;CXsml@0yJ~<{MMAfG8X>HY(Ff3n(hj()dI)tJ%eIs292<~72fetsJdVzEe zddf%X%(Nk)s|7fpRLL-MAHqX^WVdyWs*u45@zOlAqmzHu=lVh5lFJ zwGXb-va=uNt5p*haghK*0gP`c#)ImCe9hmkpkF*DW^X?~yWi|Tit_1E3jv{JO!SlqL%Q*$E&@(|L%kQ6A$^T*^-6eAi@0_PeA0VkHi`|AQVh41bZ}(f zNyeDn^kgp8E)Y#R)GZOO&P8ssN4NSK{geUfet-&>)4A}G=&7+>&vm1>@>5t5I3kxj z@?p%Vs^=TCp=6IZzgv`_CBA~CT2&L#w6#);QvvI8eNF&7yV8-q?9@)_J`;;1w+9*H zk|$y8r>ZC;Y~((4{SB(-gA!dtFR4guXY`GjkNX)-?xFeSngAo0!_%cBLxP31!AeO5 z`tA(onP|x_SSYVE8R$*~YJ8iHVCWtJa6y;&K5%t98yY3v-UIe-eT_4?GDtN5qCL-I zP5EjnRK6Gr1^?!L+?Z{Aa}c_^Ifu(vW0rdvIv+jCCx6spt8p;FO=^&jL8f8cVbg6a{H!tJ*1g?~PN?##)yQ@-C zEh>hmTmGXbUC<;Z$MdZL5KzXKV=%m(*1>|CwJ}1c@p9xjH zi=Vf8?HgfqYFG{x4lpj`%W+Wj!*pa4J6FQ~-HhQ!vhY4VeqEr@a^Z#cn%2)4;%yAP zXh14oh~&H7GbJoPpB^-pD|O?IeSpI9y*tKfjJ@dTz|9zzE@QkBgS6_msV+?mCdHC< zOYb*JnEw^RSq|LCS0p=Jr89K6ADt3ITJ@qu3s)NkvvU(QWz%yzy5=gme8Y|aWXm7{ z`v~B!A$LKeTvf7^!A#Bg_Bkonib=%rJg$lK-I2 zF!JR;CI-iLz$|HB=a_+`1ApRulJFDY(Kd)Q=|7-GJw?5vs5ba?1{E2oE?GW!d zirsqAMxq^GH~;Il=il?Akzet7-TwW1d%mJEaqIu}_WZo{@paqhb@RV2K0j~y@_&g@ z^{*!RZw~omOKf*Q1nYKLwv;q$l4G|LfH*17*s8P>=Rt)^dmK$l2c+(0y!Hi3M{!FH z70T{_wb*(%Ci!re%i-l;l?KzXJu)8%7(&W(6RVj7B{t!NEgfq(5t<9c2thoh{5LJ87yqn;i+Tmq+Gs~loo{$ zD@42u<-2KHsVWoIa)pEq!c6QO2`Ae{mC^KM16=w_x`LX#@R?<@4W>eVhRK=>^NO2` z={8^82nTFwzZuDE(W=GdvQuSW6W`u17D_3f^Y*TdMoS#VMa5*y|LV0kzm3Y&Ur=3jAy$1JXC`2&R=(fDu2KwGX1MLwp&zbW}`byJN5@dr<06PE9WpW>8{URcF95Ol{;+ zI)O)O2hAKCgodt#*V-mZWr!GF3 zMy1QR3V0HobOl8e)7?>lVjiE8nl%NExl4~55A=z29!nys{dn(-pl9*% zUcY$X`<(YELP5mG`<(atF7B^=au9>y{>CdoShDzt_*C)n=JQrS#N>*P zw*c?aTLEu=Z$);wvj=!9V(?rm{qUsd!Tva!lmSQ`%0Q$oJ*b$~0B`i*Ad%P8gM-C6 zdT&%h52#pzmb)kqoiZIyh%6O4eXq!9sUJ1f=oH7AvaIP{@9Ml+XUzoiz zNj$J!l=Mk-0yTiY6`KGof>4!B5IGH;0&VCtjbU17_%x0FWj28!G{T^X&_!xAVc}XY zn?Mmz^~KyPA>vBBsA|8I$>Q=8WAGJWaMU(1h={B3$Vh#-HBujGwwSg0@X!QZnB8W# zM4D|@Wva;e>5=%#G~ACd_ru#qEnyOyX*4ahlh2!v$kR@H(S^B2ZbY|Bx~^pCBqkF zV=MP>$IZhVzVJeXz7mE9k94toA@J2f`bs$Rc`cGboWmkT;Am1JkqV0>Lh6;7;+8j% zUYUg}_oy;k%{oWT`bne8T-;H^>30GNTt@ZF`6>315OpR|X;c=~ZZnOQu=95(d_Ff= zutoQr9`EF5NjpqqF`(AW9*Au)PxO5WJd4Bda>`Z&ldHp+(HLMWmvF^gF46UHP}PcH zDS8=J#2$L`D-wzp#=xjnaUN2KGGA2Zm4Wrj0`Zu~L@yNQXpluXnv}&z)dtB|qHvll z-ddRGQFt|c2GR2cli@B;g?D;sz)GC)qbSbP1(8cxf~=x0OK~(Q%aA&hXr#h0Eypo9 z`E|AR^|?P0krAPY=k;)Zlocp8Stx0;P|akY654fID>N@cAE~qJHF{lSLg zM7S+ftE?2+g{CD7MN1YslPpvw*^T&BI2GXuv;Oq^iJ9LLh|el>G@NZge6kzatJQqc zdmo?*SnDgV534^>5me-B{+obQ4%k_DAd*2oW7* z!qKFx6(YTejlqT5Mc$b8sc^4P!asJysVf*6cV_4I2$wc%ZP+9}8pSu}VyE#0Vs(p? 
zdxVoKE!+w?dB9f{%GUz6Z^GBux4A}V4M}jqZvd^0+&qAf$7~9h7En1o!R*5QgtAUl z4PCxot=a~p4nQYzt0(2pgdSjH;J$NwyBa8pn+Tmu*_>5B$-TB(UwDQ0>Nqqn?{@~!iPNAydE+2B>^h+>Kb_;&*!vk-o zez_4a&N^}T+u;>3(Ce!k$910-*n7n}s*#JMN!f?g*OL2j;j75!$yrjIq7tZl zo?3|mNFB;SqzIDy1pbn82)A;Te54-j``;llMf!+LN&(7tYoe%2izeE`?OITnG06D!S8+O?0u#9F*Gsrc=3|b3(&-f<8OV?^aeDs%a4ZHdsKTh@zkL1 zFAXRq5F+q%6i1U%DnxoU@fa?Ankd|vOun>i#{lg9)=1$Cxh^Q18lYdUUeo6-8%m{O zJ*ZtAKi)nMJEvekwd49EK9niegrq%U_q#e6>p@r==n&Fzz$~ozfB|(AdyRAoql%D; zb))NuaypKMl3R+T92YeMs7|OgJBiexoI=`K4Nr?)sQ(!?*IA?uaz9`Glu|9Kfy&mX)wqPzp|)c#dW zE05CEx%0JwZ{<eeb4{2+CyDxH~>JQXh z50QHGtqG?-+>aOfwr!4;AIdC9S~+_oFyK}R=9hCHOvSD2Y*pJH327+vu`2BeQit*s zX=`nJCUT+7W;NGyq`u~I;lfwy=gHtwz91@rs(w+e1R-@Oj1;;iR?xhR7v)R%E?4<7 zQjeB>UbL=AAFWII3QBWpmMGm{v*1yNg=ux>aJ~2;1ap`*(rVC0TCHKWgiuSQ@(&`r zTeH3@YWym$mq)X{=FZm!#+OI4-cTQaX1%xGdZ?qSSs#yS_wGxY^>w#k3}YZxb98nFl!wcEP|biYf)s`gl@t7QK-5ss!lvokD_is(0A0jeHW=i z`6r~U73_N=7vA9eYOWt3^(fd6aq5G*+pk1zo49bg;EVCSag;Z=KSIo00g=OT; zizz=6;!x;6tK$9zsYCfO($*UG6Ojvr{#4ENuSk8(_A^}giv7H~WXhYOBB<`q)r!1@ z)S>(vQfS#ryfWn%xRI;8jnt!R!d|{=7N^o~#8}vtnVc_ktJ$27V5a;M<+_zl)Wc8d z!psqRtJPrFnyol$B5VnEv&LWvwcD(AdzfAMcahz#biWdHe--P@qjdk_&esO+nMdh< ztv&#yd$(%z_qPe9%O3Z;TXSAgx_8`y-}`Xi!e6%g4U{f7;3LhBc2K&1KKu5rJ70s+ zb^C|BsoTzZl`h`-veNwqLIiq#i=#<-SBUf~-Fvw3DV<8BFYcG(#{`s+qt`Fs- zf|%@Yxgh22`^eNi>w=>xTDA0l-q|B1A<&izj0!VCOf&GiSQ9-aFm zPJQ542Q{eNZ9${E!76X9C%Z<)3*uV)nBDtPvtuapPeK~X{79AdXQU40FGyRf+<%E& zDD$ssuD>DmHQL9x@Rj;GliC!qPb6yF4o8#H9;rj=fYi^2R5d~6pD7)2Cs*l&w2eMw z%d>6PxtEx3k)d9uo6;F2yOmDVq($ixEMRhKt#*Sh!K@FpYOtN4jTCfGEUG7j8|+FK zk=?CyT}AC*MSt@sT{m~WHjv;vN*Aa;ASQ03pOl9QW}+A;AFe8?fc=#&+*M~rmW-R(p69uOk%(-TLN(o2Z+DqU|}_>@j; z<8dM9@90BUxJ=O4BtE_p3wL)!WG7TeC1A7UAo{Fs!FI zL6RGl5=7@$x^<6^>{>fL1{=B!QW`xmV~6=RxFwL>8PN*Q5m)*Q>nvhXqmQT~u+>+s zV?U%0r9aZv+BZPt!kY|Ka}7f3(Z0br_36PRyayJTQPd^8NR8mo@gq~l1niUFgICFw z8xfgNlPg$s$`BzD@oiagW&#I?FQ{qCNwe|lFP2&U*_=f8x> zNoqcG4J43KyaAj=AWYFrijPH!- zaVMrIcUnc;`{)M#QO!-#rkM!zNTO<$`Id*so;%^2s0PG3u&qu3lFty^QZ@#Rp(WX! 
z_)6Q!b=Z_ga3gKr4+u{(qB_hyI4CsUKkn@|kZQMG#v@dSM zoDrbbAuRe>_ce534|pbY4}^u&{2T&BGvIOOGX~Rx>wIlbnh}D} za!u@BjQuA?{qf2zgd8th>m`0(=b#>3F`pX+sqiqB$ke=h>oRs)=~?jd9_#UK)@*vb zjSi)@iJC`;L2(HH9FK|`%w*R-Fyi`xn^(zAV=>R*vmubZWs?oQPrm4r&t&KEy{-^- zByDmpcBXRSa(+0JiiaAf>k&zIaOTDk;}*KSnI4-^CW3|9Le1N@n$8&J&?(R+w$Kg9 zIK}{89)h^8ZVg%zVjO6|2GR|>2Yxb|u+7XoKcReg9v4pN8~_~VY^SGlz>g5-|1K8+ zccqw%-4ZYEZy3OE`4m>39~@^US<%tw`MluLo16CtO5Cf%Eb zDR&__Ic~#(`nuIxQPzFb4CP%>(HQV)t6Z=@q=ZAbL1w zJ74T7c69jg3T)V)gvWAL#92yQw*$21JSrMyTu*mW@H+#kZDZy^%2hfc&z}Lt7dH>g z7_@FLbOMAbI8fsY3- z0vkO=aL9$vLHI$iXe>)NQ~4lN{i?hYX7`B7$b(1cnu<*4jjB0pVP$?x2DSD*pOeS9m6E}S1JtZ8M?kF*XnlnPp*^0%dMB+O&>Ud0-fWH&@ z#GL*WpVuXpyl#Fg$~NP#twhS#mH)c=Uza%ay7^!CegB^S72w3J*X`fGx99)g?fH4@ zb@RV2K0j~y%1n%wVz8d2j;FJcI+QtL1bvAFI9KHKjMtJl$9QeT(WJ~n>Rx$l zU!cs#g>U8YXorA+F5I|rCOun(RfKphXs6ibfwA=>O1WXgxL%lw+<{Z-g2M;48a1aU zV!q*u<^n;JS~-fudSD}$M{(zZu9|gHZ#eQU z%O*^NOAMGRl_C>?!#6lF>X)`yC-_khP?n6)>w+; zwx{5hlUz&MkL#SkCl*RpOa-bikGLEQj}6o~EuRiZOUG=_mCjXjZR;?JhM&B*M^qFY z#H1RV)GvHS52uFPZlN;+v#?A1b!js8fFtNdp61_(qKDKG(o<3 z6XvG}V=-B9b>;gj@k=83v1>3KbNek2KQVj}9`{^2zjplU{bJgKwyfrg>Fdo&I0tDk zVG|cG<_fG1al;rs3ufRJ{KSaCx+-x5UBqw9kaP8v9-T$KxjAKfM>-afBBjzp%(*e^ ztecMtH1X5irGvAvH<^tu?`8oY7vTWA;PKx}+O#Ws2f45ky9^tfO;prZ8ohHZS22E` z7{4NQjvDbZ8Q8E?*S5(u0y8P+AZ*KAl(3jtLB$CP@UROdOsGzqlR*W%DcYNhGKOQd z=o(_O(Je;(#L0U#UaF~jEYtbs=J)04akdGfNf=KL!@FBEmRTP~1E23|P&81r# z==3nUiyt88lAfGDVxn5-f_OULZw}@aqLWXV&5O5U1s%T~l-rd`K_XotmerPLPoaX4 z2+#w!Wq?E|Z>1BM_=L^G4-uW~As*ZT4&uoHBW0>a^AvGOUrb?Z&|C;RqS9eKN7L0% zvFbr*=;T_uK#zxPBG-bs5t(##9W_eI0-?Fk_l2tFE<);179(w;mbH)rqeNbh#xD`) z(D&#^LYh( z$%7s-zWd&Syx)5nY2v4q#YgQKeUYyiN7WOl`Q? z*7|wNdojPV5j7QEvI$3%vKgsE*@9GXCPe@4v^l`rbeNSme3`3kMH(xBmt|8j@Xx*| e@!|^+g_EV7u9=2^n3rJ{&c@X=bNGF5Bq7P-YG7|3DQ2gVU+BCT(cs8CVC^3aMxOLrmkR(9t8k#BzI?mcI}Y*20h zcE42x_N1kn;}rfJT^E*0@yO;%Xow$R3jZB1BJes%C0v@%n3<6Zl;oQ7;i~klg-KC#(OLbvc^AMnEU}8# z(tV=_F`hCx(k%5R>kplPMY1=RS_SG{AtfEkLg@XO^{by_gwkW-^TrmWluAIm!Z`g0iRXKp8T*HU25D0hF3 zlhT|KIksuZM2!sAmtx9{l$lV^;vK|!qzaIao-z&X_clr_Ur(HP6;$Rtpp2tA^PssH z=W<*#x@uTOHu)ukWBmkL$G)#^P)<&BU5Eq)VClV5;3EfI|B92Y<5LM4ux41NB_}~+k3GfdH`8F*53enZN6Bl;^c7<- zz$Ldn*XSP7!Hj1vyqoLy@2gqfMdv{!P{jWb$wuh~LEglha%=`qV`I zdp!iseaQS{wK{Zch?cxL+G#MI68(GBB#?ye=Z3ho%)cm_!^m;^cJe612VV5~$2>Kn z1rN0(HTa65qmcP$ru~|b(pg80ITZ%|cSGoJ4SjASSF@bBT>KK|-mRxOHUl%V$%`hN z8vLLrx|=e#M>G0%hKxjJc|fMpfo9nEwV~y{8%fXmGoW*yJ7o&`bfN3p0`ck3PJ;D+ zzHr=K*HC+~ts~ zN{1^}rNT5%(Z8N+)!5U#2aF$x{<%8VVeQS`F!LPqkGU&BF*#51=k1%p>VN))hnDF_ z{Ba(%6GZ<8F`mu!UB+fFM|SOA#HKO`%9^??eJD`uA}MjWB!L;bSZ^9SD;28GUq_BE z9|fkB%s(AvYBc-h5i&u&#_+^L^iNqJkegqGR6#QHkDZ$aDN6-n9gUqpRN9zFG<&X!8iQ@ft@J=XBD$D`99??Hj hQ3Rg7FdJImV>x!$ar_lU3=WPy3{5R8$Non=|1Wq-Fuwo* literal 0 HcmV?d00001 diff --git a/tests/data/nuscenes/sweeps/LIDAR_TOP/n015-2018-08-02-17-16-37+0800__LIDAR_TOP__1533201470898274.pcd.bin b/tests/data/nuscenes/sweeps/LIDAR_TOP/n015-2018-08-02-17-16-37+0800__LIDAR_TOP__1533201470898274.pcd.bin new file mode 100644 index 0000000000000000000000000000000000000000..5c278a6c8e68cd52b07d952b8c98684a64f2ec3e GIT binary patch literal 2000 zcmZ9Mdr(tH6o;>3rJ{&c@X=bNGF5Bq7P-YG7|3DQ2gVU+BCT(cs8CVC^3aMxOLrmkR(9t8k#BzI?mcI}Y*20h zcE42x_N1kn;}rfJT^E*0@yO;%Xow$R3jZB1BJes%C0v@%n3<6Zl;oQ7;i~klg-KC#(OLbvc^AMnEU}8# z(tV=_F`hCx(k%5R>kplPMY1=RS_SG{AtfEkLg@XO^{by_gwkW-^TrmWluAIm!Z`g0iRXKp8T*HU25D0hF3 zlhT|KIksuZM2!sAmtx9{l$lV^;vK|!qzaIao-z&X_clr_Ur(HP6;$Rtpp2tA^PssH z=W<*#x@uTOHu)ukWBmkL$G)#^P)<&BU5Eq)VClV5;3EfI|B92Y<5LM4ux41NB_}~+k3GfdH`8F*53enZN6Bl;^c7<- zz$Ldn*XSP7!Hj1vyqoLy@2gqfMdv{!P{jWb$wuh~LEglha%=`qV`I 
zdp!iseaQS{wK{Zch?cxL+G#MI68(GBB#?ye=Z3ho%)cm_!^m;^cJe612VV5~$2>Kn z1rN0(HTa65qmcP$ru~|b(pg80ITZ%|cSGoJ4SjASSF@bBT>KK|-mRxOHUl%V$%`hN z8vLLrx|=e#M>G0%hKxjJc|fMpfo9nEwV~y{8%fXmGoW*yJ7o&`bfN3p0`ck3PJ;D+ zzHr=K*HC+~ts~ zN{1^}rNT5%(Z8N+)!5U#2aF$x{<%8VVeQS`F!LPqkGU&BF*#51=k1%p>VN))hnDF_ z{Ba(%6GZ<8F`mu!UB+fFM|SOA#HKO`%9^??eJD`uA}MjWB!L;bSZ^9SD;28GUq_BE z9|fkB%s(AvYBc-h5i&u&#_+^L^iNqJkegqGR6#QHkDZ$aDN6-n9gUqpRN9zFG<&X!8iQ@ft@J=XBD$D`99??Hj hQ3Rg7FdJImV>x!$ar_lU3=WPy3{5R8$Non=|1Wq-Fuwo* literal 0 HcmV?d00001 diff --git a/tests/test_dataset/test_nuscene_dataset.py b/tests/test_dataset/test_nuscene_dataset.py new file mode 100644 index 0000000000..17b28725e1 --- /dev/null +++ b/tests/test_dataset/test_nuscene_dataset.py @@ -0,0 +1,60 @@ +import numpy as np + +from mmdet3d.datasets import NuScenesDataset + + +def test_getitem(): + np.random.seed(0) + point_cloud_range = [-50, -50, -5, 50, 50, 3] + file_client_args = dict(backend='disk') + class_names = [ + 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', + 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' + ] + pipeline = [ + dict( + type='LoadPointsFromFile', + load_dim=5, + use_dim=5, + file_client_args=file_client_args), + dict( + type='LoadPointsFromMultiSweeps', + sweeps_num=2, + file_client_args=file_client_args), + dict( + type='MultiScaleFlipAug3D', + img_scale=(1333, 800), + pts_scale_ratio=1, + flip=False, + transforms=[ + dict( + type='GlobalRotScaleTrans', + rot_range=[0, 0], + scale_ratio_range=[1., 1.], + translation_std=[0, 0, 0]), + dict(type='RandomFlip3D'), + dict( + type='PointsRangeFilter', + point_cloud_range=point_cloud_range), + dict( + type='DefaultFormatBundle3D', + class_names=class_names, + with_label=False), + dict(type='Collect3D', keys=['points']) + ]) + ] + + nus_dataset = NuScenesDataset( + 'tests/data/nuscenes/nus_info.pkl', + pipeline, + 'tests/data/nuscenes', + test_mode=True) + data = nus_dataset[0] + assert data['img_metas'][0].data['flip'] is False + assert data['img_metas'][0].data['pcd_horizontal_flip'] is False + assert data['points'][0]._data.shape == (100, 4) + + data = nus_dataset[1] + assert data['img_metas'][0].data['flip'] is False + assert data['img_metas'][0].data['pcd_horizontal_flip'] is False + assert data['points'][0]._data.shape == (597, 4) diff --git a/tests/test_pipeline/test_load_points_from_multi_sweeps.py b/tests/test_pipeline/test_load_points_from_multi_sweeps.py new file mode 100644 index 0000000000..de17fd0479 --- /dev/null +++ b/tests/test_pipeline/test_load_points_from_multi_sweeps.py @@ -0,0 +1,67 @@ +import numpy as np + +from mmdet3d.datasets.pipelines.loading import LoadPointsFromMultiSweeps + + +def test_load_points_from_multi_sweeps(): + np.random.seed(0) + + file_client_args = dict(backend='disk') + load_points_from_multi_sweeps_1 = LoadPointsFromMultiSweeps( + sweeps_num=9, + use_dim=[0, 1, 2, 3, 4], + file_client_args=file_client_args) + + load_points_from_multi_sweeps_2 = LoadPointsFromMultiSweeps( + sweeps_num=9, + use_dim=[0, 1, 2, 3, 4], + file_client_args=file_client_args, + pad_empty_sweeps=True, + remove_close=True) + + load_points_from_multi_sweeps_3 = LoadPointsFromMultiSweeps( + sweeps_num=9, + use_dim=[0, 1, 2, 3, 4], + file_client_args=file_client_args, + pad_empty_sweeps=True, + remove_close=True, + test_mode=True) + + points = np.random.random([100, 5]) * 2 + + input_results = dict(points=points, sweeps=[], timestamp=None) + results = load_points_from_multi_sweeps_1(input_results) + assert results['points'].shape == (100, 5) + + 
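+    # With pad_empty_sweeps=True and an empty sweep list, the keyframe is
+    # repeated sweeps_num times and each padded copy is filtered by
+    # _remove_close, so the result keeps more than 100 but fewer than
+    # (sweeps_num + 1) * 100 points.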
input_results = dict(points=points, sweeps=[], timestamp=None) + results = load_points_from_multi_sweeps_2(input_results) + assert results['points'].shape == (775, 5) + + sensor2lidar_rotation = np.array( + [[9.99999967e-01, 1.13183067e-05, 2.56845368e-04], + [-1.12839618e-05, 9.99999991e-01, -1.33719456e-04], + [-2.56846879e-04, 1.33716553e-04, 9.99999958e-01]]) + sensor2lidar_translation = np.array([-0.0009198, -0.03964854, -0.00190136]) + sweep = dict( + data_path='tests/data/nuscenes/sweeps/LIDAR_TOP/' + 'n008-2018-09-18-12-07-26-0400__LIDAR_TOP__' + '1537287083900561.pcd.bin', + sensor2lidar_rotation=sensor2lidar_rotation, + sensor2lidar_translation=sensor2lidar_translation, + timestamp=0) + + input_results = dict(points=points, sweeps=[sweep], timestamp=1.0) + results = load_points_from_multi_sweeps_1(input_results) + assert results['points'].shape == (500, 5) + + input_results = dict(points=points, sweeps=[sweep], timestamp=1.0) + results = load_points_from_multi_sweeps_2(input_results) + assert results['points'].shape == (451, 5) + + input_results = dict(points=points, sweeps=[sweep] * 10, timestamp=1.0) + results = load_points_from_multi_sweeps_2(input_results) + assert results['points'].shape == (3259, 5) + + input_results = dict(points=points, sweeps=[sweep] * 10, timestamp=1.0) + results = load_points_from_multi_sweeps_3(input_results) + assert results['points'].shape == (3259, 5) From 3ba50ca51df37796d388afe5aae45772010f181e Mon Sep 17 00:00:00 2001 From: twang <30491025+Tai-Wang@users.noreply.github.com> Date: Wed, 19 Aug 2020 22:33:28 +0800 Subject: [PATCH 08/14] Fix minor bugs of computing iou3d (#69) * Fix minor bugs encountered when computing iou3d with gpu while the specified cuda id is not default 0 * Fix nonstandard formats * Remove trailing whitespace * Fix nonstandard format * Fix ambiguous initialization of cuda tensors --- mmdet3d/ops/iou3d/iou3d_utils.py | 61 ++++++++++++++++++-------------- mmdet3d/ops/iou3d/src/iou3d.cpp | 7 ++-- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/mmdet3d/ops/iou3d/iou3d_utils.py b/mmdet3d/ops/iou3d/iou3d_utils.py index 30551a3d40..7070af46e5 100644 --- a/mmdet3d/ops/iou3d/iou3d_utils.py +++ b/mmdet3d/ops/iou3d/iou3d_utils.py @@ -4,15 +4,17 @@ def boxes_iou_bev(boxes_a, boxes_b): - """ - :param boxes_a: (M, 5) - :param boxes_b: (N, 5) - :return: - ans_iou: (M, N) - """ + """Calculate boxes IoU in the bird view. - ans_iou = torch.cuda.FloatTensor( - torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() + Args: + boxes_a (torch.Tensor): Input boxes a with shape (M, 5). + boxes_b (torch.Tensor): Input boxes b with shape (N, 5). + + Returns: + ans_iou (torch.Tensor): IoU result with shape (M, N). + """ + ans_iou = boxes_a.new_zeros( + torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) iou3d_cuda.boxes_iou_bev_gpu(boxes_a.contiguous(), boxes_b.contiguous(), ans_iou) @@ -21,34 +23,41 @@ def boxes_iou_bev(boxes_a, boxes_b): def nms_gpu(boxes, scores, thresh): + """Non maximum suppression on GPU. + + Args: + boxes (torch.Tensor): Input boxes with shape (N, 5). + scores (torch.Tensor): Scores of predicted boxes with shape (N). + thresh (torch.Tensor): Threshold of non maximum suppression. + + Returns: + torch.Tensor: Remaining indices with scores in descending order. 
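+
+    Example:
+        A usage sketch with placeholder inputs (not taken from this patch);
+        ``boxes`` are BEV boxes ``[x1, y1, x2, y2, ry]`` on the same CUDA
+        device as ``scores``, and the returned indices refer to the
+        original box order::
+
+            keep = nms_gpu(boxes, scores, thresh)
+            kept_boxes = boxes[keep]
+            kept_scores = scores[keep]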
""" - :param boxes: (N, 5) [x1, y1, x2, y2, ry] - :param scores: (N) - :param thresh: - :return: - """ - # areas = (x2 - x1) * (y2 - y1) order = scores.sort(0, descending=True)[1] boxes = boxes[order].contiguous() - keep = torch.LongTensor(boxes.size(0)) - num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh) - return order[keep[:num_out].cuda()].contiguous() + keep = boxes.new_zeros(boxes.size(0)) + num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh, boxes.device.index) + return order[keep[:num_out].cuda(boxes.device)].contiguous() def nms_normal_gpu(boxes, scores, thresh): + """Normal non maximum suppression on GPU. + + Args: + boxes (torch.Tensor): Input boxes with shape (N, 5). + scores (torch.Tensor): Scores of predicted boxes with shape (N). + thresh (torch.Tensor): Threshold of non maximum suppression. + + Returns: + torch.Tensor: Remaining indices with scores in descending order. """ - :param boxes: (N, 5) [x1, y1, x2, y2, ry] - :param scores: (N) - :param thresh: - :return: - """ - # areas = (x2 - x1) * (y2 - y1) order = scores.sort(0, descending=True)[1] boxes = boxes[order].contiguous() - keep = torch.LongTensor(boxes.size(0)) - num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh) - return order[keep[:num_out].cuda()].contiguous() + keep = boxes.new_zeros(boxes.size(0)) + num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh, + boxes.device.index) + return order[keep[:num_out].cuda(boxes.device)].contiguous() diff --git a/mmdet3d/ops/iou3d/src/iou3d.cpp b/mmdet3d/ops/iou3d/src/iou3d.cpp index f5158d5f23..706cddbb80 100644 --- a/mmdet3d/ops/iou3d/src/iou3d.cpp +++ b/mmdet3d/ops/iou3d/src/iou3d.cpp @@ -92,12 +92,14 @@ int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, return 1; } -int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh) { +int nms_gpu(at::Tensor boxes, at::Tensor keep, + float nms_overlap_thresh, int device_id) { // params boxes: (N, 5) [x1, y1, x2, y2, ry] // params keep: (N) CHECK_INPUT(boxes); CHECK_CONTIGUOUS(keep); + cudaSetDevice(device_id); int boxes_num = boxes.size(0); const float *boxes_data = boxes.data_ptr(); @@ -145,12 +147,13 @@ int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh) { } int nms_normal_gpu(at::Tensor boxes, at::Tensor keep, - float nms_overlap_thresh) { + float nms_overlap_thresh, int device_id) { // params boxes: (N, 5) [x1, y1, x2, y2, ry] // params keep: (N) CHECK_INPUT(boxes); CHECK_CONTIGUOUS(keep); + cudaSetDevice(device_id); int boxes_num = boxes.size(0); const float *boxes_data = boxes.data_ptr(); From f6a7d78c921db565d605146705e9713a4fb9dbf0 Mon Sep 17 00:00:00 2001 From: Wenwei Zhang <40779233+ZwwWayne@users.noreply.github.com> Date: Thu, 20 Aug 2020 11:33:13 +0800 Subject: [PATCH 09/14] Fix (torch.nonzero): Fix warning of torch.nonzero and bug of nms_iou (#70) --- .../core/bbox/samplers/iou_neg_piecewise_sampler.py | 5 +++-- mmdet3d/core/post_processing/box3d_nms.py | 3 ++- mmdet3d/models/dense_heads/free_anchor3d_head.py | 2 +- mmdet3d/models/dense_heads/vote_head.py | 12 ++++++++---- mmdet3d/ops/iou3d/iou3d_utils.py | 4 ++-- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py b/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py index b4a4fb5313..8a309ce2f2 100644 --- a/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py +++ b/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py @@ -55,7 +55,7 @@ def _sample_pos(self, assign_result, num_expected, **kwargs): def _sample_neg(self, assign_result, 
num_expected, **kwargs): """Randomly sample some negative samples.""" - neg_inds = torch.nonzero(assign_result.gt_inds == 0) + neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) if neg_inds.numel() != 0: neg_inds = neg_inds.squeeze(1) if len(neg_inds) <= num_expected: @@ -80,7 +80,8 @@ def _sample_neg(self, assign_result, num_expected, **kwargs): max_iou_thr = self.neg_iou_thr[piece_inds] piece_neg_inds = torch.nonzero( (max_overlaps >= min_iou_thr) - & (max_overlaps < max_iou_thr)).view(-1) + & (max_overlaps < max_iou_thr), + as_tuple=False).view(-1) if len(piece_neg_inds) < piece_expected_num: neg_inds_choice = torch.cat( diff --git a/mmdet3d/core/post_processing/box3d_nms.py b/mmdet3d/core/post_processing/box3d_nms.py index 1915ec5fcb..c517b156cb 100644 --- a/mmdet3d/core/post_processing/box3d_nms.py +++ b/mmdet3d/core/post_processing/box3d_nms.py @@ -129,7 +129,8 @@ def aligned_3d_nms(boxes, scores, classes, thresh): inter = inter_l * inter_w * inter_h iou = inter / (area[i] + area[score_sorted[:last - 1]] - inter) iou = iou * (classes1 == classes2).float() - score_sorted = score_sorted[torch.nonzero(iou <= thresh).flatten()] + score_sorted = score_sorted[torch.nonzero( + iou <= thresh, as_tuple=False).flatten()] indices = boxes.new_tensor(pick, dtype=torch.long) return indices diff --git a/mmdet3d/models/dense_heads/free_anchor3d_head.py b/mmdet3d/models/dense_heads/free_anchor3d_head.py index 54171f8aa9..91f8379d7f 100644 --- a/mmdet3d/models/dense_heads/free_anchor3d_head.py +++ b/mmdet3d/models/dense_heads/free_anchor3d_head.py @@ -140,7 +140,7 @@ def loss(self, box_cls_prob = torch.sparse.sum( object_cls_box_prob, dim=0).to_dense() - indices = torch.nonzero(box_cls_prob).t_() + indices = torch.nonzero(box_cls_prob, as_tuple=False).t_() if indices.numel() == 0: image_box_prob = torch.zeros( anchors_.size(0), diff --git a/mmdet3d/models/dense_heads/vote_head.py b/mmdet3d/models/dense_heads/vote_head.py index d5238ae9af..f9ecc063d1 100644 --- a/mmdet3d/models/dense_heads/vote_head.py +++ b/mmdet3d/models/dense_heads/vote_head.py @@ -409,7 +409,8 @@ def get_targets_single(self, box_indices_all = gt_bboxes_3d.points_in_boxes(points) for i in range(gt_labels_3d.shape[0]): box_indices = box_indices_all[:, i] - indices = torch.nonzero(box_indices).squeeze(-1) + indices = torch.nonzero( + box_indices, as_tuple=False).squeeze(-1) selected_points = points[indices] vote_target_masks[indices] = 1 vote_targets_tmp = vote_targets[indices] @@ -418,7 +419,8 @@ def get_targets_single(self, for j in range(self.gt_per_seed): column_indices = torch.nonzero( - vote_target_idx[indices] == j).squeeze(-1) + vote_target_idx[indices] == j, + as_tuple=False).squeeze(-1) vote_targets_tmp[column_indices, int(j * 3):int(j * 3 + 3)] = votes[column_indices] @@ -435,7 +437,8 @@ def get_targets_single(self, dtype=torch.long) for i in torch.unique(pts_instance_mask): - indices = torch.nonzero(pts_instance_mask == i).squeeze(-1) + indices = torch.nonzero( + pts_instance_mask == i, as_tuple=False).squeeze(-1) if pts_semantic_mask[indices[0]] < self.num_classes: selected_points = points[indices, :3] center = 0.5 * ( @@ -558,7 +561,8 @@ def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, # filter empty boxes and boxes with low score scores_mask = (obj_scores > self.test_cfg.score_thr) - nonempty_box_inds = torch.nonzero(nonempty_box_mask).flatten() + nonempty_box_inds = torch.nonzero( + nonempty_box_mask, as_tuple=False).flatten() nonempty_mask = 
torch.zeros_like(bbox_classes).scatter( 0, nonempty_box_inds[nms_selected], 1) selected = (nonempty_mask.bool() & scores_mask.bool()) diff --git a/mmdet3d/ops/iou3d/iou3d_utils.py b/mmdet3d/ops/iou3d/iou3d_utils.py index 7070af46e5..6c256ac53d 100644 --- a/mmdet3d/ops/iou3d/iou3d_utils.py +++ b/mmdet3d/ops/iou3d/iou3d_utils.py @@ -37,7 +37,7 @@ def nms_gpu(boxes, scores, thresh): boxes = boxes[order].contiguous() - keep = boxes.new_zeros(boxes.size(0)) + keep = torch.zeros(boxes.size(0), dtype=torch.long) num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh, boxes.device.index) return order[keep[:num_out].cuda(boxes.device)].contiguous() @@ -57,7 +57,7 @@ def nms_normal_gpu(boxes, scores, thresh): boxes = boxes[order].contiguous() - keep = boxes.new_zeros(boxes.size(0)) + keep = torch.zeros(boxes.size(0), dtype=torch.long) num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh, boxes.device.index) return order[keep[:num_out].cuda(boxes.device)].contiguous() From ba5fa548bbe0d239933f543854547fbfa4668511 Mon Sep 17 00:00:00 2001 From: twang <30491025+Tai-Wang@users.noreply.github.com> Date: Wed, 26 Aug 2020 21:17:51 +0800 Subject: [PATCH 10/14] Fix a minor bug in json2csv of lyft dataset (#78) * Fix a minor bug in json2csv of lyft dataset * Fix variable names and minor hard code --- mmdet3d/datasets/lyft_dataset.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mmdet3d/datasets/lyft_dataset.py b/mmdet3d/datasets/lyft_dataset.py index 837fa79f4c..c69e027506 100644 --- a/mmdet3d/datasets/lyft_dataset.py +++ b/mmdet3d/datasets/lyft_dataset.py @@ -425,18 +425,16 @@ def show(self, results, out_dir): pred_bboxes[..., 2] += pred_bboxes[..., 5] / 2 show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name) - @staticmethod - def json2csv(json_path, csv_savepath): + def json2csv(self, json_path, csv_savepath): """Convert the json file to csv format for submission. Args: json_path (str): Path of the result json file. csv_savepath (str): Path to save the csv file. 
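+
+        Example:
+            An illustrative call with placeholder paths; the json file is
+            expected to hold a top-level 'results' dict, and
+            ``sample_submission.csv`` must exist under ``self.data_root``::
+
+                dataset.json2csv('work_dirs/lyft_results.json',
+                                 'work_dirs/lyft_submission.csv')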
""" - with open(json_path, 'r') as f: - results = mmcv.load(f)['results'] - csv_nopred = 'data/lyft/sample_submission.csv' - data = pd.read_csv(csv_nopred) + results = mmcv.load(json_path)['results'] + sample_list_path = osp.join(self.data_root, 'sample_submission.csv') + data = pd.read_csv(sample_list_path) Id_list = list(data['Id']) pred_list = list(data['PredictionString']) cnt = 0 From 7b1f6a850f9b7507d7cb6afc1d576e3bb5b265d0 Mon Sep 17 00:00:00 2001 From: encore-zhou Date: Wed, 26 Aug 2020 21:31:52 +0800 Subject: [PATCH 11/14] add primitive head (#53) * add primitive head * register of primitive head * modify primitive head * modify primitive head * modify primitive head * modify primitive head * update primitive head unittest * modify primitive had * fix bugs for primitive head * update primitive head --- mmdet3d/models/model_utils/vote_module.py | 2 +- .../models/roi_heads/mask_heads/__init__.py | 3 +- .../roi_heads/mask_heads/primitive_head.py | 855 ++++++++++++++++++ tests/test_heads.py | 112 +++ 4 files changed, 970 insertions(+), 2 deletions(-) create mode 100644 mmdet3d/models/roi_heads/mask_heads/primitive_head.py diff --git a/mmdet3d/models/model_utils/vote_module.py b/mmdet3d/models/model_utils/vote_module.py index 790fa5f6b6..bfea4c8448 100644 --- a/mmdet3d/models/model_utils/vote_module.py +++ b/mmdet3d/models/model_utils/vote_module.py @@ -126,7 +126,7 @@ def get_loss(self, seed_points, vote_points, seed_indices, seed_indices_expand = seed_indices.unsqueeze(-1).repeat( 1, 1, 3 * self.gt_per_seed) seed_gt_votes = torch.gather(vote_targets, 1, seed_indices_expand) - seed_gt_votes += seed_points.repeat(1, 1, 3) + seed_gt_votes += seed_points.repeat(1, 1, self.gt_per_seed) weight = seed_gt_votes_mask / (torch.sum(seed_gt_votes_mask) + 1e-6) distance = self.vote_loss( diff --git a/mmdet3d/models/roi_heads/mask_heads/__init__.py b/mmdet3d/models/roi_heads/mask_heads/__init__.py index 532bbfaae9..ecc8a118a5 100644 --- a/mmdet3d/models/roi_heads/mask_heads/__init__.py +++ b/mmdet3d/models/roi_heads/mask_heads/__init__.py @@ -1,3 +1,4 @@ from .pointwise_semantic_head import PointwiseSemanticHead +from .primitive_head import PrimitiveHead -__all__ = ['PointwiseSemanticHead'] +__all__ = ['PointwiseSemanticHead', 'PrimitiveHead'] diff --git a/mmdet3d/models/roi_heads/mask_heads/primitive_head.py b/mmdet3d/models/roi_heads/mask_heads/primitive_head.py new file mode 100644 index 0000000000..d15a845bb5 --- /dev/null +++ b/mmdet3d/models/roi_heads/mask_heads/primitive_head.py @@ -0,0 +1,855 @@ +import torch +from mmcv.cnn import ConvModule +from torch import nn as nn +from torch.nn import functional as F + +from mmdet3d.models.builder import build_loss +from mmdet3d.models.model_utils import VoteModule +from mmdet3d.ops import PointSAModule, furthest_point_sample +from mmdet.core import multi_apply +from mmdet.models import HEADS + + +@HEADS.register_module() +class PrimitiveHead(nn.Module): + r"""Primitive head of `H3DNet `_. + + Args: + num_dims (int): The dimension of primitive semantic information. + num_classes (int): The number of class. + primitive_mode (str): The mode of primitive module, + avaliable mode ['z', 'xy', 'line']. + bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and + decoding boxes. + train_cfg (dict): Config for training. + test_cfg (dict): Config for testing. + vote_moudule_cfg (dict): Config of VoteModule for point-wise votes. + vote_aggregation_cfg (dict): Config of vote aggregation layer. 
+ feat_channels (tuple[int]): Convolution channels of + prediction layer. + upper_thresh (float): Threshold for line matching. + surface_thresh (float): Threshold for suface matching. + conv_cfg (dict): Config of convolution in prediction layer. + norm_cfg (dict): Config of BN in prediction layer. + objectness_loss (dict): Config of objectness loss. + center_loss (dict): Config of center loss. + semantic_loss (dict): Config of point-wise semantic segmentation loss. + """ + + def __init__(self, + num_dims, + num_classes, + primitive_mode, + train_cfg=None, + test_cfg=None, + vote_moudule_cfg=None, + vote_aggregation_cfg=None, + feat_channels=(128, 128), + upper_thresh=100.0, + surface_thresh=0.5, + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + objectness_loss=None, + center_loss=None, + semantic_reg_loss=None, + semantic_cls_loss=None): + super(PrimitiveHead, self).__init__() + assert primitive_mode in ['z', 'xy', 'line'] + # The dimension of primitive semantic information. + self.num_dims = num_dims + self.num_classes = num_classes + self.primitive_mode = primitive_mode + self.train_cfg = train_cfg + self.test_cfg = test_cfg + self.gt_per_seed = vote_moudule_cfg['gt_per_seed'] + self.num_proposal = vote_aggregation_cfg['num_point'] + self.upper_thresh = upper_thresh + self.surface_thresh = surface_thresh + + self.objectness_loss = build_loss(objectness_loss) + self.center_loss = build_loss(center_loss) + self.semantic_reg_loss = build_loss(semantic_reg_loss) + self.semantic_cls_loss = build_loss(semantic_cls_loss) + + assert vote_aggregation_cfg['mlp_channels'][0] == vote_moudule_cfg[ + 'in_channels'] + + # Primitive existence flag prediction + self.flag_conv = ConvModule( + vote_moudule_cfg['conv_channels'][-1], + vote_moudule_cfg['conv_channels'][-1] // 2, + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True, + inplace=True) + self.flag_pred = torch.nn.Conv1d( + vote_moudule_cfg['conv_channels'][-1] // 2, 2, 1) + + self.vote_module = VoteModule(**vote_moudule_cfg) + self.vote_aggregation = PointSAModule(**vote_aggregation_cfg) + + prev_channel = vote_aggregation_cfg['mlp_channels'][-1] + conv_pred_list = list() + for k in range(len(feat_channels)): + conv_pred_list.append( + ConvModule( + prev_channel, + feat_channels[k], + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True, + inplace=True)) + prev_channel = feat_channels[k] + self.conv_pred = nn.Sequential(*conv_pred_list) + + conv_out_channel = 3 + num_dims + num_classes + self.conv_pred.add_module('conv_out', + nn.Conv1d(prev_channel, conv_out_channel, 1)) + + def init_weights(self): + """Initialize weights of VoteHead.""" + pass + + def forward(self, feats_dict, sample_mod): + """Forward pass. + + Args: + feats_dict (dict): Feature dict from backbone. + sample_mod (str): Sample mode for vote aggregation layer. + valid modes are "vote", "seed" and "random". + + Returns: + dict: Predictions of primitive head. + """ + assert sample_mod in ['vote', 'seed', 'random'] + + seed_points = feats_dict['fp_xyz_net0'][-1] + seed_features = feats_dict['hd_feature'] + results = {} + + primitive_flag = self.flag_conv(seed_features) + primitive_flag = self.flag_pred(primitive_flag) + + results['pred_flag_' + self.primitive_mode] = primitive_flag + + # 1. generate vote_points from seed_points + vote_points, vote_features = self.vote_module(seed_points, + seed_features) + results['vote_' + self.primitive_mode] = vote_points + results['vote_features_' + self.primitive_mode] = vote_features + + # 2. 
aggregate vote_points + if sample_mod == 'vote': + # use fps in vote_aggregation + sample_indices = None + elif sample_mod == 'seed': + # FPS on seed and choose the votes corresponding to the seeds + sample_indices = furthest_point_sample(seed_points, + self.num_proposal) + elif sample_mod == 'random': + # Random sampling from the votes + batch_size, num_seed = seed_points.shape[:2] + sample_indices = torch.randint( + 0, + num_seed, (batch_size, self.num_proposal), + dtype=torch.int32, + device=seed_points.device) + else: + raise NotImplementedError('Unsupported sample mod!') + + vote_aggregation_ret = self.vote_aggregation(vote_points, + vote_features, + sample_indices) + aggregated_points, features, aggregated_indices = vote_aggregation_ret + results['aggregated_points_' + self.primitive_mode] = aggregated_points + results['aggregated_features_' + self.primitive_mode] = features + results['aggregated_indices_' + + self.primitive_mode] = aggregated_indices + + # 3. predict primitive offsets and semantic information + predictions = self.conv_pred(features) + + # 4. decode predictions + decode_ret = self.primitive_decode_scores(predictions, + aggregated_points) + results.update(decode_ret) + + center, pred_ind = self.get_primitive_center( + primitive_flag, decode_ret['center_' + self.primitive_mode]) + + results['pred_' + self.primitive_mode + '_ind'] = pred_ind + results['pred_' + self.primitive_mode + '_center'] = center + return results + + def loss(self, + bbox_preds, + points, + gt_bboxes_3d, + gt_labels_3d, + pts_semantic_mask=None, + pts_instance_mask=None, + img_metas=None, + gt_bboxes_ignore=None): + """Compute loss. + + Args: + bbox_preds (dict): Predictions from forward of primitive head. + points (list[torch.Tensor]): Input points. + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + bboxes of each sample. + gt_labels_3d (list[torch.Tensor]): Labels of each sample. + pts_semantic_mask (None | list[torch.Tensor]): Point-wise + semantic mask. + pts_instance_mask (None | list[torch.Tensor]): Point-wise + instance mask. + img_metas (list[dict]): Contain pcd and img's meta info. + gt_bboxes_ignore (None | list[torch.Tensor]): Specify + which bounding. + + Returns: + dict: Losses of Primitive Head. 
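+                The dict holds one entry per loss term, each suffixed with
+                the primitive mode: ``flag_loss_*``, ``vote_loss_*``,
+                ``center_loss_*``, ``size_loss_*`` and ``sem_loss_*``.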
+ """ + targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d, + pts_semantic_mask, pts_instance_mask, + bbox_preds) + + (point_mask, point_offset, gt_primitive_center, gt_primitive_semantic, + gt_sem_cls_label, gt_primitive_mask) = targets + + losses = {} + # Compute the loss of primitive existence flag + pred_flag = bbox_preds['pred_flag_' + self.primitive_mode] + flag_loss = self.objectness_loss(pred_flag, gt_primitive_mask.long()) + losses['flag_loss_' + self.primitive_mode] = flag_loss + + # calculate vote loss + vote_loss = self.vote_module.get_loss( + bbox_preds['seed_points'], + bbox_preds['vote_' + self.primitive_mode], + bbox_preds['seed_indices'], point_mask, point_offset) + losses['vote_loss_' + self.primitive_mode] = vote_loss + + num_proposal = bbox_preds['aggregated_points_' + + self.primitive_mode].shape[1] + primitive_center = bbox_preds['center_' + self.primitive_mode] + if self.primitive_mode != 'line': + primitive_semantic = bbox_preds['size_residuals_' + + self.primitive_mode].contiguous() + else: + primitive_semantic = None + semancitc_scores = bbox_preds['sem_cls_scores_' + + self.primitive_mode].transpose(2, 1) + + gt_primitive_mask = gt_primitive_mask / \ + (gt_primitive_mask.sum() + 1e-6) + center_loss, size_loss, sem_cls_loss = self.compute_primitive_loss( + primitive_center, primitive_semantic, semancitc_scores, + num_proposal, gt_primitive_center, gt_primitive_semantic, + gt_sem_cls_label, gt_primitive_mask) + losses['center_loss_' + self.primitive_mode] = center_loss + losses['size_loss_' + self.primitive_mode] = size_loss + losses['sem_loss_' + self.primitive_mode] = sem_cls_loss + + return losses + + def get_targets(self, + points, + gt_bboxes_3d, + gt_labels_3d, + pts_semantic_mask=None, + pts_instance_mask=None, + bbox_preds=None): + """Generate targets of primitive head. + + Args: + points (list[torch.Tensor]): Points of each batch. + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + bboxes of each batch. + gt_labels_3d (list[torch.Tensor]): Labels of each batch. + pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic + label of each batch. + pts_instance_mask (None | list[torch.Tensor]): Point-wise instance + label of each batch. + bbox_preds (dict): Predictions from forward of primitive head. + + Returns: + tuple[torch.Tensor]: Targets of primitive head. 
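+                The tuple contains the point-wise primitive mask and offset
+                targets, together with the primitive center, semantic size,
+                semantic class label and vote mask gathered at the seed
+                indices.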
+ """ + for index in range(len(gt_labels_3d)): + if len(gt_labels_3d[index]) == 0: + fake_box = gt_bboxes_3d[index].tensor.new_zeros( + 1, gt_bboxes_3d[index].tensor.shape[-1]) + gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box) + gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1) + + if pts_semantic_mask is None: + pts_semantic_mask = [None for i in range(len(gt_labels_3d))] + pts_instance_mask = [None for i in range(len(gt_labels_3d))] + + (point_mask, point_sem, + point_offset) = multi_apply(self.get_targets_single, points, + gt_bboxes_3d, gt_labels_3d, + pts_semantic_mask, pts_instance_mask) + + point_mask = torch.stack(point_mask) + point_sem = torch.stack(point_sem) + point_offset = torch.stack(point_offset) + + batch_size = point_mask.shape[0] + num_proposal = bbox_preds['aggregated_points_' + + self.primitive_mode].shape[1] + num_seed = bbox_preds['seed_points'].shape[1] + seed_inds = bbox_preds['seed_indices'].long() + seed_inds_expand = seed_inds.view(batch_size, num_seed, + 1).repeat(1, 1, 3) + seed_gt_votes = torch.gather(point_offset, 1, seed_inds_expand) + seed_gt_votes += bbox_preds['seed_points'] + gt_primitive_center = seed_gt_votes.view(batch_size * num_proposal, 1, + 3) + + seed_inds_expand_sem = seed_inds.view(batch_size, num_seed, 1).repeat( + 1, 1, 4 + self.num_dims) + seed_gt_sem = torch.gather(point_sem, 1, seed_inds_expand_sem) + gt_primitive_semantic = seed_gt_sem[:, :, 3:3 + self.num_dims].view( + batch_size * num_proposal, 1, self.num_dims).contiguous() + + gt_sem_cls_label = seed_gt_sem[:, :, -1].long() + + gt_votes_mask = torch.gather(point_mask, 1, seed_inds) + + return (point_mask, point_offset, gt_primitive_center, + gt_primitive_semantic, gt_sem_cls_label, gt_votes_mask) + + def get_targets_single(self, + points, + gt_bboxes_3d, + gt_labels_3d, + pts_semantic_mask=None, + pts_instance_mask=None): + """Generate targets of primitive head for single batch. + + Args: + points (torch.Tensor): Points of each batch. + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \ + boxes of each batch. + gt_labels_3d (torch.Tensor): Labels of each batch. + pts_semantic_mask (None | torch.Tensor): Point-wise semantic + label of each batch. + pts_instance_mask (None | torch.Tensor): Point-wise instance + label of each batch. + + Returns: + tuple[torch.Tensor]: Targets of primitive head. 
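+                The tuple contains the per-point primitive mask, the
+                per-point semantic target (primitive center, size and class)
+                and the per-point offset to the matched primitive center.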
+ """ + gt_bboxes_3d = gt_bboxes_3d.to(points.device) + num_points = points.shape[0] + + point_mask = points.new_zeros(num_points) + # Offset to the primitive center + point_offset = points.new_zeros([num_points, 3]) + # Semantic information of primitive center + point_sem = points.new_zeros([num_points, 3 + self.num_dims + 1]) + + instance_flag = torch.nonzero( + pts_semantic_mask != self.num_classes).squeeze(1) + instance_labels = pts_instance_mask[instance_flag].unique() + + for i, i_instance in enumerate(instance_labels): + indices = instance_flag[pts_instance_mask[instance_flag] == + i_instance] + coords = points[indices, :3] + cur_cls_label = pts_semantic_mask[indices][0] + + # Bbox Corners + cur_corners = gt_bboxes_3d.corners[i] + xmin, ymin, zmin = cur_corners.min(0)[0] + xmax, ymax, zmax = cur_corners.max(0)[0] + + plane_lower_temp = points.new_tensor( + [0, 0, 1, -cur_corners[7, -1]]) + upper_points = cur_corners[[1, 2, 5, 6]] + refined_distance = (upper_points * plane_lower_temp[:3]).sum(dim=1) + + if self.check_horizon(upper_points) and \ + plane_lower_temp[0] + plane_lower_temp[1] < \ + self.train_cfg['lower_thresh']: + plane_lower = points.new_tensor( + [0, 0, 1, plane_lower_temp[-1]]) + plane_upper = points.new_tensor( + [0, 0, 1, -torch.mean(refined_distance)]) + else: + raise NotImplementedError('Only horizontal plane is support!') + + if self.check_dist(plane_upper, upper_points) is False: + raise NotImplementedError( + 'Mean distance to plane should be lower than thresh!') + + # Get the boundary points here + point2plane_dist, selected = self.match_point2plane( + plane_lower, coords) + + # Get lower four lines + if self.primitive_mode == 'line': + point2line_matching = self.match_point2line( + coords[selected], xmin, xmax, ymin, ymax) + + point_mask, point_offset, point_sem = \ + self._assign_primitive_line_targets(point_mask, + point_offset, + point_sem, + coords[selected], + indices[selected], + cur_cls_label, + point2line_matching, + cur_corners, + [1, 1, 0, 0]) + + # Set the surface labels here + if self.primitive_mode == 'z' and \ + selected.sum() > self.train_cfg['num_point'] and \ + point2plane_dist[selected].var() < \ + self.train_cfg['var_thresh']: + + point_mask, point_offset, point_sem = \ + self._assign_primitive_surface_targets(point_mask, + point_offset, + point_sem, + coords[selected], + indices[selected], + cur_cls_label, + cur_corners) + + # Get the boundary points here + point2plane_dist, selected = self.match_point2plane( + plane_upper, coords) + + # Get upper four lines + if self.primitive_mode == 'line': + point2line_matching = self.match_point2line( + coords[selected], xmin, xmax, ymin, ymax) + + point_mask, point_offset, point_sem = \ + self._assign_primitive_line_targets(point_mask, + point_offset, + point_sem, + coords[selected], + indices[selected], + cur_cls_label, + point2line_matching, + cur_corners, + [1, 1, 0, 0]) + + if self.primitive_mode == 'z' and \ + selected.sum() > self.train_cfg['num_point'] and \ + point2plane_dist[selected].var() < \ + self.train_cfg['var_thresh']: + + point_mask, point_offset, point_sem = \ + self._assign_primitive_surface_targets(point_mask, + point_offset, + point_sem, + coords[selected], + indices[selected], + cur_cls_label, + cur_corners) + + # Get left two lines + plane_left_temp = self._get_plane_fomulation( + cur_corners[2] - cur_corners[3], + cur_corners[3] - cur_corners[0], cur_corners[0]) + + right_points = cur_corners[[4, 5, 7, 6]] + plane_left_temp /= torch.norm(plane_left_temp[:3]) + refined_distance 
= (right_points * plane_left_temp[:3]).sum(dim=1) + + if plane_left_temp[2] < self.train_cfg['lower_thresh']: + plane_left = plane_left_temp + plane_right = points.new_tensor([ + plane_left_temp[0], plane_left_temp[1], plane_left_temp[2], + -refined_distance.mean() + ]) + else: + raise NotImplementedError( + 'Normal vector of the plane should be horizontal!') + + # Get the boundary points here + point2plane_dist, selected = self.match_point2plane( + plane_left, coords) + + # Get upper four lines + if self.primitive_mode == 'line': + _, _, line_sel1, line_sel2 = self.match_point2line( + coords[selected], xmin, xmax, ymin, ymax) + point_mask, point_offset, point_sem = \ + self._assign_primitive_line_targets(point_mask, + point_offset, + point_sem, + coords[selected], + indices[selected], + cur_cls_label, + [line_sel1, line_sel2], + cur_corners, + [2, 2]) + + if self.primitive_mode == 'xy' and \ + selected.sum() > self.train_cfg['num_point'] and \ + point2plane_dist[selected].var() < \ + self.train_cfg['var_thresh']: + + point_mask, point_offset, point_sem = \ + self._assign_primitive_surface_targets(point_mask, + point_offset, + point_sem, + coords[selected], + indices[selected], + cur_cls_label, + cur_corners) + + # Get the boundary points here + point2plane_dist, selected = self.match_point2plane( + plane_right, coords) + + if self.primitive_mode == 'line': + _, _, line_sel1, line_sel2 = self.match_point2line( + coords[selected], xmin, xmax, ymin, ymax) + + point_mask, point_offset, point_sem = \ + self._assign_primitive_line_targets(point_mask, + point_offset, + point_sem, + coords[selected], + indices[selected], + cur_cls_label, + [line_sel1, line_sel2], + cur_corners, + [2, 2]) + + if self.primitive_mode == 'xy' and \ + selected.sum() > self.train_cfg['num_point'] and \ + point2plane_dist[selected].var() < \ + self.train_cfg['var_thresh']: + + point_mask, point_offset, point_sem = \ + self._assign_primitive_surface_targets(point_mask, + point_offset, + point_sem, + coords[selected], + indices[selected], + cur_cls_label, + cur_corners) + + plane_front_temp = self._get_plane_fomulation( + cur_corners[0] - cur_corners[4], + cur_corners[4] - cur_corners[5], cur_corners[5]) + + back_points = cur_corners[[3, 2, 7, 6]] + plane_front_temp /= torch.norm(plane_front_temp[:3]) + refined_distance = (back_points * plane_front_temp[:3]).sum(dim=1) + + if plane_front_temp[2] < self.train_cfg['lower_thresh']: + plane_front = plane_front_temp + plane_back = points.new_tensor([ + plane_front_temp[0], plane_front_temp[1], + plane_front_temp[2], -torch.mean(refined_distance) + ]) + else: + raise NotImplementedError( + 'Normal vector of the plane should be horizontal!') + + # Get the boundary points here + point2plane_dist, selected = self.match_point2plane( + plane_front, coords) + + if self.primitive_mode == 'xy' and \ + selected.sum() > self.train_cfg['num_point'] and \ + (point2plane_dist[selected]).var() < \ + self.train_cfg['var_thresh']: + + point_mask, point_offset, point_sem = \ + self._assign_primitive_surface_targets(point_mask, + point_offset, + point_sem, + coords[selected], + indices[selected], + cur_cls_label, + cur_corners) + + # Get the boundary points here + point2plane_dist, selected = self.match_point2plane( + plane_back, coords) + + if self.primitive_mode == 'xy' and \ + selected.sum() > self.train_cfg['num_point'] and \ + point2plane_dist[selected].var() < \ + self.train_cfg['var_thresh']: + + point_mask, point_offset, point_sem = \ + self._assign_primitive_surface_targets(point_mask, 
+ point_offset, + point_sem, + coords[selected], + indices[selected], + cur_cls_label, + cur_corners) + + return (point_mask, point_sem, point_offset) + + def primitive_decode_scores(self, predictions, aggregated_points): + """Decode predicted parts to primitive head. + + Args: + predictions (torch.Tensor): primitive pridictions of each batch. + aggregated_points (torch.Tensor): The aggregated points + of vote stage. + + Returns: + Dict: Predictions of primitive head, including center, + semantic size and semantic scores. + """ + + ret_dict = {} + pred_transposed = predictions.transpose(2, 1) + + center = aggregated_points + pred_transposed[:, :, 0:3] + ret_dict['center_' + self.primitive_mode] = center + + if self.primitive_mode in ['z', 'xy']: + ret_dict['size_residuals_' + self.primitive_mode] = \ + pred_transposed[:, :, 3:3 + self.num_dims] + + ret_dict['sem_cls_scores_' + self.primitive_mode] = \ + pred_transposed[:, :, 3 + self.num_dims:] + + return ret_dict + + def check_horizon(self, points): + """Check whether is a horizontal plane. + + Args: + points (torch.Tensor): Points of input. + + Returns: + Bool: Flag of result. + """ + return (points[0][-1] == points[1][-1]) and \ + (points[1][-1] == points[2][-1]) and \ + (points[2][-1] == points[3][-1]) + + def check_dist(self, plane_equ, points): + """Whether the mean of points to plane distance is lower than thresh. + + Args: + plane_equ (torch.Tensor): Plane to be checked. + points (torch.Tensor): Points to be checked. + + Returns: + Tuple: Flag of result. + """ + return (points[:, 2] + + plane_equ[-1]).sum() / 4.0 < self.train_cfg['lower_thresh'] + + def match_point2line(self, points, xmin, xmax, ymin, ymax): + """Match points to corresponding line. + + Args: + points (torch.Tensor): Points of input. + xmin (float): Min of X-axis. + xmax (float): Max of X-axis. + ymin (float): Min of Y-axis. + ymax (float): Max of Y-axis. + + Returns: + Tuple: Flag of matching correspondence. + """ + sel1 = torch.abs(points[:, 0] - xmin) < self.train_cfg['line_thresh'] + sel2 = torch.abs(points[:, 0] - xmax) < self.train_cfg['line_thresh'] + sel3 = torch.abs(points[:, 1] - ymin) < self.train_cfg['line_thresh'] + sel4 = torch.abs(points[:, 1] - ymax) < self.train_cfg['line_thresh'] + return sel1, sel2, sel3, sel4 + + def match_point2plane(self, plane, points): + """Match points to plane. + + Args: + plane (torch.Tensor): Equation of the plane. + points (torch.Tensor): Points of input. + + Returns: + Tuple: Distance of each point to the plane and + flag of matching correspondence. + """ + point2plane_dist = torch.abs((points * plane[:3]).sum(dim=1) + + plane[-1]) + min_dist = point2plane_dist.min() + selected = torch.abs(point2plane_dist - + min_dist) < self.train_cfg['dist_thresh'] + return point2plane_dist, selected + + def compute_primitive_loss(self, primitive_center, primitive_semantic, + semantic_scores, num_proposal, + gt_primitive_center, gt_primitive_semantic, + gt_sem_cls_label, gt_primitive_mask): + """Compute loss of primitive module. + + Args: + primitive_center (torch.Tensor): Pridictions of primitive center. + primitive_semantic (torch.Tensor): Pridictions of primitive + semantic. + semantic_scores (torch.Tensor): Pridictions of primitive + semantic scores. + num_proposal (int): The number of primitive proposal. + gt_primitive_center (torch.Tensor): Ground truth of + primitive center. + gt_votes_sem (torch.Tensor): Ground truth of primitive semantic. + gt_sem_cls_label (torch.Tensor): Ground truth of primitive + semantic class. 
+ gt_primitive_mask (torch.Tensor): Ground truth of primitive mask. + + Returns: + Tuple: Loss of primitive module. + """ + batch_size = primitive_center.shape[0] + vote_xyz_reshape = primitive_center.view(batch_size * num_proposal, -1, + 3) + + center_loss = self.center_loss( + vote_xyz_reshape, + gt_primitive_center, + dst_weight=gt_primitive_mask.view(batch_size * num_proposal, 1))[1] + + if self.primitive_mode != 'line': + size_xyz_reshape = primitive_semantic.view( + batch_size * num_proposal, -1, self.num_dims).contiguous() + size_loss = self.semantic_reg_loss( + size_xyz_reshape, + gt_primitive_semantic, + dst_weight=gt_primitive_mask.view(batch_size * num_proposal, + 1))[1] + else: + size_loss = center_loss.new_tensor(0.0) + + # Semantic cls loss + sem_cls_loss = self.semantic_cls_loss( + semantic_scores, gt_sem_cls_label, weight=gt_primitive_mask) + + return center_loss, size_loss, sem_cls_loss + + def get_primitive_center(self, pred_flag, center): + """Generate primitive center from predictions. + + Args: + pred_flag (torch.Tensor): Scores of primitive center. + center (torch.Tensor): Pridictions of primitive center. + + Returns: + Tuple: Primitive center and the prediction indices. + """ + ind_normal = F.softmax(pred_flag, dim=1) + pred_indices = (ind_normal[:, 1, :] > + self.surface_thresh).detach().float() + selected = (ind_normal[:, 1, :] <= + self.surface_thresh).detach().float() + offset = torch.ones_like(center) * self.upper_thresh + center = center + offset * selected.unsqueeze(-1) + return center, pred_indices + + def _assign_primitive_line_targets(self, point_mask, point_offset, + point_sem, coords, indices, cls_label, + point2line_matching, corners, + center_axises): + """Generate targets of line primitive. + + Args: + point_mask (torch.Tensor): Tensor to store the ground + truth of mask. + point_offset (torch.Tensor): Tensor to store the ground + truth of offset. + point_sem (torch.Tensor): Tensor to store the ground + truth of semantic. + coords (torch.Tensor): The selected points. + indices (torch.Tensor): Indices of the selected points. + cls_label (int): Class label of the ground truth bounding box. + point2line_matching (torch.Tensor): Flag indicate that + matching line of each point. + corners (torch.Tensor): Corners of the ground truth bounding box. + center_axises (list[int]): Indicate in which axis the line center + should be refined. + + Returns: + Tuple: Targets of the line primitive. + """ + for line_select, center_axis in zip(point2line_matching, + center_axises): + if line_select.sum() > self.train_cfg['num_point_line']: + point_mask[indices[line_select]] = 1.0 + line_center = coords[line_select].mean(dim=0) + line_center[center_axis] = corners[:, center_axis].mean() + point_offset[indices[line_select]] = \ + line_center - coords[line_select] + point_sem[indices[line_select]] = \ + point_sem.new_tensor([line_center[0], line_center[1], + line_center[2], cls_label]) + return point_mask, point_offset, point_sem + + def _assign_primitive_surface_targets(self, point_mask, point_offset, + point_sem, coords, indices, + cls_label, corners): + """Generate targets for primitive z and primitive xy. + + Args: + point_mask (torch.Tensor): Tensor to store the ground + truth of mask. + point_offset (torch.Tensor): Tensor to store the ground + truth of offset. + point_sem (torch.Tensor): Tensor to store the ground + truth of semantic. + coords (torch.Tensor): The selected points. + indices (torch.Tensor): Indices of the selected points. 
+ cls_label (int): Class label of the ground truth bounding box. + corners (torch.Tensor): Corners of the ground truth bounding box. + + Returns: + Tuple: Targets of the center primitive. + """ + point_mask[indices] = 1.0 + if self.primitive_mode == 'z': + center = point_mask.new_tensor([ + corners[:, 0].mean(), corners[:, 1].mean(), coords[:, + 2].mean() + ]) + point_sem[indices] = point_sem.new_tensor([ + center[0], center[1], center[2], + corners[:, 0].max() - corners[:, 0].min(), + corners[:, 1].max() - corners[:, 1].min(), cls_label + ]) + elif self.primitive_mode == 'xy': + center = point_mask.new_tensor([ + coords[:, 0].mean(), coords[:, 1].mean(), corners[:, 2].mean() + ]) + point_sem[indices] = point_sem.new_tensor([ + center[0], center[1], center[2], + corners[:, 2].max() - corners[:, 2].min(), cls_label + ]) + point_offset[indices] = center - coords + return point_mask, point_offset, point_sem + + def _get_plane_fomulation(self, vector1, vector2, point): + """Compute the equation of the plane. + + Args: + vector1 (torch.Tensor): Parallel vector of the plane. + vector2 (torch.Tensor): Parallel vector of the plane. + point (torch.Tensor): Point on the plane. + + Returns: + torch.Tensor: Equation of the plane. + """ + surface_norm = torch.cross(vector1, vector2) + surface_dis = -torch.dot(surface_norm, point) + plane = point.new_tensor( + [surface_norm[0], surface_norm[1], surface_norm[2], surface_dis]) + return plane diff --git a/tests/test_heads.py b/tests/test_heads.py index 865db50ea5..637ca5751f 100644 --- a/tests/test_heads.py +++ b/tests/test_heads.py @@ -457,3 +457,115 @@ def test_free_anchor_3D_head(): gt_labels, input_metas, None) assert losses['positive_bag_loss'] >= 0 assert losses['negative_bag_loss'] >= 0 + + +def test_primitive_head(): + if not torch.cuda.is_available(): + pytest.skip('test requires GPU and torch+cuda') + _setup_seed(0) + + primitive_head_cfg = dict( + type='PrimitiveHead', + num_dims=2, + num_classes=18, + primitive_mode='z', + vote_moudule_cfg=dict( + in_channels=256, + vote_per_seed=1, + gt_per_seed=1, + conv_channels=(256, 256), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + norm_feats=True, + vote_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='none', + loss_dst_weight=10.0)), + vote_aggregation_cfg=dict( + num_point=64, + radius=0.3, + num_sample=16, + mlp_channels=[256, 128, 128, 128], + use_xyz=True, + normalize_xyz=True), + feat_channels=(128, 128), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + objectness_loss=dict( + type='CrossEntropyLoss', + class_weight=[0.4, 0.6], + reduction='mean', + loss_weight=1.0), + center_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='sum', + loss_src_weight=1.0, + loss_dst_weight=1.0), + semantic_reg_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='sum', + loss_src_weight=1.0, + loss_dst_weight=1.0), + semantic_cls_loss=dict( + type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), + train_cfg=dict( + dist_thresh=0.2, + var_thresh=1e-2, + lower_thresh=1e-6, + num_point=100, + num_point_line=10, + line_thresh=0.2)) + + self = build_head(primitive_head_cfg).cuda() + fp_xyz = [torch.rand([2, 64, 3], dtype=torch.float32).cuda()] + hd_features = torch.rand([2, 256, 64], dtype=torch.float32).cuda() + fp_indices = [torch.randint(0, 64, [2, 64]).cuda()] + input_dict = dict( + fp_xyz_net0=fp_xyz, hd_feature=hd_features, fp_indices_net0=fp_indices) + + # test forward + ret_dict = self(input_dict, 'vote') + assert 
ret_dict['center_z'].shape == torch.Size([2, 64, 3]) + assert ret_dict['size_residuals_z'].shape == torch.Size([2, 64, 2]) + assert ret_dict['sem_cls_scores_z'].shape == torch.Size([2, 64, 18]) + assert ret_dict['aggregated_points_z'].shape == torch.Size([2, 64, 3]) + + # test loss + points = torch.rand([2, 1024, 3], dtype=torch.float32).cuda() + ret_dict['seed_points'] = fp_xyz[0] + ret_dict['seed_indices'] = fp_indices[0] + + from mmdet3d.core.bbox import DepthInstance3DBoxes + gt_bboxes_3d = [ + DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()), + DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()) + ] + gt_labels_3d = torch.randint(0, 18, [2, 4]).cuda() + gt_labels_3d = [gt_labels_3d[0], gt_labels_3d[1]] + pts_semantic_mask = torch.randint(0, 19, [2, 1024]).cuda() + pts_semantic_mask = [pts_semantic_mask[0], pts_semantic_mask[1]] + pts_instance_mask = torch.randint(0, 4, [2, 1024]).cuda() + pts_instance_mask = [pts_instance_mask[0], pts_instance_mask[1]] + + loss_input_dict = dict( + bbox_preds=ret_dict, + points=points, + gt_bboxes_3d=gt_bboxes_3d, + gt_labels_3d=gt_labels_3d, + pts_semantic_mask=pts_semantic_mask, + pts_instance_mask=pts_instance_mask) + losses_dict = self.loss(**loss_input_dict) + + assert losses_dict['flag_loss_z'] >= 0 + assert losses_dict['vote_loss_z'] >= 0 + assert losses_dict['center_loss_z'] >= 0 + assert losses_dict['size_loss_z'] >= 0 + assert losses_dict['sem_loss_z'] >= 0 + + # 'Primitive_mode' should be one of ['z', 'xy', 'line'] + with pytest.raises(AssertionError): + primitive_head_cfg['vote_moudule_cfg']['in_channels'] = 'xyz' + build_head(primitive_head_cfg) From 010cd4ba8dc6b2ce749216ada9c0e25ab764bb3e Mon Sep 17 00:00:00 2001 From: yinchimaoliang Date: Sat, 29 Aug 2020 21:38:46 +0800 Subject: [PATCH 12/14] Change nus related (#75) * Add bottom2gravity and balance_class option. * Add valid_flag. * Add use_valid_flag. * Change balance_class option. * Add wrapper, remove gravity and balance_class option. * Change flag, change load_infos to repeat_indices. * Remove pdb, change docstring. * Change name, remove unnecessary args. * Remove _concat_dataset. * Change name of ClassSampledDataset. * Change docstring. * Add get_cat_ids in nus_dataset, add unittest for wrapper. * Change to len(self.dataset) * Update nus data. 
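For reference, the CBGSDataset wrapper introduced in this change balances classes by collecting, for every class, the indices of scenes that contain at least one box of that class, and then oversampling each class's scenes in inverse proportion to its frequency, so that every class contributes roughly 1/num_classes of the resampled epoch. The snippet below is only an illustrative sketch of that resampling rule; the names cbgs_resample and class_sample_idxs are made up for the example and are not part of this patch.

    import numpy as np

    def cbgs_resample(class_sample_idxs):
        # class_sample_idxs: {class name: list of scene indices containing it}
        num_classes = len(class_sample_idxs)
        total = sum(len(v) for v in class_sample_idxs.values())
        frac = 1.0 / num_classes
        sample_indices = []
        for cls_idxs in class_sample_idxs.values():
            if len(cls_idxs) == 0:
                continue  # guard for the sketch; the wrapper assumes every class occurs
            ratio = frac / (len(cls_idxs) / total)
            sample_indices += np.random.choice(
                cls_idxs, int(len(cls_idxs) * ratio)).tolist()
        return sample_indices

    # A rare class ('bicycle') is repeated more aggressively than a common one
    # ('car'), so both end up with a comparable share of the resampled epoch.
    indices = cbgs_resample({
        'car': list(range(90)),
        'bicycle': list(range(90, 100)),
    })

With inputs like the example above, both classes end up contributing a roughly equal number of (possibly repeated) scene indices, which is the balancing effect the wrapper relies on.
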
--- mmdet3d/datasets/__init__.py | 3 +- mmdet3d/datasets/builder.py | 39 +++++++++++ mmdet3d/datasets/dataset_wrappers.py | 73 +++++++++++++++++++ mmdet3d/datasets/nuscenes_dataset.py | 39 +++++++++-- tests/data/nuscenes/nus_info.pkl | Bin 26612 -> 29128 bytes tests/test_dataset/test_dataset_wrappers.py | 74 ++++++++++++++++++++ tools/data_converter/nuscenes_converter.py | 5 ++ 7 files changed, 226 insertions(+), 7 deletions(-) create mode 100644 mmdet3d/datasets/builder.py create mode 100644 mmdet3d/datasets/dataset_wrappers.py create mode 100644 tests/test_dataset/test_dataset_wrappers.py diff --git a/mmdet3d/datasets/__init__.py b/mmdet3d/datasets/__init__.py index 851afd4f59..ab62126915 100644 --- a/mmdet3d/datasets/__init__.py +++ b/mmdet3d/datasets/__init__.py @@ -1,4 +1,5 @@ -from mmdet.datasets.builder import DATASETS, build_dataloader, build_dataset +from mmdet.datasets.builder import build_dataloader +from .builder import DATASETS, build_dataset from .custom_3d import Custom3DDataset from .kitti_dataset import KittiDataset from .lyft_dataset import LyftDataset diff --git a/mmdet3d/datasets/builder.py b/mmdet3d/datasets/builder.py new file mode 100644 index 0000000000..c917b93c0e --- /dev/null +++ b/mmdet3d/datasets/builder.py @@ -0,0 +1,39 @@ +import platform +from mmcv.utils import build_from_cfg + +from mmdet.datasets import DATASETS +from mmdet.datasets.builder import _concat_dataset + +if platform.system() != 'Windows': + # https://github.com/pytorch/pytorch/issues/973 + import resource + rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) + hard_limit = rlimit[1] + soft_limit = min(4096, hard_limit) + resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit)) + + +def build_dataset(cfg, default_args=None): + from mmdet3d.datasets.dataset_wrappers import CBGSDataset + from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset, + ConcatDataset, RepeatDataset) + if isinstance(cfg, (list, tuple)): + dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) + elif cfg['type'] == 'ConcatDataset': + dataset = ConcatDataset( + [build_dataset(c, default_args) for c in cfg['datasets']], + cfg.get('separate_eval', True)) + elif cfg['type'] == 'RepeatDataset': + dataset = RepeatDataset( + build_dataset(cfg['dataset'], default_args), cfg['times']) + elif cfg['type'] == 'ClassBalancedDataset': + dataset = ClassBalancedDataset( + build_dataset(cfg['dataset'], default_args), cfg['oversample_thr']) + elif cfg['type'] == 'CBGSDataset': + dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args)) + elif isinstance(cfg.get('ann_file'), (list, tuple)): + dataset = _concat_dataset(cfg, default_args) + else: + dataset = build_from_cfg(cfg, DATASETS, default_args) + + return dataset diff --git a/mmdet3d/datasets/dataset_wrappers.py b/mmdet3d/datasets/dataset_wrappers.py new file mode 100644 index 0000000000..4de4c6a77c --- /dev/null +++ b/mmdet3d/datasets/dataset_wrappers.py @@ -0,0 +1,73 @@ +import numpy as np + +from .builder import DATASETS + + +@DATASETS.register_module() +class CBGSDataset(object): + """A wrapper of class sampled dataset with ann_file path. Implementation of + paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object + Detection `_. + + Balance the number of scenes under different classes. + + Args: + dataset (:obj:`CustomDataset`): The dataset to be class sampled. 
+ """ + + def __init__(self, dataset): + self.dataset = dataset + self.CLASSES = dataset.CLASSES + self.sample_indices = self._get_sample_indices() + # self.dataset.data_infos = self.data_infos + if hasattr(self.dataset, 'flag'): + self.flag = np.array( + [self.dataset.flag[ind] for ind in self.sample_indices], + dtype=np.uint8) + + def _get_sample_indices(self): + """Load annotations from ann_file. + + Args: + ann_file (str): Path of the annotation file. + + Returns: + list[dict]: List of annotations after class sampling. + """ + class_sample_idxs = {name: [] for name in self.CLASSES} + for idx in range(len(self.dataset)): + class_sample_idx = self.dataset.get_cat_ids(idx) + for key in class_sample_idxs.keys(): + class_sample_idxs[key] += class_sample_idx[key] + duplicated_samples = sum([len(v) for _, v in class_sample_idx.items()]) + class_distribution = { + k: len(v) / duplicated_samples + for k, v in class_sample_idx.items() + } + + sample_indices = [] + + frac = 1.0 / len(self.CLASSES) + ratios = [frac / v for v in class_distribution.values()] + for cls_inds, ratio in zip(list(class_sample_idx.values()), ratios): + sample_indices += np.random.choice(cls_inds, + int(len(cls_inds) * + ratio)).tolist() + return sample_indices + + def __getitem__(self, idx): + """Get item from infos according to the given index. + + Returns: + dict: Data dictionary of the corresponding index. + """ + ori_idx = self.sample_indices[idx] + return self.dataset[ori_idx] + + def __len__(self): + """Return the length of data infos. + + Returns: + int: Length of data infos. + """ + return len(self.sample_indices) diff --git a/mmdet3d/datasets/nuscenes_dataset.py b/mmdet3d/datasets/nuscenes_dataset.py index e306675cfb..f8ad5730b2 100644 --- a/mmdet3d/datasets/nuscenes_dataset.py +++ b/mmdet3d/datasets/nuscenes_dataset.py @@ -36,8 +36,7 @@ class NuScenesDataset(Custom3DDataset): box_type_3d (str, optional): Type of 3D box of this dataset. Based on the `box_type_3d`, the dataset will encapsulate the box to its original format then converted them to `box_type_3d`. - Defaults to 'LiDAR' in this dataset. Available options includes - + Defaults to 'LiDAR' in this dataset. Available options includes. - 'LiDAR': Box in LiDAR coordinates. - 'Depth': Box in depth coordinates, usually for indoor dataset. - 'Camera': Box in camera coordinates. @@ -47,6 +46,8 @@ class NuScenesDataset(Custom3DDataset): Defaults to False. eval_version (bool, optional): Configuration version of evaluation. Defaults to 'detection_cvpr_2019'. + use_valid_flag (bool): Whether to use `use_valid_flag` key in the info + file as mask to filter gt_boxes and gt_names. Defaults to False. """ NameMapping = { 'movable_object.barrier': 'barrier', @@ -111,8 +112,10 @@ def __init__(self, box_type_3d='LiDAR', filter_empty_gt=True, test_mode=False, - eval_version='detection_cvpr_2019'): + eval_version='detection_cvpr_2019', + use_valid_flag=False): self.load_interval = load_interval + self.use_valid_flag = use_valid_flag super().__init__( data_root=data_root, ann_file=ann_file, @@ -127,7 +130,6 @@ def __init__(self, self.eval_version = eval_version from nuscenes.eval.detection.config import config_factory self.eval_detection_configs = config_factory(self.eval_version) - if self.modality is None: self.modality = dict( use_camera=False, @@ -137,6 +139,29 @@ def __init__(self, use_external=False, ) + def get_cat_ids(self, idx): + """Get category distribution of single scene. + + Args: + idx (int): Index of the data_info. 
+ + Returns: + dict[list]: for each category, if the current scene + contains such boxes, store a list containing idx, + otherwise, store empty list. + """ + class_sample_idx = {name: [] for name in self.CLASSES} + info = self.data_infos[idx] + if self.use_valid_flag: + mask = info['valid_flag'] + gt_names = set(info['gt_names'][mask]) + else: + gt_names = set(info['gt_names']) + for name in gt_names: + if name in self.CLASSES: + class_sample_idx[name].append(idx) + return class_sample_idx + def load_annotations(self, ann_file): """Load annotations from ann_file. @@ -173,7 +198,6 @@ def get_data_info(self, index): - ann_info (dict): Annotation info. """ info = self.data_infos[index] - # standard protocal modified from SECOND.Pytorch input_dict = dict( sample_idx=info['token'], @@ -228,7 +252,10 @@ def get_ann_info(self, index): """ info = self.data_infos[index] # filter out bbox containing no points - mask = info['num_lidar_pts'] > 0 + if self.use_valid_flag: + mask = info['valid_flag'] + else: + mask = info['num_lidar_pts'] > 0 gt_bboxes_3d = info['gt_boxes'][mask] gt_names_3d = info['gt_names'][mask] gt_labels_3d = [] diff --git a/tests/data/nuscenes/nus_info.pkl b/tests/data/nuscenes/nus_info.pkl index 31a2bb49420963582f89fc86cd14aafb1b16b92e..343b7ef5e34e2b47058879ae4d476e5f03c59052 100644 GIT binary patch delta 4586 zcmeHKX>?P^8MSP{1{o}9O>h$8C151uMV5`_4GS;wCTxQ+L12uKY}uCZ`f2Rc5EWXA zSwfn$leS4|8@i-h+LYALeM@Oe+R)OyNgA4@>As{}(h#7_{pP)=gYcj9oc`*m&v|#g zZ|1%`GgqI+ninsa9(&$&%9)$*uuoj_Bza7SCtmSRT(){rh6+17J3X5`^E_!{;!5!7 zP>$~&v+T`v$lb*`OGQ#r65*M|(}!n|e6H9fD|^xxhmj5A3E?5Rx!Acdh=_Sna9YC5 z9>X)rJ$SBH;+!=Mjvw(99@ZPe!(*&r%)#nL-}JR(JooAm^so}Ea?5Mxx36cemDzcA zxwAM2d$7yy;+!nj4d`$1TRca6PX;UA-kob_U*vz<8ngbl-_v&yUym`5(Hiqzm(O-t z%Tjmv7GbYbf#Gn!e_%K;NZF>Oq+ZGaT8|!i6@Q`0809is2|*@6{_%hqqC9z`WWsVQ zhJ5oeg&5lt8l_e8X1mR~nmZSuGkSX{G#;gV_?i#RDVb!y&&?(Pl4_Ii)sJv1~8RPAvhgm+W%9 zv?{e!#SVF@)SX(U;&OQv{p(a*A!{9#Ca0WqtZA-9&(O-DTRcgJsEXrNhp8H^mui4k zkI9qbNtvNq9e!oE&&<3TYEUDMKtWCN&a%9< zZXVI6YUUVIwJ26AP)&7++7L!u@`c@wAM(1i_7HWB@Q`L zo?+?2V1!MKuyv6l`Cz#%XA_6)QN+%`4n-Ucnm?Q63+1`GKVG?fT|tc#gU3?OURl^S zPV*zz`m_Zjy!dXkUfK$b`*a%;EPvm+z3y$y!t#BJwH+vE2QV19)`%WvL2IXC^#XMx zb|HjPb%k}KpE*$4tvCUopdc_f8B#^l7Z{0BA39>x57cU{vA)CK?Ct98rjQyL;E{+; zK3oyD3?kMH;xQPs#%W6*;!rE1u-xg)U8EIJ1l@9Fa(&%u9qf4`uDX=xa&X)cW4mDdpDtqd3^&{~id9co~^kecg`n4nRoY*z($6A<(q&gk z%jcK_rOzwQ7l4Ak2u!r}OU#1Omlf+P!1$^jMFOqy4gJ+$Wfru~D%RJ4g1!z!-KObv z+fd)=p>LosM(2R~$+}CHR%gqi`bNuR=+rD`5-dJYZ_D^5hnmITLMYFhj|K4^OFdn)YE8he~LL!`l;eP4HR?{m}v9Qm<6S06zf@Fd{xgOiMRmr z=;Hq6^m9b4_dHrJ{Q@ZHm$IvA#j;-^j?u4yy17Hj-20k3EWbggW-gOp?n_O!^xtx* znfp6g;5JU<-=kYTxg>rX|3Myf8|MB|KIAsc^&gmP?hAOAxA7;mUivdIzKy>ifhCsa zz3#s<3tRY`V!a3y^b#=9+`lsmS}!ZsC7^EZD+r-^W~#0S(XKeW0VSv9}BtC+}dSHLziYN zQ($YP)t0`HL(SGj@_|<4STM_Ttwz13%S-6jj)lcCr_Hc5vwW_lOCZOK&p_)X3oyR; zr96{M9%?)2UdAje{xypATA-jzV4|hVnFXyI6l(=gw=@eOlsekWT5e z0uwF0g;`L_R;(Ogd{wzfpmnajH#3h}(7IKzRsjXAmQ|kI1^GspgNTXg?}_K5{gn8)Aaw-|TG3{9n5u`luEkIv3q~RTp(YK`t%?pBc4-qMHg0 zkM)JcG;Ls}UU&5zqwX5edZ`hpRdn(UH6cMopWJ5N}qr7NDS3+1Y=CyNyS5 zv38Cj=25JTK)r}NctE>jg)6BOnO4JHinu9R-c`85w0ZVy{|3`GFLN=~7B$ul6toqn z{n8zs9Pv{?U0g+!Zey-K`8JL*Ip5^Gv>m8Vz5`)o)9j9cTGRd>CP93sBJ~0V?E(fP z**FP$$&dKe-KCF&a7E!C86zQq^@srN#=9{J0DYDer>_vhC P;ZfY-sSo|p$>M(lT$V@l delta 3796 zcmchadwi2u9mbm$nnDW=Xn}$qO$#OIjkNSi6M7@PrPr5R*eD^Pp-pH@`|D{Zj5y{- zkvYZf$aI_cE9$0bojP?motLR7UZ-s0ROW4>-E_(}ob&me_kFX*`teWy=yQJOoNu0U z&ePMkc^|uMdH#asY_+|htbF_hUy{#qV*EvI{3VAkWvIBjyW8jTE%2p@@oS-zL%IHE zQ)R!sT&`X3ivOI-cggIKOU`X;_qpUr{5*xZfc;f=?&r8RKl7jC|836n;?J=EoHb`= 
zi#J)bCu6kMl9c4|FHTBI8jcJEDKHikLzF9zR*c*1nDQ^gj5u^KJW6@;rcx-~!q2EvI*(F8=-1rw^b|5H%y4ME&z@WUPVwIe%+*a%0?gBPFJQgM=_jn1l zI2Cet8Jh=3#)iWI<)W`%BrR1=OR+4fT5Bx<)=#Cfud3F%QPnPaXO-7lrs{He3FDho zT_JO-Yb=#=XZ41bDvS)}3|;3-I!@JGuRB3CXf>*ZI(!yiiZ5lFHuIFjpNw4`sG&Oa z)Nh#F3iZh1vTVy1d8ztHRy~t>)6}458li%kLr&g3v zk9?@6(9y;WoZ1!B0TtwfhVTpOMC*tqq_|w(Ur{axYcp+Kn2d6WQI0NJB&Tbgx!qiH zM$u9hPAKYP(evq%7i#TBKE3j4ZDCy{CU3VsdTmkr7%hy>&8ZJFym<|+M%$qAoVKHb z?eE!q#JhtjY=5Vsc0mRCp`qy9Aa*kauj>_c1Jnp&4@x+#sdIGpGXtl+iU~jk?SqCU z!|KxP4~|Sw5Cbs^LG`=V+}IUp(R#YK(|$EGz%$Ve@>E^K7DlZH#B&HJ&F#!Mz@>hP z2BpVsU#wrEAq>l$C>nnDSur#-ISH!=gK*DK~QRL~J<$iD;^7cm7Q zqk3F{a>ti=y5L6?7{!H0jTTeLV1h z*52B-oo>TajE+M&_9gX`wrSLQ>^z6q@2YoZp5RiC{Y@xkdTLIZx}E)WQqI;F8h^jH z%iIPt_BYG61~YcKB7I)$cff~_^A@xky%ici&M9U9H{^FQ1*dl_>OIi-uHK6dye>B$ z$a)`B@OrP#Dc$xF4C%2lf>^E1&Ww+8smFRo zx|{9BBk^N$pxKP{}tXkw&aW(rOZ zE9x9HzN@dG1Ft^sK;llmCH=ulBpYaabzKL2- zmgkUcu*I4AEiUzBA44h6r^hesx8=DOGu`jV*%mY1@5+^}X1WjVo_}G_!-voDJ+vBq z9~wW$1$1zZqpdf2f4~&Z@k2%Z2rB5u(8P3q!W6uIs;I}IM!G*k2`6`(tK;X)!0Do5 zo`4E^5}KIqFPMVUB}M%b8sF7Z=tMmbd2-3X3i=f)_WLzjjeY|abXmIES1o@Ub&P%s z)i14oNX~lmWvsp3_B#yf;WC79A8&VN{GLla+&{?l4)Y=WM-0o0OXCmWXXHSK8SbCt z9UW%4fn)Q-y@Jp3G5(BJqrX7o$CyP2kv!LN#QQ8$IKp#^`YTk>-=K-%KF<`q{;sHh zK#g#(qJ+~~-^SJ#n1R!aig^ht=o&OJ+03g3iG$N%+FSUKX?( zB|`X{FDO)kCp@P;x6JvfIQ*g>r)OFDKuGXRhuQNUU zS-DKX%dV(AsGxkgy4Rk%j-gW7*t=%gdM;i3yD5Y8Q-K`mHN)8;Pxlt$8|5Rtt4kc< z%zvy-uCaU}T8)aJynKPrI!(psL_IUszNw6xOBloYr3%{!736}3)B|cz1V)1+;Rz~Z z%=njG&NZAjq1C7Ys+^BgB~Li~DYzax%F3w9rVCuO+$G|M^-n z_jTd_{A#j-He-_4dN|8pA&3z&-_^ 0 + for anno in annotations], + dtype=bool).reshape(-1) # convert velo from global to lidar for i in range(len(boxes)): velo = np.array([*velocity[i], 0.0]) @@ -253,6 +257,7 @@ def _fill_trainval_infos(nusc, [a['num_lidar_pts'] for a in annotations]) info['num_radar_pts'] = np.array( [a['num_radar_pts'] for a in annotations]) + info['valid_flag'] = valid_flag if sample['scene_token'] in train_scenes: train_nusc_infos.append(info) From 299d666a47cfeee4071959d5a2f2e9991723bee5 Mon Sep 17 00:00:00 2001 From: encore-zhou Date: Sun, 30 Aug 2020 20:03:45 +0800 Subject: [PATCH 13/14] Feature_3dssd_FPS_With_Dist_OP (#66) * add op fps with distance * add op fps with distance * modify F-DFS unittest * modify sa module * modify sa module * SA Module support D-FPS and F-FPS * modify sa module * update points sa module * modify point_sa_module * modify point sa module * reconstruct FPS * reconstruct FPS * modify docstring * modify docstring --- mmdet3d/ops/__init__.py | 12 +- mmdet3d/ops/furthest_point_sample/__init__.py | 9 +- .../furthest_point_sample.py | 36 ++++ .../furthest_point_sample/points_sampler.py | 149 ++++++++++++++ .../src/furthest_point_sample.cpp | 27 +++ .../src/furthest_point_sample_cuda.cu | 191 ++++++++++++++++++ mmdet3d/ops/furthest_point_sample/utils.py | 31 +++ .../ops/pointnet_modules/point_sa_module.py | 69 +++++-- tests/test_pointnet_modules.py | 100 ++++++++- tests/test_pointnet_ops.py | 32 ++- 10 files changed, 633 insertions(+), 23 deletions(-) create mode 100644 mmdet3d/ops/furthest_point_sample/points_sampler.py create mode 100644 mmdet3d/ops/furthest_point_sample/utils.py diff --git a/mmdet3d/ops/__init__.py b/mmdet3d/ops/__init__.py index 8e0f9f3581..4c386c8565 100644 --- a/mmdet3d/ops/__init__.py +++ b/mmdet3d/ops/__init__.py @@ -2,7 +2,8 @@ sigmoid_focal_loss) from .ball_query import ball_query -from .furthest_point_sample import furthest_point_sample +from .furthest_point_sample import (Points_Sampler, furthest_point_sample, + furthest_point_sample_with_dist) from 
.gather_points import gather_points from .group_points import (GroupAll, QueryAndGroup, group_points, grouping_operation) @@ -24,8 +25,9 @@ 'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck', 'RoIAwarePool3d', 'points_in_boxes_gpu', 'points_in_boxes_cpu', 'make_sparse_convmodule', 'ball_query', 'furthest_point_sample', - 'three_interpolate', 'three_nn', 'gather_points', 'grouping_operation', - 'group_points', 'GroupAll', 'QueryAndGroup', 'PointSAModule', - 'PointSAModuleMSG', 'PointFPModule', 'points_in_boxes_batch', - 'get_compiler_version', 'get_compiling_cuda_version' + 'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn', + 'gather_points', 'grouping_operation', 'group_points', 'GroupAll', + 'QueryAndGroup', 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule', + 'points_in_boxes_batch', 'get_compiler_version', + 'get_compiling_cuda_version', 'Points_Sampler' ] diff --git a/mmdet3d/ops/furthest_point_sample/__init__.py b/mmdet3d/ops/furthest_point_sample/__init__.py index 76e74ecc56..06d57d974f 100644 --- a/mmdet3d/ops/furthest_point_sample/__init__.py +++ b/mmdet3d/ops/furthest_point_sample/__init__.py @@ -1,3 +1,8 @@ -from .furthest_point_sample import furthest_point_sample +from .furthest_point_sample import (furthest_point_sample, + furthest_point_sample_with_dist) +from .points_sampler import Points_Sampler -__all__ = ['furthest_point_sample'] +__all__ = [ + 'furthest_point_sample', 'furthest_point_sample_with_dist', + 'Points_Sampler' +] diff --git a/mmdet3d/ops/furthest_point_sample/furthest_point_sample.py b/mmdet3d/ops/furthest_point_sample/furthest_point_sample.py index 3f9e45508e..d03b57c7ac 100644 --- a/mmdet3d/ops/furthest_point_sample/furthest_point_sample.py +++ b/mmdet3d/ops/furthest_point_sample/furthest_point_sample.py @@ -39,4 +39,40 @@ def backward(xyz, a=None): return None, None +class FurthestPointSamplingWithDist(Function): + """Furthest Point Sampling With Distance. + + Uses iterative furthest point sampling to select a set of features whose + corresponding points have the furthest distance. + """ + + @staticmethod + def forward(ctx, points_dist: torch.Tensor, + num_points: int) -> torch.Tensor: + """forward. + + Args: + points_dist (Tensor): (B, N, N) Distance between each point pair. + num_points (int): Number of points in the sampled set. + + Returns: + Tensor: (B, num_points) indices of the sampled points. + """ + assert points_dist.is_contiguous() + + B, N, _ = points_dist.size() + output = points_dist.new_zeros([B, num_points], dtype=torch.int32) + temp = points_dist.new_zeros([B, N]).fill_(1e10) + + furthest_point_sample_ext.furthest_point_sampling_with_dist_wrapper( + B, N, num_points, points_dist, temp, output) + ctx.mark_non_differentiable(output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + + furthest_point_sample = FurthestPointSampling.apply +furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply diff --git a/mmdet3d/ops/furthest_point_sample/points_sampler.py b/mmdet3d/ops/furthest_point_sample/points_sampler.py new file mode 100644 index 0000000000..32645be999 --- /dev/null +++ b/mmdet3d/ops/furthest_point_sample/points_sampler.py @@ -0,0 +1,149 @@ +import torch +from torch import nn as nn +from typing import List + +from .furthest_point_sample import (furthest_point_sample, + furthest_point_sample_with_dist) +from .utils import calc_square_dist + + +def get_sampler_type(sampler_type): + """Get the type and mode of points sampler. 
+ + Args: + sampler_type (str): The type of points sampler. + The valid value are "D-FPS", "F-FPS", or "FS". + + Returns: + class: Points sampler type. + """ + if sampler_type == 'D-FPS': + sampler = DFPS_Sampler + elif sampler_type == 'F-FPS': + sampler = FFPS_Sampler + elif sampler_type == 'FS': + sampler = FS_Sampler + else: + raise ValueError('Only "sampler_type" of "D-FPS", "F-FPS", or "FS"' + f' are supported, got {sampler_type}') + + return sampler + + +class Points_Sampler(nn.Module): + """Points sampling. + + Args: + num_point (list[int]): Number of sample points. + fps_mod_list (list[str]: Type of FPS method, valid mod + ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. + F-FPS: using feature distances for FPS. + D-FPS: using Euclidean distances of points for FPS. + FS: using F-FPS and D-FPS simultaneously. + fps_sample_range_list (list[int]): Range of points to apply FPS. + Default: [-1]. + """ + + def __init__(self, + num_point: List[int], + fps_mod_list: List[str] = ['D-FPS'], + fps_sample_range_list: List[int] = [-1]): + super(Points_Sampler, self).__init__() + # FPS would be applied to different fps_mod in the list, + # so the length of the num_point should be equal to + # fps_mod_list and fps_sample_range_list. + assert len(num_point) == len(fps_mod_list) == len( + fps_sample_range_list) + self.num_point = num_point + self.fps_sample_range_list = fps_sample_range_list + self.samplers = nn.ModuleList() + for fps_mod in fps_mod_list: + self.samplers.append(get_sampler_type(fps_mod)()) + + def forward(self, points_xyz, features): + """forward. + + Args: + points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. + features (Tensor): (B, C, N) Descriptors of the features. + + Return: + Tensor: (B, npoint, sample_num) Indices of sampled points. + """ + indices = [] + last_fps_end_index = 0 + + for fps_sample_range, sampler, npoint in zip( + self.fps_sample_range_list, self.samplers, self.num_point): + assert fps_sample_range < points_xyz.shape[1] + + if fps_sample_range == -1: + sample_points_xyz = points_xyz[:, last_fps_end_index:] + sample_features = features[:, :, last_fps_end_index:] + else: + sample_points_xyz = \ + points_xyz[:, last_fps_end_index:fps_sample_range] + sample_features = \ + features[:, :, last_fps_end_index:fps_sample_range] + + fps_idx = sampler(sample_points_xyz.contiguous(), sample_features, + npoint) + + indices.append(fps_idx + last_fps_end_index) + last_fps_end_index += fps_sample_range + indices = torch.cat(indices, dim=1) + + return indices + + +class DFPS_Sampler(nn.Module): + """DFPS_Sampling. + + Using Euclidean distances of points for FPS. + """ + + def __init__(self): + super(DFPS_Sampler, self).__init__() + + def forward(self, points, features, npoint): + """Sampling points with D-FPS.""" + fps_idx = furthest_point_sample(points.contiguous(), npoint) + return fps_idx + + +class FFPS_Sampler(nn.Module): + """FFPS_Sampler. + + Using feature distances for FPS. + """ + + def __init__(self): + super(FFPS_Sampler, self).__init__() + + def forward(self, points, features, npoint): + """Sampling points with F-FPS.""" + features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2) + features_dist = calc_square_dist( + features_for_fps, features_for_fps, norm=False) + fps_idx = furthest_point_sample_with_dist(features_dist, npoint) + return fps_idx + + +class FS_Sampler(nn.Module): + """FS_Sampling. + + Using F-FPS and D-FPS simultaneously. 
+ """ + + def __init__(self): + super(FS_Sampler, self).__init__() + + def forward(self, points, features, npoint): + """Sampling points with FS_Sampling.""" + features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2) + features_dist = calc_square_dist( + features_for_fps, features_for_fps, norm=False) + fps_idx_ffps = furthest_point_sample_with_dist(features_dist, npoint) + fps_idx_dfps = furthest_point_sample(points, npoint) + fps_idx = torch.cat([fps_idx_ffps, fps_idx_dfps], dim=1) + return fps_idx diff --git a/mmdet3d/ops/furthest_point_sample/src/furthest_point_sample.cpp b/mmdet3d/ops/furthest_point_sample/src/furthest_point_sample.cpp index 3fed7bffe8..be05890274 100644 --- a/mmdet3d/ops/furthest_point_sample/src/furthest_point_sample.cpp +++ b/mmdet3d/ops/furthest_point_sample/src/furthest_point_sample.cpp @@ -19,6 +19,16 @@ void furthest_point_sampling_kernel_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs, cudaStream_t stream); +int furthest_point_sampling_with_dist_wrapper(int b, int n, int m, + at::Tensor points_tensor, + at::Tensor temp_tensor, + at::Tensor idx_tensor); + +void furthest_point_sampling_with_dist_kernel_launcher(int b, int n, int m, + const float *dataset, + float *temp, int *idxs, + cudaStream_t stream); + int furthest_point_sampling_wrapper(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, @@ -32,7 +42,24 @@ int furthest_point_sampling_wrapper(int b, int n, int m, return 1; } +int furthest_point_sampling_with_dist_wrapper(int b, int n, int m, + at::Tensor points_tensor, + at::Tensor temp_tensor, + at::Tensor idx_tensor) { + + const float *points = points_tensor.data(); + float *temp = temp_tensor.data(); + int *idx = idx_tensor.data(); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); + furthest_point_sampling_with_dist_kernel_launcher(b, n, m, points, temp, idx, stream); + return 1; +} + PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper"); + m.def("furthest_point_sampling_with_dist_wrapper", + &furthest_point_sampling_with_dist_wrapper, + "furthest_point_sampling_with_dist_wrapper"); } diff --git a/mmdet3d/ops/furthest_point_sample/src/furthest_point_sample_cuda.cu b/mmdet3d/ops/furthest_point_sample/src/furthest_point_sample_cuda.cu index bce19b96ed..6e09709f7c 100644 --- a/mmdet3d/ops/furthest_point_sample/src/furthest_point_sample_cuda.cu +++ b/mmdet3d/ops/furthest_point_sample/src/furthest_point_sample_cuda.cu @@ -207,3 +207,194 @@ void furthest_point_sampling_kernel_launcher(int b, int n, int m, exit(-1); } } + +// Modified from +// https://github.com/qiqihaer/3DSSD-pytorch/blob/master/lib/pointnet2/src/sampling_gpu.cu +template +__global__ void furthest_point_sampling_with_dist_kernel( + int b, int n, int m, const float *__restrict__ dataset, + float *__restrict__ temp, int *__restrict__ idxs) { + // dataset: (B, N, N) + // tmp: (B, N) + // output: + // idx: (B, M) + + if (m <= 0) + return; + __shared__ float dists[block_size]; + __shared__ int dists_i[block_size]; + + int batch_index = blockIdx.x; + dataset += batch_index * n * n; + temp += batch_index * n; + idxs += batch_index * m; + + int tid = threadIdx.x; + const int stride = block_size; + + int old = 0; + if (threadIdx.x == 0) + idxs[0] = old; + + __syncthreads(); + for (int j = 1; j < m; j++) { + int besti = 0; + float best = -1; + // float x1 = dataset[old * 3 + 0]; + // float y1 = dataset[old * 3 + 1]; + // float 
z1 = dataset[old * 3 + 2]; + for (int k = tid; k < n; k += stride) { + // float x2, y2, z2; + // x2 = dataset[k * 3 + 0]; + // y2 = dataset[k * 3 + 1]; + // z2 = dataset[k * 3 + 2]; + + // float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * + // (z2 - z1); + float d = dataset[old * n + k]; + + float d2 = min(d, temp[k]); + temp[k] = d2; + besti = d2 > best ? k : besti; + best = d2 > best ? d2 : best; + } + dists[tid] = best; + dists_i[tid] = besti; + __syncthreads(); + + if (block_size >= 1024) { + if (tid < 512) { + __update(dists, dists_i, tid, tid + 512); + } + __syncthreads(); + } + + if (block_size >= 512) { + if (tid < 256) { + __update(dists, dists_i, tid, tid + 256); + } + __syncthreads(); + } + if (block_size >= 256) { + if (tid < 128) { + __update(dists, dists_i, tid, tid + 128); + } + __syncthreads(); + } + if (block_size >= 128) { + if (tid < 64) { + __update(dists, dists_i, tid, tid + 64); + } + __syncthreads(); + } + if (block_size >= 64) { + if (tid < 32) { + __update(dists, dists_i, tid, tid + 32); + } + __syncthreads(); + } + if (block_size >= 32) { + if (tid < 16) { + __update(dists, dists_i, tid, tid + 16); + } + __syncthreads(); + } + if (block_size >= 16) { + if (tid < 8) { + __update(dists, dists_i, tid, tid + 8); + } + __syncthreads(); + } + if (block_size >= 8) { + if (tid < 4) { + __update(dists, dists_i, tid, tid + 4); + } + __syncthreads(); + } + if (block_size >= 4) { + if (tid < 2) { + __update(dists, dists_i, tid, tid + 2); + } + __syncthreads(); + } + if (block_size >= 2) { + if (tid < 1) { + __update(dists, dists_i, tid, tid + 1); + } + __syncthreads(); + } + + old = dists_i[0]; + if (tid == 0) + idxs[j] = old; + } +} + +void furthest_point_sampling_with_dist_kernel_launcher(int b, int n, int m, + const float *dataset, + float *temp, int *idxs, + cudaStream_t stream) { + // dataset: (B, N, N) + // temp: (B, N) + // output: + // idx: (B, M) + + cudaError_t err; + unsigned int n_threads = opt_n_threads(n); + + switch (n_threads) { + case 1024: + furthest_point_sampling_with_dist_kernel<1024><<>>( + b, n, m, dataset, temp, idxs); + break; + case 512: + furthest_point_sampling_with_dist_kernel<512><<>>( + b, n, m, dataset, temp, idxs); + break; + case 256: + furthest_point_sampling_with_dist_kernel<256><<>>( + b, n, m, dataset, temp, idxs); + break; + case 128: + furthest_point_sampling_with_dist_kernel<128><<>>( + b, n, m, dataset, temp, idxs); + break; + case 64: + furthest_point_sampling_with_dist_kernel<64><<>>( + b, n, m, dataset, temp, idxs); + break; + case 32: + furthest_point_sampling_with_dist_kernel<32><<>>( + b, n, m, dataset, temp, idxs); + break; + case 16: + furthest_point_sampling_with_dist_kernel<16><<>>( + b, n, m, dataset, temp, idxs); + break; + case 8: + furthest_point_sampling_with_dist_kernel<8><<>>( + b, n, m, dataset, temp, idxs); + break; + case 4: + furthest_point_sampling_with_dist_kernel<4><<>>( + b, n, m, dataset, temp, idxs); + break; + case 2: + furthest_point_sampling_with_dist_kernel<2><<>>( + b, n, m, dataset, temp, idxs); + break; + case 1: + furthest_point_sampling_with_dist_kernel<1><<>>( + b, n, m, dataset, temp, idxs); + break; + default: + furthest_point_sampling_with_dist_kernel<512><<>>( + b, n, m, dataset, temp, idxs); + } + + err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } +} diff --git a/mmdet3d/ops/furthest_point_sample/utils.py b/mmdet3d/ops/furthest_point_sample/utils.py new file mode 100644 index 
0000000000..4ca235e13b --- /dev/null +++ b/mmdet3d/ops/furthest_point_sample/utils.py @@ -0,0 +1,31 @@ +import torch + + +def calc_square_dist(point_feat_a, point_feat_b, norm=True): + """Calculating square distance between a and b. + + Args: + point_feat_a (Tensor): (B, N, C) Feature vector of each point. + point_feat_b (Tensor): (B, M, C) Feature vector of each point. + norm (Bool): Whether to normalize the distance. + Default: True. + + Returns: + Tensor: (B, N, M) Distance between each pair points. + """ + length_a = point_feat_a.shape[1] + length_b = point_feat_b.shape[1] + num_channel = point_feat_a.shape[-1] + # [bs, n, 1] + a_square = torch.sum(point_feat_a.unsqueeze(dim=2).pow(2), dim=-1) + # [bs, 1, m] + b_square = torch.sum(point_feat_b.unsqueeze(dim=1).pow(2), dim=-1) + a_square = a_square.repeat((1, 1, length_b)) # [bs, n, m] + b_square = b_square.repeat((1, length_a, 1)) # [bs, n, m] + + coor = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2)) + + dist = a_square + b_square - 2 * coor + if norm: + dist = torch.sqrt(dist) / num_channel + return dist diff --git a/mmdet3d/ops/pointnet_modules/point_sa_module.py b/mmdet3d/ops/pointnet_modules/point_sa_module.py index d3f9731957..3783425e63 100644 --- a/mmdet3d/ops/pointnet_modules/point_sa_module.py +++ b/mmdet3d/ops/pointnet_modules/point_sa_module.py @@ -4,8 +4,7 @@ from torch.nn import functional as F from typing import List -from mmdet3d.ops import (GroupAll, QueryAndGroup, furthest_point_sample, - gather_points) +from mmdet3d.ops import GroupAll, Points_Sampler, QueryAndGroup, gather_points class PointSAModuleMSG(nn.Module): @@ -18,6 +17,13 @@ class PointSAModuleMSG(nn.Module): sample_nums (list[int]): Number of samples in each ball query. mlp_channels (list[int]): Specify of the pointnet before the global pooling for each scale. + fps_mod (list[str]: Type of FPS method, valid mod + ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. + F-FPS: using feature distances for FPS. + D-FPS: using Euclidean distances of points for FPS. + FS: using F-FPS and D-FPS simultaneously. + fps_sample_range_list (list[int]): Range of points to apply FPS. + Default: [-1]. norm_cfg (dict): Type of normalization method. Default: dict(type='BN2d'). use_xyz (bool): Whether to use xyz. 
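
Regarding the two options documented in the hunk above: Points_Sampler walks fps_sample_range_list and, for each range, applies the matching FPS variant to that slice of the point dimension only, then shifts the returned indices back into the original ordering and concatenates them. The snippet below is a self-contained sketch of that split-and-sample rule; the random stand-in sampler and the helper name split_and_sample are illustrative only and are not part of this patch (the real modules call the CUDA FPS ops added above).

    import torch

    def split_and_sample(points_xyz, num_point, fps_sample_range_list, samplers):
        # points_xyz: (B, N, 3); one sampler and one npoint per range entry.
        indices, last = [], 0
        for fps_range, sampler, npoint in zip(fps_sample_range_list, samplers,
                                              num_point):
            end = points_xyz.shape[1] if fps_range == -1 else fps_range
            sub = points_xyz[:, last:end]   # points assigned to this range
            idx = sampler(sub, npoint)      # (B, npoint) indices into `sub`
            indices.append(idx + last)      # shift back to global indices
            last = end
        return torch.cat(indices, dim=1)

    # Random sampling stands in for F-FPS / D-FPS just to show the index bookkeeping.
    rand_sampler = lambda pts, n: torch.randint(0, pts.shape[1], (pts.shape[0], n))
    pts = torch.rand(2, 100, 3)
    idx = split_and_sample(pts, [8, 12], [64, -1], [rand_sampler, rand_sampler])
    assert idx.shape == (2, 20)  # 8 indices from the first 64 points, 12 from the rest

This mirrors the mixed configuration exercised later in this patch by test_pointnet_sa_module_msg, which uses fps_mod=['F-FPS', 'D-FPS'] with fps_sample_range_list=[64, -1].
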
@@ -33,19 +39,40 @@ def __init__(self, radii: List[float], sample_nums: List[int], mlp_channels: List[List[int]], + fps_mod: List[str] = ['D-FPS'], + fps_sample_range_list: List[int] = [-1], norm_cfg: dict = dict(type='BN2d'), use_xyz: bool = True, pool_mod='max', - normalize_xyz: bool = False): + normalize_xyz: bool = False, + bias='auto'): super().__init__() assert len(radii) == len(sample_nums) == len(mlp_channels) assert pool_mod in ['max', 'avg'] + assert isinstance(fps_mod, list) or isinstance(fps_mod, tuple) + assert isinstance(fps_sample_range_list, list) or isinstance( + fps_sample_range_list, tuple) + assert len(fps_mod) == len(fps_sample_range_list) + + if isinstance(mlp_channels, tuple): + mlp_channels = list(map(list, mlp_channels)) + + if isinstance(num_point, int): + self.num_point = [num_point] + elif isinstance(num_point, list) or isinstance(num_point, tuple): + self.num_point = num_point + else: + raise NotImplementedError('Error type of num_point!') - self.num_point = num_point self.pool_mod = pool_mod self.groupers = nn.ModuleList() self.mlps = nn.ModuleList() + self.fps_mod_list = fps_mod + self.fps_sample_range_list = fps_sample_range_list + + self.points_sampler = Points_Sampler(self.num_point, self.fps_mod_list, + self.fps_sample_range_list) for i in range(len(radii)): radius = radii[i] @@ -74,15 +101,17 @@ def __init__(self, kernel_size=(1, 1), stride=(1, 1), conv_cfg=dict(type='Conv2d'), - norm_cfg=norm_cfg)) + norm_cfg=norm_cfg, + bias=bias)) self.mlps.append(mlp) def forward( self, points_xyz: torch.Tensor, features: torch.Tensor = None, - indices: torch.Tensor = None - ) -> (torch.Tensor, torch.Tensor, torch.Tensor): + indices: torch.Tensor = None, + target_xyz: torch.Tensor = None, + ) -> (torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor): """forward. Args: @@ -91,6 +120,7 @@ def forward( Default: None. indices (Tensor): (B, num_point) Index of the features. Default: None. + target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs. Returns: Tensor: (B, M, 3) where M is the number of points. @@ -101,15 +131,18 @@ def forward( Index of the features. """ new_features_list = [] - xyz_flipped = points_xyz.transpose(1, 2).contiguous() - if indices is None: - indices = furthest_point_sample(points_xyz, self.num_point) - else: - assert (indices.shape[1] == self.num_point) - new_xyz = gather_points(xyz_flipped, indices).transpose( - 1, 2).contiguous() if self.num_point is not None else None + if indices is not None: + assert (indices.shape[1] == self.num_point[0]) + new_xyz = gather_points(xyz_flipped, indices).transpose( + 1, 2).contiguous() if self.num_point is not None else None + elif target_xyz is not None: + new_xyz = target_xyz.contiguous() + else: + indices = self.points_sampler(points_xyz, features) + new_xyz = gather_points(xyz_flipped, indices).transpose( + 1, 2).contiguous() if self.num_point is not None else None for i in range(len(self.groupers)): # (B, C, num_point, nsample) @@ -152,6 +185,10 @@ class PointSAModule(PointSAModuleMSG): Default: True. pool_mod (str): Type of pooling method. Default: 'max_pool'. + fps_mod (list[str]: Type of FPS method, valid mod + ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. + fps_sample_range_list (list[int]): Range of points to apply FPS. + Default: [-1]. normalize_xyz (bool): Whether to normalize local XYZ with radius. Default: False. 
""" @@ -164,6 +201,8 @@ def __init__(self, norm_cfg: dict = dict(type='BN2d'), use_xyz: bool = True, pool_mod: str = 'max', + fps_mod: List[str] = ['D-FPS'], + fps_sample_range_list: List[int] = [-1], normalize_xyz: bool = False): super().__init__( mlp_channels=[mlp_channels], @@ -173,4 +212,6 @@ def __init__(self, norm_cfg=norm_cfg, use_xyz=use_xyz, pool_mod=pool_mod, + fps_mod=fps_mod, + fps_sample_range_list=fps_sample_range_list, normalize_xyz=normalize_xyz) diff --git a/tests/test_pointnet_modules.py b/tests/test_pointnet_modules.py index e1462e58ef..a7d833eceb 100644 --- a/tests/test_pointnet_modules.py +++ b/tests/test_pointnet_modules.py @@ -25,7 +25,7 @@ def test_pointnet_sa_module_msg(): xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) # (B, N, 3) - xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda() + xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda() # (B, C, N) features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() @@ -35,6 +35,104 @@ def test_pointnet_sa_module_msg(): assert new_features.shape == torch.Size([1, 48, 16]) assert inds.shape == torch.Size([1, 16]) + # test D-FPS mod + self = PointSAModuleMSG( + num_point=16, + radii=[0.2, 0.4], + sample_nums=[4, 8], + mlp_channels=[[12, 16], [12, 32]], + norm_cfg=dict(type='BN2d'), + use_xyz=False, + pool_mod='max', + fps_mod=['D-FPS'], + fps_sample_range_list=[-1]).cuda() + + # test forward + new_xyz, new_features, inds = self(xyz, features) + assert new_xyz.shape == torch.Size([1, 16, 3]) + assert new_features.shape == torch.Size([1, 48, 16]) + assert inds.shape == torch.Size([1, 16]) + + # test F-FPS mod + self = PointSAModuleMSG( + num_point=16, + radii=[0.2, 0.4], + sample_nums=[4, 8], + mlp_channels=[[12, 16], [12, 32]], + norm_cfg=dict(type='BN2d'), + use_xyz=False, + pool_mod='max', + fps_mod=['F-FPS'], + fps_sample_range_list=[-1]).cuda() + + # test forward + new_xyz, new_features, inds = self(xyz, features) + assert new_xyz.shape == torch.Size([1, 16, 3]) + assert new_features.shape == torch.Size([1, 48, 16]) + assert inds.shape == torch.Size([1, 16]) + + # test FS mod + self = PointSAModuleMSG( + num_point=8, + radii=[0.2, 0.4], + sample_nums=[4, 8], + mlp_channels=[[12, 16], [12, 32]], + norm_cfg=dict(type='BN2d'), + use_xyz=False, + pool_mod='max', + fps_mod=['FS'], + fps_sample_range_list=[-1]).cuda() + + # test forward + new_xyz, new_features, inds = self(xyz, features) + assert new_xyz.shape == torch.Size([1, 16, 3]) + assert new_features.shape == torch.Size([1, 48, 16]) + assert inds.shape == torch.Size([1, 16]) + + # test using F-FPS mod and D-FPS mod simultaneously + self = PointSAModuleMSG( + num_point=[8, 12], + radii=[0.2, 0.4], + sample_nums=[4, 8], + mlp_channels=[[12, 16], [12, 32]], + norm_cfg=dict(type='BN2d'), + use_xyz=False, + pool_mod='max', + fps_mod=['F-FPS', 'D-FPS'], + fps_sample_range_list=[64, -1]).cuda() + + # test forward + new_xyz, new_features, inds = self(xyz, features) + assert new_xyz.shape == torch.Size([1, 20, 3]) + assert new_features.shape == torch.Size([1, 48, 20]) + assert inds.shape == torch.Size([1, 20]) + + # length of 'fps_mod' should be same as 'fps_sample_range_list' + with pytest.raises(AssertionError): + PointSAModuleMSG( + num_point=8, + radii=[0.2, 0.4], + sample_nums=[4, 8], + mlp_channels=[[12, 16], [12, 32]], + norm_cfg=dict(type='BN2d'), + use_xyz=False, + pool_mod='max', + fps_mod=['F-FPS', 'D-FPS'], + fps_sample_range_list=[-1]).cuda() + + # length of 'num_point' should be same as 'fps_sample_range_list' + with 
pytest.raises(AssertionError): + PointSAModuleMSG( + num_point=[8, 8], + radii=[0.2, 0.4], + sample_nums=[4, 8], + mlp_channels=[[12, 16], [12, 32]], + norm_cfg=dict(type='BN2d'), + use_xyz=False, + pool_mod='max', + fps_mod=['F-FPS'], + fps_sample_range_list=[-1]).cuda() + def test_pointnet_sa_module(): if not torch.cuda.is_available(): diff --git a/tests/test_pointnet_ops.py b/tests/test_pointnet_ops.py index c0a0b7e3e5..ecfda60757 100644 --- a/tests/test_pointnet_ops.py +++ b/tests/test_pointnet_ops.py @@ -1,7 +1,8 @@ import pytest import torch -from mmdet3d.ops import (ball_query, furthest_point_sample, gather_points, +from mmdet3d.ops import (ball_query, furthest_point_sample, + furthest_point_sample_with_dist, gather_points, grouping_operation, three_interpolate, three_nn) @@ -312,3 +313,32 @@ def test_three_nn(): assert torch.allclose(dist, expected_dist, 1e-4) assert torch.all(idx == expected_idx) + + +def test_fps_with_dist(): + if not torch.cuda.is_available(): + pytest.skip() + xyz = torch.tensor([[[-0.2748, 1.0020, -1.1674], [0.1015, 1.3952, -1.2681], + [-0.8070, 2.4137, + -0.5845], [-1.0001, 2.1982, -0.5859], + [0.3841, 1.8983, -0.7431]], + [[-1.0696, 3.0758, + -0.1899], [-0.2559, 3.5521, -0.1402], + [0.8164, 4.0081, -0.1839], [-1.1000, 3.0213, -0.8205], + [-0.0518, 3.7251, -0.3950]]]).cuda() + + expected_idx = torch.tensor([[0, 2, 4], [0, 2, 1]]).cuda() + xyz_square_dist = ((xyz.unsqueeze(dim=1) - + xyz.unsqueeze(dim=2))**2).sum(-1) + idx = furthest_point_sample_with_dist(xyz_square_dist, 3) + assert torch.all(idx == expected_idx) + + import numpy as np + fps_idx = np.load('tests/data/ops/fps_idx.npy') + features_for_fps_distance = np.load( + 'tests/data/ops/features_for_fps_distance.npy') + expected_idx = torch.from_numpy(fps_idx).cuda() + features_for_fps_distance = torch.from_numpy( + features_for_fps_distance).cuda() + idx = furthest_point_sample_with_dist(features_for_fps_distance, 32) + assert torch.all(idx == expected_idx) From 16e8d143844eb1c349dd956978e683de03db6085 Mon Sep 17 00:00:00 2001 From: encore-zhou Date: Sun, 30 Aug 2020 22:57:23 +0800 Subject: [PATCH 14/14] add h3d head (#58) * add h3d head * add h3d roi head * update docstring of h3d roi head * reconstruct h3d head * remove unused code * modify h3d bbox head * add h3dnet init files * modify h3d bbox head * add depth_box3d unittest * update h3d head * add h3dnet benchmark * update docstring in vote_head * resovle primitive conflict --- configs/_base_/models/h3dnet.py | 332 +++++++ configs/h3dnet/README.md | 19 + configs/h3dnet/h3dnet_scannet-3d-18class.py | 70 ++ .../coders/partial_bin_based_bbox_coder.py | 14 +- mmdet3d/core/bbox/structures/depth_box3d.py | 49 + mmdet3d/models/dense_heads/vote_head.py | 48 +- mmdet3d/models/detectors/__init__.py | 3 +- mmdet3d/models/detectors/h3dnet.py | 173 ++++ mmdet3d/models/roi_heads/__init__.py | 6 +- .../models/roi_heads/bbox_heads/__init__.py | 4 +- .../roi_heads/bbox_heads/h3d_bbox_head.py | 931 ++++++++++++++++++ mmdet3d/models/roi_heads/h3d_roi_head.py | 158 +++ tests/test_box3d.py | 48 + tests/test_heads.py | 89 ++ 14 files changed, 1919 insertions(+), 25 deletions(-) create mode 100644 configs/_base_/models/h3dnet.py create mode 100644 configs/h3dnet/README.md create mode 100644 configs/h3dnet/h3dnet_scannet-3d-18class.py create mode 100644 mmdet3d/models/detectors/h3dnet.py create mode 100644 mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py create mode 100644 mmdet3d/models/roi_heads/h3d_roi_head.py diff --git a/configs/_base_/models/h3dnet.py 
b/configs/_base_/models/h3dnet.py new file mode 100644 index 0000000000..1b34aca396 --- /dev/null +++ b/configs/_base_/models/h3dnet.py @@ -0,0 +1,332 @@ +primitive_z_cfg = dict( + type='PrimitiveHead', + num_dims=2, + num_classes=18, + primitive_mode='z', + upper_thresh=100.0, + surface_thresh=0.5, + vote_moudule_cfg=dict( + in_channels=256, + vote_per_seed=1, + gt_per_seed=1, + conv_channels=(256, 256), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + norm_feats=True, + vote_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='none', + loss_dst_weight=10.0)), + vote_aggregation_cfg=dict( + num_point=1024, + radius=0.3, + num_sample=16, + mlp_channels=[256, 128, 128, 128], + use_xyz=True, + normalize_xyz=True), + feat_channels=(128, 128), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + objectness_loss=dict( + type='CrossEntropyLoss', + class_weight=[0.4, 0.6], + reduction='mean', + loss_weight=30.0), + center_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='sum', + loss_src_weight=0.5, + loss_dst_weight=0.5), + semantic_reg_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='sum', + loss_src_weight=0.5, + loss_dst_weight=0.5), + semantic_cls_loss=dict( + type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), + train_cfg=dict( + dist_thresh=0.2, + var_thresh=1e-2, + lower_thresh=1e-6, + num_point=100, + num_point_line=10, + line_thresh=0.2)) + +primitive_xy_cfg = dict( + type='PrimitiveHead', + num_dims=1, + num_classes=18, + primitive_mode='xy', + upper_thresh=100.0, + surface_thresh=0.5, + vote_moudule_cfg=dict( + in_channels=256, + vote_per_seed=1, + gt_per_seed=1, + conv_channels=(256, 256), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + norm_feats=True, + vote_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='none', + loss_dst_weight=10.0)), + vote_aggregation_cfg=dict( + num_point=1024, + radius=0.3, + num_sample=16, + mlp_channels=[256, 128, 128, 128], + use_xyz=True, + normalize_xyz=True), + feat_channels=(128, 128), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + objectness_loss=dict( + type='CrossEntropyLoss', + class_weight=[0.4, 0.6], + reduction='mean', + loss_weight=30.0), + center_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='sum', + loss_src_weight=0.5, + loss_dst_weight=0.5), + semantic_reg_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='sum', + loss_src_weight=0.5, + loss_dst_weight=0.5), + semantic_cls_loss=dict( + type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), + train_cfg=dict( + dist_thresh=0.2, + var_thresh=1e-2, + lower_thresh=1e-6, + num_point=100, + num_point_line=10, + line_thresh=0.2)) + +primitive_line_cfg = dict( + type='PrimitiveHead', + num_dims=0, + num_classes=18, + primitive_mode='line', + upper_thresh=100.0, + surface_thresh=0.5, + vote_moudule_cfg=dict( + in_channels=256, + vote_per_seed=1, + gt_per_seed=1, + conv_channels=(256, 256), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + norm_feats=True, + vote_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='none', + loss_dst_weight=10.0)), + vote_aggregation_cfg=dict( + num_point=1024, + radius=0.3, + num_sample=16, + mlp_channels=[256, 128, 128, 128], + use_xyz=True, + normalize_xyz=True), + feat_channels=(128, 128), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + objectness_loss=dict( + type='CrossEntropyLoss', + class_weight=[0.4, 0.6], + reduction='mean', + loss_weight=30.0), + center_loss=dict( + 
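+ # the line primitive uses larger loss weights than the z/xy primitives
+ # above (center / semantic_reg weights 1.0 instead of 0.5, semantic_cls
+ # weight 2.0 instead of 1.0)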
type='ChamferDistance', + mode='l1', + reduction='sum', + loss_src_weight=1.0, + loss_dst_weight=1.0), + semantic_reg_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='sum', + loss_src_weight=1.0, + loss_dst_weight=1.0), + semantic_cls_loss=dict( + type='CrossEntropyLoss', reduction='sum', loss_weight=2.0), + train_cfg=dict( + dist_thresh=0.2, + var_thresh=1e-2, + lower_thresh=1e-6, + num_point=100, + num_point_line=10, + line_thresh=0.2)) + +proposal_module_cfg = dict( + suface_matching_cfg=dict( + num_point=256 * 6, + radius=0.5, + num_sample=32, + mlp_channels=[128 + 6, 128, 64, 32], + use_xyz=True, + normalize_xyz=True), + line_matching_cfg=dict( + num_point=256 * 12, + radius=0.5, + num_sample=32, + mlp_channels=[128 + 12, 128, 64, 32], + use_xyz=True, + normalize_xyz=True), + primitive_refine_channels=[128, 128, 128], + upper_thresh=100.0, + surface_thresh=0.5, + line_thresh=0.5, + train_cfg=dict( + far_threshold=0.6, + near_threshold=0.3, + mask_surface_threshold=0.3, + label_surface_threshold=0.3, + mask_line_threshold=0.3, + label_line_threshold=0.3), + cues_objectness_loss=dict( + type='CrossEntropyLoss', + class_weight=[0.3, 0.7], + reduction='mean', + loss_weight=5.0), + cues_semantic_loss=dict( + type='CrossEntropyLoss', + class_weight=[0.3, 0.7], + reduction='mean', + loss_weight=5.0), + proposal_objectness_loss=dict( + type='CrossEntropyLoss', + class_weight=[0.2, 0.8], + reduction='none', + loss_weight=5.0), + primitive_center_loss=dict( + type='MSELoss', reduction='none', loss_weight=1.0)) + +model = dict( + type='H3DNet', + backbone=dict( + type='MultiBackbone', + num_streams=4, + suffixes=['net0', 'net1', 'net2', 'net3'], + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01), + act_cfg=dict(type='ReLU'), + backbones=dict( + type='PointNet2SASSG', + in_channels=4, + num_points=(2048, 1024, 512, 256), + radius=(0.2, 0.4, 0.8, 1.2), + num_samples=(64, 32, 16, 16), + sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), + (128, 128, 256)), + fp_channels=((256, 256), (256, 256)), + norm_cfg=dict(type='BN2d'), + pool_mod='max')), + rpn_head=dict( + type='VoteHead', + vote_moudule_cfg=dict( + in_channels=256, + vote_per_seed=1, + gt_per_seed=3, + conv_channels=(256, 256), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + norm_feats=True, + vote_loss=dict( + type='ChamferDistance', + mode='l1', + reduction='none', + loss_dst_weight=10.0)), + vote_aggregation_cfg=dict( + num_point=256, + radius=0.3, + num_sample=16, + mlp_channels=[256, 128, 128, 128], + use_xyz=True, + normalize_xyz=True), + feat_channels=(128, 128), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + objectness_loss=dict( + type='CrossEntropyLoss', + class_weight=[0.2, 0.8], + reduction='sum', + loss_weight=5.0), + center_loss=dict( + type='ChamferDistance', + mode='l2', + reduction='sum', + loss_src_weight=10.0, + loss_dst_weight=10.0), + dir_class_loss=dict( + type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), + dir_res_loss=dict( + type='SmoothL1Loss', reduction='sum', loss_weight=10.0), + size_class_loss=dict( + type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), + size_res_loss=dict( + type='SmoothL1Loss', reduction='sum', loss_weight=10.0), + semantic_loss=dict( + type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), + roi_head=dict( + type='H3DRoIHead', + primitive_list=[primitive_z_cfg, primitive_xy_cfg, primitive_line_cfg], + bbox_head=dict( + type='H3DBboxHead', + gt_per_seed=3, + num_proposal=256, + 
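+ # 'proposal_module_cfg' (defined above) bundles the surface/line
+ # matching and proposal refinement settings passed to this bbox head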
proposal_module_cfg=proposal_module_cfg, + feat_channels=(128, 128), + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + objectness_loss=dict( + type='CrossEntropyLoss', + class_weight=[0.2, 0.8], + reduction='sum', + loss_weight=5.0), + center_loss=dict( + type='ChamferDistance', + mode='l2', + reduction='sum', + loss_src_weight=10.0, + loss_dst_weight=10.0), + dir_class_loss=dict( + type='CrossEntropyLoss', reduction='sum', loss_weight=0.1), + dir_res_loss=dict( + type='SmoothL1Loss', reduction='sum', loss_weight=10.0), + size_class_loss=dict( + type='CrossEntropyLoss', reduction='sum', loss_weight=0.1), + size_res_loss=dict( + type='SmoothL1Loss', reduction='sum', loss_weight=10.0), + semantic_loss=dict( + type='CrossEntropyLoss', reduction='sum', loss_weight=0.1)))) + +# model training and testing settings +train_cfg = dict( + rpn=dict(pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), + rpn_proposal=dict(use_nms=False), + rcnn=dict(pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')) + +test_cfg = dict( + rpn=dict( + sample_mod='seed', + nms_thr=0.25, + score_thr=0.05, + per_class_proposal=True, + use_nms=False), + rcnn=dict( + sample_mod='seed', + nms_thr=0.25, + score_thr=0.05, + per_class_proposal=True)) diff --git a/configs/h3dnet/README.md b/configs/h3dnet/README.md new file mode 100644 index 0000000000..0c084e0619 --- /dev/null +++ b/configs/h3dnet/README.md @@ -0,0 +1,19 @@ +# H3DNet: 3D Object Detection Using Hybrid Geometric Primitives + +## Introduction +We implement H3DNet and provide the result and checkpoints on ScanNet datasets. +``` +@inproceedings{zhang2020h3dnet, + author = {Zhang, Zaiwei and Sun, Bo and Yang, Haitao and Huang, Qixing}, + title = {H3DNet: 3D Object Detection Using Hybrid Geometric Primitives}, + booktitle = {Proceedings of the European Conference on Computer Vision}, + year = {2020} +} +``` + +## Results + +### ScanNet +| Backbone | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 |AP@0.5| Download | +| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: | +| [MultiBackbone](./h3dnet_scannet-3d-18class.py) | 3x |7.9||66.43|48.01|[model](https://openmmlab.oss-accelerate.aliyuncs.com/mmdetection3d/v0.1.0_models/votenet/votenet_8x8_scannet-3d-18class/votenet_8x8_scannet-3d-18class_20200620_230238-2cea9c3a.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmdetection3d/v0.1.0_models/votenet/votenet_8x8_scannet-3d-18class/votenet_8x8_scannet-3d-18class_20200620_230238.log.json)| diff --git a/configs/h3dnet/h3dnet_scannet-3d-18class.py b/configs/h3dnet/h3dnet_scannet-3d-18class.py new file mode 100644 index 0000000000..c90abb7803 --- /dev/null +++ b/configs/h3dnet/h3dnet_scannet-3d-18class.py @@ -0,0 +1,70 @@ +_base_ = [ + '../_base_/datasets/scannet-3d-18class.py', '../_base_/models/h3dnet.py', + '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py' +] + +# model settings +model = dict( + rpn_head=dict( + num_classes=18, + bbox_coder=dict( + type='PartialBinBasedBBoxCoder', + num_sizes=18, + num_dir_bins=24, + with_rot=False, + mean_sizes=[[0.76966727, 0.8116021, 0.92573744], + [1.876858, 1.8425595, 1.1931566], + [0.61328, 0.6148609, 0.7182701], + [1.3955007, 1.5121545, 0.83443564], + [0.97949594, 1.0675149, 0.6329687], + [0.531663, 0.5955577, 1.7500148], + [0.9624706, 0.72462326, 1.1481868], + [0.83221924, 1.0490936, 1.6875663], + [0.21132214, 0.4206159, 0.5372846], + [1.4440073, 1.8970833, 0.26985747], + [1.0294262, 1.4040797, 0.87554324], + [1.3766412, 0.65521795, 
1.6813129], + [0.6650819, 0.71111923, 1.298853], + [0.41999173, 0.37906948, 1.7513971], + [0.59359556, 0.5912492, 0.73919016], + [0.50867593, 0.50656086, 0.30136237], + [1.1511526, 1.0546296, 0.49706793], + [0.47535285, 0.49249494, 0.5802117]])), + roi_head=dict( + bbox_head=dict( + num_classes=18, + bbox_coder=dict( + type='PartialBinBasedBBoxCoder', + num_sizes=18, + num_dir_bins=24, + with_rot=False, + mean_sizes=[[0.76966727, 0.8116021, 0.92573744], + [1.876858, 1.8425595, 1.1931566], + [0.61328, 0.6148609, 0.7182701], + [1.3955007, 1.5121545, 0.83443564], + [0.97949594, 1.0675149, 0.6329687], + [0.531663, 0.5955577, 1.7500148], + [0.9624706, 0.72462326, 1.1481868], + [0.83221924, 1.0490936, 1.6875663], + [0.21132214, 0.4206159, 0.5372846], + [1.4440073, 1.8970833, 0.26985747], + [1.0294262, 1.4040797, 0.87554324], + [1.3766412, 0.65521795, 1.6813129], + [0.6650819, 0.71111923, 1.298853], + [0.41999173, 0.37906948, 1.7513971], + [0.59359556, 0.5912492, 0.73919016], + [0.50867593, 0.50656086, 0.30136237], + [1.1511526, 1.0546296, 0.49706793], + [0.47535285, 0.49249494, 0.5802117]])))) + +data = dict(samples_per_gpu=3, workers_per_gpu=2) + +# optimizer +# yapf:disable +log_config = dict( + interval=30, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) +# yapf:enable diff --git a/mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py b/mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py index 724635383f..5edeaad115 100644 --- a/mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py +++ b/mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py @@ -55,7 +55,7 @@ def encode(self, gt_bboxes_3d, gt_labels_3d): return (center_target, size_class_target, size_res_target, dir_class_target, dir_res_target) - def decode(self, bbox_out): + def decode(self, bbox_out, suffix=''): """Decode predicted parts to bbox3d. Args: @@ -66,17 +66,18 @@ def decode(self, bbox_out): - dir_res: predicted bbox direction residual. - size_class: predicted bbox size class. - size_res: predicted bbox size residual. + suffix (str): Decode predictions with specific suffix. Returns: torch.Tensor: Decoded bbox3d with shape (batch, n, 7). 
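+ Note: with ``suffix='_optimized'``, for example, the coder looks up
+ ``bbox_out['center_optimized']``, ``bbox_out['dir_class_optimized']``,
+ ``bbox_out['dir_res_optimized']``, ``bbox_out['size_class_optimized']``
+ and ``bbox_out['size_res_optimized']``.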
""" - center = bbox_out['center'] + center = bbox_out['center' + suffix] batch_size, num_proposal = center.shape[:2] # decode heading angle if self.with_rot: - dir_class = torch.argmax(bbox_out['dir_class'], -1) - dir_res = torch.gather(bbox_out['dir_res'], 2, + dir_class = torch.argmax(bbox_out['dir_class' + suffix], -1) + dir_res = torch.gather(bbox_out['dir_res' + suffix], 2, dir_class.unsqueeze(-1)) dir_res.squeeze_(2) dir_angle = self.class2angle(dir_class, dir_res).reshape( @@ -85,8 +86,9 @@ def decode(self, bbox_out): dir_angle = center.new_zeros(batch_size, num_proposal, 1) # decode bbox size - size_class = torch.argmax(bbox_out['size_class'], -1, keepdim=True) - size_res = torch.gather(bbox_out['size_res'], 2, + size_class = torch.argmax( + bbox_out['size_class' + suffix], -1, keepdim=True) + size_res = torch.gather(bbox_out['size_res' + suffix], 2, size_class.unsqueeze(-1).repeat(1, 1, 1, 3)) mean_sizes = center.new_tensor(self.mean_sizes) size_base = torch.index_select(mean_sizes, 0, size_class.reshape(-1)) diff --git a/mmdet3d/core/bbox/structures/depth_box3d.py b/mmdet3d/core/bbox/structures/depth_box3d.py index 7e5cdbb75b..5d675cd411 100644 --- a/mmdet3d/core/bbox/structures/depth_box3d.py +++ b/mmdet3d/core/bbox/structures/depth_box3d.py @@ -251,3 +251,52 @@ def points_in_boxes(self, points): box_idxs_of_pts = points_in_boxes_batch(points_lidar, boxes_lidar) return box_idxs_of_pts.squeeze(0) + + def get_surface_line_center(self): + """Compute surface and line center of bounding boxes. + + Returns: + torch.Tensor: Surface and line center of bounding boxes. + """ + obj_size = self.dims + center = self.gravity_center + batch_size = center.shape[0] + + rot_sin = torch.sin(-self.yaw) + rot_cos = torch.cos(-self.yaw) + rot_mat_T = self.yaw.new_zeros(tuple(list(self.yaw.shape) + [3, 3])) + rot_mat_T[..., 0, 0] = rot_cos + rot_mat_T[..., 0, 1] = -rot_sin + rot_mat_T[..., 1, 0] = rot_sin + rot_mat_T[..., 1, 1] = rot_cos + rot_mat_T[..., 2, 2] = 1 + + # Get the object surface center + offset = obj_size.new_tensor([[0, 0, 1], [0, 0, -1], [0, 1, 0], + [0, -1, 0], [1, 0, 0], [-1, 0, 0]]) + offset = offset.view(1, 6, 3) / 2 + surface_3d = (offset * obj_size.view(batch_size, 1, 3).repeat( + 1, 6, 1)).transpose(0, 1).reshape(-1, 3) + + # Get the object line center + offset = obj_size.new_tensor([[1, 0, 1], [-1, 0, 1], [0, 1, 1], + [0, -1, 1], [1, 0, -1], [-1, 0, -1], + [0, 1, -1], [0, -1, -1], [1, 1, 0], + [1, -1, 0], [-1, 1, 0], [-1, -1, 0]]) + offset = offset.view(1, 12, 3) / 2 + + line_3d = (offset * + obj_size.view(batch_size, 1, 3).repeat(1, 12, 1)).transpose( + 0, 1).reshape(-1, 3) + + surface_rot = rot_mat_T.repeat(6, 1, 1) + surface_3d = torch.matmul( + surface_3d.unsqueeze(-2), surface_rot.transpose(2, 1)).squeeze(-2) + surface_center = center.repeat(6, 1) + surface_3d + + line_rot = rot_mat_T.repeat(12, 1, 1) + line_3d = torch.matmul( + line_3d.unsqueeze(-2), line_rot.transpose(2, 1)).squeeze(-2) + line_center = center.repeat(12, 1) + line_3d + + return surface_center, line_center diff --git a/mmdet3d/models/dense_heads/vote_head.py b/mmdet3d/models/dense_heads/vote_head.py index f9ecc063d1..c37917b86b 100644 --- a/mmdet3d/models/dense_heads/vote_head.py +++ b/mmdet3d/models/dense_heads/vote_head.py @@ -164,6 +164,7 @@ def forward(self, feat_dict, sample_mod): sample_indices) aggregated_points, features, aggregated_indices = vote_aggregation_ret results['aggregated_points'] = aggregated_points + results['aggregated_features'] = features results['aggregated_indices'] = 
aggregated_indices # 3. predict bbox and score @@ -183,7 +184,8 @@ def loss(self, pts_semantic_mask=None, pts_instance_mask=None, img_metas=None, - gt_bboxes_ignore=None): + gt_bboxes_ignore=None, + ret_target=False): """Compute loss. Args: @@ -199,6 +201,7 @@ def loss(self, img_metas (list[dict]): Contain pcd and img's meta info. gt_bboxes_ignore (None | list[torch.Tensor]): Specify which bounding. + ret_target (Bool): Return targets or not. Returns: dict: Losses of Votenet. @@ -283,6 +286,10 @@ def loss(self, dir_res_loss=dir_res_loss, size_class_loss=size_class_loss, size_res_loss=size_res_loss) + + if ret_target: + losses['targets'] = targets + return losses def get_targets(self, @@ -494,7 +501,12 @@ def get_targets_single(self, dir_class_targets, dir_res_targets, center_targets, mask_targets.long(), objectness_targets, objectness_masks) - def get_bboxes(self, points, bbox_preds, input_metas, rescale=False): + def get_bboxes(self, + points, + bbox_preds, + input_metas, + rescale=False, + use_nms=True): """Generate bboxes from vote head predictions. Args: @@ -502,6 +514,8 @@ def get_bboxes(self, points, bbox_preds, input_metas, rescale=False): bbox_preds (dict): Predictions from vote head. input_metas (list[dict]): Point cloud and image's meta info. rescale (bool): Whether to rescale bboxes. + use_nms (bool): Whether to apply NMS, skip nms postprocessing + while using vote head in rpn stage. Returns: list[tuple[torch.Tensor]]: Bounding boxes, scores and labels. @@ -511,19 +525,23 @@ def get_bboxes(self, points, bbox_preds, input_metas, rescale=False): sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1) bbox3d = self.bbox_coder.decode(bbox_preds) - batch_size = bbox3d.shape[0] - results = list() - for b in range(batch_size): - bbox_selected, score_selected, labels = self.multiclass_nms_single( - obj_scores[b], sem_scores[b], bbox3d[b], points[b, ..., :3], - input_metas[b]) - bbox = input_metas[b]['box_type_3d']( - bbox_selected, - box_dim=bbox_selected.shape[-1], - with_yaw=self.bbox_coder.with_rot) - results.append((bbox, score_selected, labels)) - - return results + if use_nms: + batch_size = bbox3d.shape[0] + results = list() + for b in range(batch_size): + bbox_selected, score_selected, labels = \ + self.multiclass_nms_single(obj_scores[b], sem_scores[b], + bbox3d[b], points[b, ..., :3], + input_metas[b]) + bbox = input_metas[b]['box_type_3d']( + bbox_selected, + box_dim=bbox_selected.shape[-1], + with_yaw=self.bbox_coder.with_rot) + results.append((bbox, score_selected, labels)) + + return results + else: + return bbox3d def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, input_meta): diff --git a/mmdet3d/models/detectors/__init__.py b/mmdet3d/models/detectors/__init__.py index 8abf95f2eb..c0995a8b1d 100644 --- a/mmdet3d/models/detectors/__init__.py +++ b/mmdet3d/models/detectors/__init__.py @@ -1,5 +1,6 @@ from .base import Base3DDetector from .dynamic_voxelnet import DynamicVoxelNet +from .h3dnet import H3DNet from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN from .mvx_two_stage import MVXTwoStageDetector from .parta2 import PartA2 @@ -8,5 +9,5 @@ __all__ = [ 'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector', - 'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet' + 'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet' ] diff --git a/mmdet3d/models/detectors/h3dnet.py b/mmdet3d/models/detectors/h3dnet.py new file mode 100644 index 0000000000..831f7c3491 --- /dev/null +++ 
b/mmdet3d/models/detectors/h3dnet.py @@ -0,0 +1,173 @@ +import torch + +from mmdet3d.core import merge_aug_bboxes_3d +from mmdet.models import DETECTORS +from .two_stage import TwoStage3DDetector + + +@DETECTORS.register_module() +class H3DNet(TwoStage3DDetector): + r"""H3DNet model. + + Please refer to the `paper `_ + """ + + def __init__(self, + backbone, + neck=None, + rpn_head=None, + roi_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None): + super(H3DNet, self).__init__( + backbone=backbone, + neck=neck, + rpn_head=rpn_head, + roi_head=roi_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained) + + def forward_train(self, + points, + img_metas, + gt_bboxes_3d, + gt_labels_3d, + pts_semantic_mask=None, + pts_instance_mask=None, + gt_bboxes_ignore=None): + """Forward of training. + + Args: + points (list[torch.Tensor]): Points of each batch. + img_metas (list): Image metas. + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch. + gt_labels_3d (list[torch.Tensor]): gt class labels of each batch. + pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic + label of each batch. + pts_instance_mask (None | list[torch.Tensor]): point-wise instance + label of each batch. + gt_bboxes_ignore (None | list[torch.Tensor]): Specify + which bounding. + + Returns: + dict: Losses. + """ + points_cat = torch.stack(points) + + feats_dict = self.extract_feat(points_cat) + feats_dict['fp_xyz'] = [feats_dict['fp_xyz_net0'][-1]] + feats_dict['fp_features'] = [feats_dict['hd_feature']] + feats_dict['fp_indices'] = [feats_dict['fp_indices_net0'][-1]] + + losses = dict() + if self.with_rpn: + rpn_outs = self.rpn_head(feats_dict, self.train_cfg.rpn.sample_mod) + feats_dict.update(rpn_outs) + + rpn_loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, + pts_semantic_mask, pts_instance_mask, img_metas) + rpn_losses = self.rpn_head.loss( + rpn_outs, + *rpn_loss_inputs, + gt_bboxes_ignore=gt_bboxes_ignore, + ret_target=True) + feats_dict['targets'] = rpn_losses.pop('targets') + losses.update(rpn_losses) + + # Generate rpn proposals + proposal_cfg = self.train_cfg.get('rpn_proposal', + self.test_cfg.rpn) + proposal_inputs = (points, rpn_outs, img_metas) + proposal_list = self.rpn_head.get_bboxes( + *proposal_inputs, use_nms=proposal_cfg.use_nms) + feats_dict['proposal_list'] = proposal_list + else: + raise NotImplementedError + + roi_losses = self.roi_head.forward_train(feats_dict, img_metas, points, + gt_bboxes_3d, gt_labels_3d, + pts_semantic_mask, + pts_instance_mask, + gt_bboxes_ignore) + losses.update(roi_losses) + + return losses + + def simple_test(self, points, img_metas, imgs=None, rescale=False): + """Forward of testing. + + Args: + points (list[torch.Tensor]): Points of each sample. + img_metas (list): Image metas. + rescale (bool): Whether to rescale results. + + Returns: + list: Predicted 3d boxes. 
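+ Note: the RPN stage is run with ``use_nms=self.test_cfg.rpn.use_nms``
+ (``False`` in the provided config), so its raw proposals are passed to
+ the RoI head, which performs the final NMS.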
+ """ + points_cat = torch.stack(points) + + feats_dict = self.extract_feat(points_cat) + feats_dict['fp_xyz'] = [feats_dict['fp_xyz_net0'][-1]] + feats_dict['fp_features'] = [feats_dict['hd_feature']] + feats_dict['fp_indices'] = [feats_dict['fp_indices_net0'][-1]] + + if self.with_rpn: + proposal_cfg = self.test_cfg.rpn + rpn_outs = self.rpn_head(feats_dict, proposal_cfg.sample_mod) + feats_dict.update(rpn_outs) + # Generate rpn proposals + proposal_list = self.rpn_head.get_bboxes( + points, rpn_outs, img_metas, use_nms=proposal_cfg.use_nms) + feats_dict['proposal_list'] = proposal_list + else: + raise NotImplementedError + + return self.roi_head.simple_test( + feats_dict, img_metas, points_cat, rescale=rescale) + + def aug_test(self, points, img_metas, imgs=None, rescale=False): + """Test with augmentation.""" + points_cat = [torch.stack(pts) for pts in points] + feats_dict = self.extract_feats(points_cat, img_metas) + for feat_dict in feats_dict: + feat_dict['fp_xyz'] = [feat_dict['fp_xyz_net0'][-1]] + feat_dict['fp_features'] = [feat_dict['hd_feature']] + feat_dict['fp_indices'] = [feat_dict['fp_indices_net0'][-1]] + + # only support aug_test for one sample + aug_bboxes = [] + for feat_dict, pts_cat, img_meta in zip(feats_dict, points_cat, + img_metas): + if self.with_rpn: + proposal_cfg = self.test_cfg.rpn + rpn_outs = self.rpn_head(feat_dict, proposal_cfg.sample_mod) + feat_dict.update(rpn_outs) + # Generate rpn proposals + proposal_list = self.rpn_head.get_bboxes( + points, rpn_outs, img_metas, use_nms=proposal_cfg.use_nms) + feat_dict['proposal_list'] = proposal_list + else: + raise NotImplementedError + + bbox_results = self.roi_head.simple_test( + feat_dict, + self.test_cfg.rcnn.sample_mod, + img_meta, + pts_cat, + rescale=rescale) + aug_bboxes.append(bbox_results) + + # after merging, bboxes will be rescaled to the original image size + merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas, + self.bbox_head.test_cfg) + + return merged_bboxes + + def extract_feats(self, points, img_metas): + """Extract features of multiple samples.""" + return [ + self.extract_feat(pts, img_meta) + for pts, img_meta in zip(points, img_metas) + ] diff --git a/mmdet3d/models/roi_heads/__init__.py b/mmdet3d/models/roi_heads/__init__.py index be1f856e35..c93a3e3481 100644 --- a/mmdet3d/models/roi_heads/__init__.py +++ b/mmdet3d/models/roi_heads/__init__.py @@ -1,10 +1,12 @@ from .base_3droi_head import Base3DRoIHead from .bbox_heads import PartA2BboxHead -from .mask_heads import PointwiseSemanticHead +from .h3d_roi_head import H3DRoIHead +from .mask_heads import PointwiseSemanticHead, PrimitiveHead from .part_aggregation_roi_head import PartAggregationROIHead from .roi_extractors import Single3DRoIAwareExtractor, SingleRoIExtractor __all__ = [ 'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead', - 'Single3DRoIAwareExtractor', 'PartA2BboxHead', 'SingleRoIExtractor' + 'Single3DRoIAwareExtractor', 'PartA2BboxHead', 'SingleRoIExtractor', + 'H3DRoIHead', 'PrimitiveHead' ] diff --git a/mmdet3d/models/roi_heads/bbox_heads/__init__.py b/mmdet3d/models/roi_heads/bbox_heads/__init__.py index 0da15dbda6..0256706a0c 100644 --- a/mmdet3d/models/roi_heads/bbox_heads/__init__.py +++ b/mmdet3d/models/roi_heads/bbox_heads/__init__.py @@ -2,9 +2,11 @@ DoubleConvFCBBoxHead, Shared2FCBBoxHead, Shared4Conv1FCBBoxHead) +from .h3d_bbox_head import H3DBboxHead from .parta2_bbox_head import PartA2BboxHead __all__ = [ 'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead', - 'Shared4Conv1FCBBoxHead', 
'DoubleConvFCBBoxHead', 'PartA2BboxHead' + 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'H3DBboxHead', + 'PartA2BboxHead' ] diff --git a/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py b/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py new file mode 100644 index 0000000000..d28242440a --- /dev/null +++ b/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py @@ -0,0 +1,931 @@ +import torch +from mmcv.cnn import ConvModule +from torch import nn as nn +from torch.nn import functional as F + +from mmdet3d.core.bbox import DepthInstance3DBoxes +from mmdet3d.core.post_processing import aligned_3d_nms +from mmdet3d.models.builder import build_loss +from mmdet3d.models.losses import chamfer_distance +from mmdet3d.ops import PointSAModule +from mmdet.core import build_bbox_coder, multi_apply +from mmdet.models import HEADS + + +@HEADS.register_module() +class H3DBboxHead(nn.Module): + r"""Bbox head of `H3DNet `_. + + Args: + num_classes (int): The number of classes. + suface_matching_cfg (dict): Config for suface primitive matching. + line_matching_cfg (dict): Config for line primitive matching. + bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and + decoding boxes. + train_cfg (dict): Config for training. + test_cfg (dict): Config for testing. + gt_per_seed (int): Number of ground truth votes generated + from each seed point. + num_proposal (int): Number of proposal votes generated. + feat_channels (tuple[int]): Convolution channels of + prediction layer. + primitive_feat_refine_streams (int): The number of mlps to + refine primitive feature. + primitive_refine_channels (tuple[int]): Convolution channels of + prediction layer. + upper_thresh (float): Threshold for line matching. + surface_thresh (float): Threshold for suface matching. + line_thresh (float): Threshold for line matching. + conv_cfg (dict): Config of convolution in prediction layer. + norm_cfg (dict): Config of BN in prediction layer. + objectness_loss (dict): Config of objectness loss. + center_loss (dict): Config of center loss. + dir_class_loss (dict): Config of direction classification loss. + dir_res_loss (dict): Config of direction residual regression loss. + size_class_loss (dict): Config of size classification loss. + size_res_loss (dict): Config of size residual regression loss. + semantic_loss (dict): Config of point-wise semantic segmentation loss. + cues_objectness_loss (dict): Config of cues objectness loss. + cues_semantic_loss (dict): Config of cues semantic loss. + proposal_objectness_loss (dict): Config of proposal objectness + loss. + primitive_center_loss (dict): Config of primitive center regression + loss. 
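+ Note: the head matches the predicted surface/line primitive centers
+ against the surface and line centers of the RPN proposals and uses the
+ matched features to refine the proposal boxes.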
+ """ + + def __init__(self, + num_classes, + suface_matching_cfg, + line_matching_cfg, + bbox_coder, + train_cfg=None, + test_cfg=None, + proposal_module_cfg=None, + gt_per_seed=1, + num_proposal=256, + feat_channels=(128, 128), + primitive_feat_refine_streams=2, + primitive_refine_channels=[128, 128, 128], + upper_thresh=100.0, + surface_thresh=0.5, + line_thresh=0.5, + conv_cfg=dict(type='Conv1d'), + norm_cfg=dict(type='BN1d'), + objectness_loss=None, + center_loss=None, + dir_class_loss=None, + dir_res_loss=None, + size_class_loss=None, + size_res_loss=None, + semantic_loss=None, + cues_objectness_loss=None, + cues_semantic_loss=None, + proposal_objectness_loss=None, + primitive_center_loss=None): + super(H3DBboxHead, self).__init__() + self.num_classes = num_classes + self.train_cfg = train_cfg + self.test_cfg = test_cfg + self.gt_per_seed = gt_per_seed + self.num_proposal = num_proposal + self.with_angle = bbox_coder['with_rot'] + self.upper_thresh = upper_thresh + self.surface_thresh = surface_thresh + self.line_thresh = line_thresh + + self.objectness_loss = build_loss(objectness_loss) + self.center_loss = build_loss(center_loss) + self.dir_class_loss = build_loss(dir_class_loss) + self.dir_res_loss = build_loss(dir_res_loss) + self.size_class_loss = build_loss(size_class_loss) + self.size_res_loss = build_loss(size_res_loss) + self.semantic_loss = build_loss(semantic_loss) + + self.bbox_coder = build_bbox_coder(bbox_coder) + self.num_sizes = self.bbox_coder.num_sizes + self.num_dir_bins = self.bbox_coder.num_dir_bins + + self.cues_objectness_loss = build_loss(cues_objectness_loss) + self.cues_semantic_loss = build_loss(cues_semantic_loss) + self.proposal_objectness_loss = build_loss(proposal_objectness_loss) + self.primitive_center_loss = build_loss(primitive_center_loss) + + assert suface_matching_cfg['mlp_channels'][-1] == \ + line_matching_cfg['mlp_channels'][-1] + + # surface center matching + self.surface_center_matcher = PointSAModule(**suface_matching_cfg) + # line center matching + self.line_center_matcher = PointSAModule(**line_matching_cfg) + + # Compute the matching scores + matching_feat_dims = suface_matching_cfg['mlp_channels'][-1] + self.matching_conv = ConvModule( + matching_feat_dims, + matching_feat_dims, + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True, + inplace=True) + self.matching_pred = nn.Conv1d(matching_feat_dims, 2, 1) + + # Compute the semantic matching scores + self.semantic_matching_conv = ConvModule( + matching_feat_dims, + matching_feat_dims, + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True, + inplace=True) + self.semantic_matching_pred = nn.Conv1d(matching_feat_dims, 2, 1) + + # Surface feature aggregation + self.surface_feats_aggregation = list() + for k in range(primitive_feat_refine_streams): + self.surface_feats_aggregation.append( + ConvModule( + matching_feat_dims, + matching_feat_dims, + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True, + inplace=True)) + self.surface_feats_aggregation = nn.Sequential( + *self.surface_feats_aggregation) + + # Line feature aggregation + self.line_feats_aggregation = list() + for k in range(primitive_feat_refine_streams): + self.line_feats_aggregation.append( + ConvModule( + matching_feat_dims, + matching_feat_dims, + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True, + inplace=True)) + self.line_feats_aggregation = nn.Sequential( + *self.line_feats_aggregation) + + # surface center(6) + line center(12) + prev_channel = 18 * 
matching_feat_dims + self.bbox_pred = nn.ModuleList() + for k in range(len(primitive_refine_channels)): + self.bbox_pred.append( + ConvModule( + prev_channel, + primitive_refine_channels[k], + 1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True, + inplace=False)) + prev_channel = primitive_refine_channels[k] + + # Final object detection + # Objectness scores (2), center residual (3), + # heading class+residual (num_heading_bin*2), size class + + # residual(num_size_cluster*4) + conv_out_channel = (2 + 3 + bbox_coder['num_dir_bins'] * 2 + + bbox_coder['num_sizes'] * 4 + self.num_classes) + self.bbox_pred.append(nn.Conv1d(prev_channel, conv_out_channel, 1)) + + def init_weights(self, pretrained=None): + """Initialize the weights in detector. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. + """ + pass + + def forward(self, feats_dict, sample_mod): + """Forward pass. + + Args: + feats_dict (dict): Feature dict from backbone. + sample_mod (str): Sample mode for vote aggregation layer. + valid modes are "vote", "seed" and "random". + + Returns: + dict: Predictions of vote head. + """ + ret_dict = {} + aggregated_points = feats_dict['aggregated_points'] + original_feature = feats_dict['aggregated_features'] + batch_size = original_feature.shape[0] + object_proposal = original_feature.shape[2] + + # Extract surface center, features and semantic predictions + z_center = feats_dict['pred_z_center'] + xy_center = feats_dict['pred_xy_center'] + z_semantic = feats_dict['sem_cls_scores_z'] + xy_semantic = feats_dict['sem_cls_scores_xy'] + z_feature = feats_dict['aggregated_features_z'] + xy_feature = feats_dict['aggregated_features_xy'] + # Extract line points and features + line_center = feats_dict['pred_line_center'] + line_feature = feats_dict['aggregated_features_line'] + + surface_center_pred = torch.cat((z_center, xy_center), dim=1) + ret_dict['surface_center_pred'] = surface_center_pred + ret_dict['surface_sem_pred'] = torch.cat((z_semantic, xy_semantic), + dim=1) + + # Extract the surface and line centers of rpn proposals + rpn_proposals = feats_dict['proposal_list'] + rpn_proposals_bbox = DepthInstance3DBoxes( + rpn_proposals.reshape(-1, 7).clone(), + box_dim=rpn_proposals.shape[-1], + with_yaw=self.with_angle, + origin=(0.5, 0.5, 0.5)) + + obj_surface_center, obj_line_center = \ + rpn_proposals_bbox.get_surface_line_center() + obj_surface_center = obj_surface_center.reshape( + batch_size, -1, 6, 3).transpose(1, 2).reshape(batch_size, -1, 3) + obj_line_center = obj_line_center.reshape(batch_size, -1, 12, + 3).transpose(1, 2).reshape( + batch_size, -1, 3) + ret_dict['surface_center_object'] = obj_surface_center + ret_dict['line_center_object'] = obj_line_center + + # aggregate primitive z and xy features to rpn proposals + surface_center_feature_pred = torch.cat((z_feature, xy_feature), dim=2) + surface_center_feature_pred = torch.cat( + (surface_center_feature_pred.new_zeros( + (batch_size, 6, surface_center_feature_pred.shape[2])), + surface_center_feature_pred), + dim=1) + + surface_xyz, surface_features, _ = self.surface_center_matcher( + surface_center_pred, + surface_center_feature_pred, + target_xyz=obj_surface_center) + + # aggregate primitive line features to rpn proposals + line_feature = torch.cat((line_feature.new_zeros( + (batch_size, 12, line_feature.shape[2])), line_feature), + dim=1) + line_xyz, line_features, _ = self.line_center_matcher( + line_center, line_feature, target_xyz=obj_line_center) + + # combine the surface 
and line features + combine_features = torch.cat((surface_features, line_features), dim=2) + + matching_features = self.matching_conv(combine_features) + matching_score = self.matching_pred(matching_features) + ret_dict['matching_score'] = matching_score.transpose(2, 1) + + semantic_matching_features = self.semantic_matching_conv( + combine_features) + semantic_matching_score = self.semantic_matching_pred( + semantic_matching_features) + ret_dict['semantic_matching_score'] = \ + semantic_matching_score.transpose(2, 1) + + surface_features = self.surface_feats_aggregation(surface_features) + line_features = self.line_feats_aggregation(line_features) + + # Combine all surface and line features + surface_features = surface_features.view(batch_size, -1, + object_proposal) + line_features = line_features.view(batch_size, -1, object_proposal) + + combine_feature = torch.cat((surface_features, line_features), dim=1) + + # Final bbox predictions + bbox_predictions = self.bbox_pred[0](combine_feature) + bbox_predictions += original_feature + for conv_module in self.bbox_pred[1:]: + bbox_predictions = conv_module(bbox_predictions) + + refine_decode_res = self.bbox_coder.split_pred(bbox_predictions, + aggregated_points) + for key in refine_decode_res.keys(): + ret_dict[key + '_optimized'] = refine_decode_res[key] + return ret_dict + + def loss(self, + bbox_preds, + points, + gt_bboxes_3d, + gt_labels_3d, + pts_semantic_mask=None, + pts_instance_mask=None, + img_metas=None, + rpn_targets=None, + gt_bboxes_ignore=None): + """Compute loss. + + Args: + bbox_preds (dict): Predictions from forward of h3d bbox head. + points (list[torch.Tensor]): Input points. + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + bboxes of each sample. + gt_labels_3d (list[torch.Tensor]): Labels of each sample. + pts_semantic_mask (None | list[torch.Tensor]): Point-wise + semantic mask. + pts_instance_mask (None | list[torch.Tensor]): Point-wise + instance mask. + img_metas (list[dict]): Contain pcd and img's meta info. + rpn_targets (Tuple) : Targets generated by rpn head. + gt_bboxes_ignore (None | list[torch.Tensor]): Specify + which bounding. + + Returns: + dict: Losses of H3dnet. 
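+ Note: ``rpn_targets`` are the targets returned by the RPN stage
+ (``VoteHead.loss`` called with ``ret_target=True``) and forwarded
+ through the RoI head.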
+ """ + (vote_targets, vote_target_masks, size_class_targets, size_res_targets, + dir_class_targets, dir_res_targets, center_targets, mask_targets, + valid_gt_masks, objectness_targets, objectness_weights, + box_loss_weights, valid_gt_weights) = rpn_targets + + losses = {} + + # calculate refined proposal loss + refined_proposal_loss = self.get_proposal_stage_loss( + bbox_preds, + size_class_targets, + size_res_targets, + dir_class_targets, + dir_res_targets, + center_targets, + mask_targets, + objectness_targets, + objectness_weights, + box_loss_weights, + valid_gt_weights, + suffix='_optimized') + for key in refined_proposal_loss.keys(): + losses[key + '_optimized'] = refined_proposal_loss[key] + + bbox3d_optimized = self.bbox_coder.decode( + bbox_preds, suffix='_optimized') + + targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d, + pts_semantic_mask, pts_instance_mask, + bbox_preds) + + (cues_objectness_label, cues_sem_label, proposal_objectness_label, + cues_mask, cues_match_mask, proposal_objectness_mask, + cues_matching_label, obj_surface_line_center) = targets + + # match scores for each geometric primitive + objectness_scores = bbox_preds['matching_score'] + # match scores for the semantics of primitives + objectness_scores_sem = bbox_preds['semantic_matching_score'] + + primitive_objectness_loss = self.cues_objectness_loss( + objectness_scores.transpose(2, 1), + cues_objectness_label, + weight=cues_mask, + avg_factor=cues_mask.sum() + 1e-6) + + primitive_sem_loss = self.cues_semantic_loss( + objectness_scores_sem.transpose(2, 1), + cues_sem_label, + weight=cues_mask, + avg_factor=cues_mask.sum() + 1e-6) + + objectness_scores = bbox_preds['obj_scores_optimized'] + objectness_loss_refine = self.proposal_objectness_loss( + objectness_scores.transpose(2, 1), proposal_objectness_label) + primitive_matching_loss = (objectness_loss_refine * + cues_match_mask).sum() / ( + cues_match_mask.sum() + 1e-6) * 0.5 + primitive_sem_matching_loss = ( + objectness_loss_refine * proposal_objectness_mask).sum() / ( + proposal_objectness_mask.sum() + 1e-6) * 0.5 + + # Get the object surface center here + batch_size, object_proposal = bbox3d_optimized.shape[:2] + refined_bbox = DepthInstance3DBoxes( + bbox3d_optimized.reshape(-1, 7).clone(), + box_dim=bbox3d_optimized.shape[-1], + with_yaw=self.with_angle, + origin=(0.5, 0.5, 0.5)) + + pred_obj_surface_center, pred_obj_line_center = \ + refined_bbox.get_surface_line_center() + pred_obj_surface_center = pred_obj_surface_center.reshape( + batch_size, -1, 6, 3).transpose(1, 2).reshape(batch_size, -1, 3) + pred_obj_line_center = pred_obj_line_center.reshape( + batch_size, -1, 12, 3).transpose(1, 2).reshape(batch_size, -1, 3) + pred_surface_line_center = torch.cat( + (pred_obj_surface_center, pred_obj_line_center), 1) + + square_dist = self.primitive_center_loss(pred_surface_line_center, + obj_surface_line_center) + + match_dist = torch.sqrt(square_dist.sum(dim=-1) + 1e-6) + primitive_centroid_reg_loss = torch.sum( + match_dist * cues_matching_label) / ( + cues_matching_label.sum() + 1e-6) + + refined_loss = dict( + primitive_objectness_loss=primitive_objectness_loss, + primitive_sem_loss=primitive_sem_loss, + primitive_matching_loss=primitive_matching_loss, + primitive_sem_matching_loss=primitive_sem_matching_loss, + primitive_centroid_reg_loss=primitive_centroid_reg_loss) + + losses.update(refined_loss) + + return losses + + def get_bboxes(self, + points, + bbox_preds, + input_metas, + rescale=False, + suffix=''): + """Generate bboxes from vote 
head predictions. + + Args: + points (torch.Tensor): Input points. + bbox_preds (dict): Predictions from vote head. + input_metas (list[dict]): Point cloud and image's meta info. + rescale (bool): Whether to rescale bboxes. + + Returns: + list[tuple[torch.Tensor]]: Bounding boxes, scores and labels. + """ + # decode boxes + obj_scores = F.softmax( + bbox_preds['obj_scores' + suffix], dim=-1)[..., -1] + + sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1) + + prediction_collection = {} + prediction_collection['center'] = bbox_preds['center' + suffix] + prediction_collection['dir_class'] = bbox_preds['dir_class'] + prediction_collection['dir_res'] = bbox_preds['dir_res' + suffix] + prediction_collection['size_class'] = bbox_preds['size_class'] + prediction_collection['size_res'] = bbox_preds['size_res' + suffix] + + bbox3d = self.bbox_coder.decode(prediction_collection) + + batch_size = bbox3d.shape[0] + results = list() + for b in range(batch_size): + bbox_selected, score_selected, labels = self.multiclass_nms_single( + obj_scores[b], sem_scores[b], bbox3d[b], points[b, ..., :3], + input_metas[b]) + bbox = input_metas[b]['box_type_3d']( + bbox_selected, + box_dim=bbox_selected.shape[-1], + with_yaw=self.bbox_coder.with_rot) + results.append((bbox, score_selected, labels)) + + return results + + def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, + input_meta): + """Multi-class nms in single batch. + + Args: + obj_scores (torch.Tensor): Objectness score of bounding boxes. + sem_scores (torch.Tensor): semantic class score of bounding boxes. + bbox (torch.Tensor): Predicted bounding boxes. + points (torch.Tensor): Input points. + input_meta (dict): Point cloud and image's meta info. + + Returns: + tuple[torch.Tensor]: Bounding boxes, scores and labels. 
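+ Note: when ``test_cfg.per_class_proposal`` is ``True``, every kept box
+ is duplicated for each class and scored with
+ ``obj_score * sem_score[:, k]``.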
+ """ + bbox = input_meta['box_type_3d']( + bbox, + box_dim=bbox.shape[-1], + with_yaw=self.bbox_coder.with_rot, + origin=(0.5, 0.5, 0.5)) + box_indices = bbox.points_in_boxes(points) + + corner3d = bbox.corners + minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6))) + minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0] + minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0] + + nonempty_box_mask = box_indices.T.sum(1) > 5 + + bbox_classes = torch.argmax(sem_scores, -1) + nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask], + obj_scores[nonempty_box_mask], + bbox_classes[nonempty_box_mask], + self.test_cfg.nms_thr) + + # filter empty boxes and boxes with low score + scores_mask = (obj_scores > self.test_cfg.score_thr) + nonempty_box_inds = torch.nonzero(nonempty_box_mask).flatten() + nonempty_mask = torch.zeros_like(bbox_classes).scatter( + 0, nonempty_box_inds[nms_selected], 1) + selected = (nonempty_mask.bool() & scores_mask.bool()) + + if self.test_cfg.per_class_proposal: + bbox_selected, score_selected, labels = [], [], [] + for k in range(sem_scores.shape[-1]): + bbox_selected.append(bbox[selected].tensor) + score_selected.append(obj_scores[selected] * + sem_scores[selected][:, k]) + labels.append( + torch.zeros_like(bbox_classes[selected]).fill_(k)) + bbox_selected = torch.cat(bbox_selected, 0) + score_selected = torch.cat(score_selected, 0) + labels = torch.cat(labels, 0) + else: + bbox_selected = bbox[selected].tensor + score_selected = obj_scores[selected] + labels = bbox_classes[selected] + + return bbox_selected, score_selected, labels + + def get_proposal_stage_loss(self, + bbox_preds, + size_class_targets, + size_res_targets, + dir_class_targets, + dir_res_targets, + center_targets, + mask_targets, + objectness_targets, + objectness_weights, + box_loss_weights, + valid_gt_weights, + suffix=''): + """Compute loss for the aggregation module. + + Args: + bbox_preds (dict): Predictions from forward of vote head. + size_class_targets (torch.Tensor): Ground truth \ + size class of each prediction bounding box. + size_res_targets (torch.Tensor): Ground truth \ + size residual of each prediction bounding box. + dir_class_targets (torch.Tensor): Ground truth \ + direction class of each prediction bounding box. + dir_res_targets (torch.Tensor): Ground truth \ + direction residual of each prediction bounding box. + center_targets (torch.Tensor): Ground truth center \ + of each prediction bounding box. + mask_targets (torch.Tensor): Validation of each \ + prediction bounding box. + objectness_targets (torch.Tensor): Ground truth \ + objectness label of each prediction bounding box. + objectness_weights (torch.Tensor): Weights of objectness \ + loss for each prediction bounding box. + box_loss_weights (torch.Tensor): Weights of regression \ + loss for each prediction bounding box. + valid_gt_weights (torch.Tensor): Validation of each \ + ground truth bounding box. + + Returns: + dict: Losses of aggregation module. 
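+ Note: ``suffix`` selects which prediction keys are scored; the refined
+ proposal stage passes ``suffix='_optimized'``.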
+ """ + # calculate objectness loss + objectness_loss = self.objectness_loss( + bbox_preds['obj_scores' + suffix].transpose(2, 1), + objectness_targets, + weight=objectness_weights) + + # calculate center loss + source2target_loss, target2source_loss = self.center_loss( + bbox_preds['center' + suffix], + center_targets, + src_weight=box_loss_weights, + dst_weight=valid_gt_weights) + center_loss = source2target_loss + target2source_loss + + # calculate direction class loss + dir_class_loss = self.dir_class_loss( + bbox_preds['dir_class' + suffix].transpose(2, 1), + dir_class_targets, + weight=box_loss_weights) + + # calculate direction residual loss + batch_size, proposal_num = size_class_targets.shape[:2] + heading_label_one_hot = dir_class_targets.new_zeros( + (batch_size, proposal_num, self.num_dir_bins)) + heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1) + dir_res_norm = (bbox_preds['dir_res_norm' + suffix] * + heading_label_one_hot).sum(dim=-1) + dir_res_loss = self.dir_res_loss( + dir_res_norm, dir_res_targets, weight=box_loss_weights) + + # calculate size class loss + size_class_loss = self.size_class_loss( + bbox_preds['size_class' + suffix].transpose(2, 1), + size_class_targets, + weight=box_loss_weights) + + # calculate size residual loss + one_hot_size_targets = box_loss_weights.new_zeros( + (batch_size, proposal_num, self.num_sizes)) + one_hot_size_targets.scatter_(2, size_class_targets.unsqueeze(-1), 1) + one_hot_size_targets_expand = one_hot_size_targets.unsqueeze( + -1).repeat(1, 1, 1, 3) + size_residual_norm = (bbox_preds['size_res_norm' + suffix] * + one_hot_size_targets_expand).sum(dim=2) + box_loss_weights_expand = box_loss_weights.unsqueeze(-1).repeat( + 1, 1, 3) + size_res_loss = self.size_res_loss( + size_residual_norm, + size_res_targets, + weight=box_loss_weights_expand) + + # calculate semantic loss + semantic_loss = self.semantic_loss( + bbox_preds['sem_scores' + suffix].transpose(2, 1), + mask_targets, + weight=box_loss_weights) + + losses = dict( + objectness_loss=objectness_loss, + semantic_loss=semantic_loss, + center_loss=center_loss, + dir_class_loss=dir_class_loss, + dir_res_loss=dir_res_loss, + size_class_loss=size_class_loss, + size_res_loss=size_res_loss) + + return losses + + def get_targets(self, + points, + gt_bboxes_3d, + gt_labels_3d, + pts_semantic_mask=None, + pts_instance_mask=None, + bbox_preds=None): + """Generate targets of proposal module. + + Args: + points (list[torch.Tensor]): Points of each batch. + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + bboxes of each batch. + gt_labels_3d (list[torch.Tensor]): Labels of each batch. + pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic + label of each batch. + pts_instance_mask (None | list[torch.Tensor]): Point-wise instance + label of each batch. + bbox_preds (torch.Tensor): Bounding box predictions of vote head. + + Returns: + tuple[torch.Tensor]: Targets of proposal module. 
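+ Contains, in order: ``cues_objectness_label``, ``cues_sem_label``,
+ ``proposal_objectness_label``, ``cues_mask``, ``cues_match_mask``,
+ ``proposal_objectness_mask``, ``cues_matching_label`` and
+ ``obj_surface_line_center``.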
+ """ + # find empty example + valid_gt_masks = list() + gt_num = list() + for index in range(len(gt_labels_3d)): + if len(gt_labels_3d[index]) == 0: + fake_box = gt_bboxes_3d[index].tensor.new_zeros( + 1, gt_bboxes_3d[index].tensor.shape[-1]) + gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box) + gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1) + valid_gt_masks.append(gt_labels_3d[index].new_zeros(1)) + gt_num.append(1) + else: + valid_gt_masks.append(gt_labels_3d[index].new_ones( + gt_labels_3d[index].shape)) + gt_num.append(gt_labels_3d[index].shape[0]) + + if pts_semantic_mask is None: + pts_semantic_mask = [None for i in range(len(gt_labels_3d))] + pts_instance_mask = [None for i in range(len(gt_labels_3d))] + + aggregated_points = [ + bbox_preds['aggregated_points'][i] + for i in range(len(gt_labels_3d)) + ] + + surface_center_pred = [ + bbox_preds['surface_center_pred'][i] + for i in range(len(gt_labels_3d)) + ] + + line_center_pred = [ + bbox_preds['pred_line_center'][i] + for i in range(len(gt_labels_3d)) + ] + + surface_center_object = [ + bbox_preds['surface_center_object'][i] + for i in range(len(gt_labels_3d)) + ] + + line_center_object = [ + bbox_preds['line_center_object'][i] + for i in range(len(gt_labels_3d)) + ] + + surface_sem_pred = [ + bbox_preds['surface_sem_pred'][i] + for i in range(len(gt_labels_3d)) + ] + + line_sem_pred = [ + bbox_preds['sem_cls_scores_line'][i] + for i in range(len(gt_labels_3d)) + ] + + (cues_objectness_label, cues_sem_label, proposal_objectness_label, + cues_mask, cues_match_mask, proposal_objectness_mask, + cues_matching_label, obj_surface_line_center) = multi_apply( + self.get_targets_single, points, gt_bboxes_3d, gt_labels_3d, + pts_semantic_mask, pts_instance_mask, aggregated_points, + surface_center_pred, line_center_pred, surface_center_object, + line_center_object, surface_sem_pred, line_sem_pred) + + cues_objectness_label = torch.stack(cues_objectness_label) + cues_sem_label = torch.stack(cues_sem_label) + proposal_objectness_label = torch.stack(proposal_objectness_label) + cues_mask = torch.stack(cues_mask) + cues_match_mask = torch.stack(cues_match_mask) + proposal_objectness_mask = torch.stack(proposal_objectness_mask) + cues_matching_label = torch.stack(cues_matching_label) + obj_surface_line_center = torch.stack(obj_surface_line_center) + + return (cues_objectness_label, cues_sem_label, + proposal_objectness_label, cues_mask, cues_match_mask, + proposal_objectness_mask, cues_matching_label, + obj_surface_line_center) + + def get_targets_single(self, + points, + gt_bboxes_3d, + gt_labels_3d, + pts_semantic_mask=None, + pts_instance_mask=None, + aggregated_points=None, + pred_surface_center=None, + pred_line_center=None, + pred_obj_surface_center=None, + pred_obj_line_center=None, + pred_surface_sem=None, + pred_line_sem=None): + """Generate targets for primitive cues for single batch. + + Args: + points (torch.Tensor): Points of each batch. + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \ + boxes of each batch. + gt_labels_3d (torch.Tensor): Labels of each batch. + pts_semantic_mask (None | torch.Tensor): Point-wise semantic + label of each batch. + pts_instance_mask (None | torch.Tensor): Point-wise instance + label of each batch. + aggregated_points (torch.Tensor): Aggregated points from + vote aggregation layer. + pred_surface_center (torch.Tensor): Prediction of surface center. + pred_line_center (torch.Tensor): Prediction of line center. 
+ pred_obj_surface_center (torch.Tensor): Objectness prediction \ + of surface center. + pred_obj_line_center (torch.Tensor): Objectness prediction of \ + line center. + pred_surface_sem (torch.Tensor): Semantic prediction of \ + surface center. + pred_line_sem (torch.Tensor): Semantic prediction of line center. + Returns: + tuple[torch.Tensor]: Targets for primitive cues. + """ + device = points.device + gt_bboxes_3d = gt_bboxes_3d.to(device) + num_proposals = aggregated_points.shape[0] + gt_center = gt_bboxes_3d.gravity_center + + dist1, dist2, ind1, _ = chamfer_distance( + aggregated_points.unsqueeze(0), + gt_center.unsqueeze(0), + reduction='none') + # Set assignment + object_assignment = ind1.squeeze(0) + + # Generate objectness label and mask + # objectness_label: 1 if pred object center is within + # self.train_cfg['near_threshold'] of any GT object + # objectness_mask: 0 if pred object center is in gray + # zone (DONOTCARE), 1 otherwise + euclidean_dist1 = torch.sqrt(dist1.squeeze(0) + 1e-6) + proposal_objectness_label = euclidean_dist1.new_zeros( + num_proposals, dtype=torch.long) + proposal_objectness_mask = euclidean_dist1.new_zeros(num_proposals) + + gt_sem = gt_labels_3d[object_assignment] + + obj_surface_center, obj_line_center = \ + gt_bboxes_3d.get_surface_line_center() + obj_surface_center = obj_surface_center.reshape(-1, 6, + 3).transpose(0, 1) + obj_line_center = obj_line_center.reshape(-1, 12, 3).transpose(0, 1) + obj_surface_center = obj_surface_center[:, object_assignment].reshape( + 1, -1, 3) + obj_line_center = obj_line_center[:, + object_assignment].reshape(1, -1, 3) + + surface_sem = torch.argmax(pred_surface_sem, dim=1).float() + line_sem = torch.argmax(pred_line_sem, dim=1).float() + + dist_surface, _, surface_ind, _ = chamfer_distance( + obj_surface_center, + pred_surface_center.unsqueeze(0), + reduction='none') + dist_line, _, line_ind, _ = chamfer_distance( + obj_line_center, pred_line_center.unsqueeze(0), reduction='none') + + surface_sel = pred_surface_center[surface_ind.squeeze(0)] + line_sel = pred_line_center[line_ind.squeeze(0)] + surface_sel_sem = surface_sem[surface_ind.squeeze(0)] + line_sel_sem = line_sem[line_ind.squeeze(0)] + + surface_sel_sem_gt = gt_sem.repeat(6).float() + line_sel_sem_gt = gt_sem.repeat(12).float() + + euclidean_dist_surface = torch.sqrt(dist_surface.squeeze(0) + 1e-6) + euclidean_dist_line = torch.sqrt(dist_line.squeeze(0) + 1e-6) + objectness_label_surface = euclidean_dist_line.new_zeros( + num_proposals * 6, dtype=torch.long) + objectness_mask_surface = euclidean_dist_line.new_zeros(num_proposals * + 6) + objectness_label_line = euclidean_dist_line.new_zeros( + num_proposals * 12, dtype=torch.long) + objectness_mask_line = euclidean_dist_line.new_zeros(num_proposals * + 12) + objectness_label_surface_sem = euclidean_dist_line.new_zeros( + num_proposals * 6, dtype=torch.long) + objectness_label_line_sem = euclidean_dist_line.new_zeros( + num_proposals * 12, dtype=torch.long) + + euclidean_dist_obj_surface = torch.sqrt(( + (pred_obj_surface_center - surface_sel)**2).sum(dim=-1) + 1e-6) + euclidean_dist_obj_line = torch.sqrt( + torch.sum((pred_obj_line_center - line_sel)**2, dim=-1) + 1e-6) + + # Objectness score just with centers + proposal_objectness_label[ + euclidean_dist1 < self.train_cfg['near_threshold']] = 1 + proposal_objectness_mask[ + euclidean_dist1 < self.train_cfg['near_threshold']] = 1 + proposal_objectness_mask[ + euclidean_dist1 > self.train_cfg['far_threshold']] = 1 + + objectness_label_surface[ + 
(euclidean_dist_obj_surface < + self.train_cfg['label_surface_threshold']) * + (euclidean_dist_surface < + self.train_cfg['mask_surface_threshold'])] = 1 + objectness_label_surface_sem[ + (euclidean_dist_obj_surface < + self.train_cfg['label_surface_threshold']) * + (euclidean_dist_surface < self.train_cfg['mask_surface_threshold']) + * (surface_sel_sem == surface_sel_sem_gt)] = 1 + + objectness_label_line[ + (euclidean_dist_obj_line < self.train_cfg['label_line_threshold']) + * + (euclidean_dist_line < self.train_cfg['mask_line_threshold'])] = 1 + objectness_label_line_sem[ + (euclidean_dist_obj_line < self.train_cfg['label_line_threshold']) + * (euclidean_dist_line < self.train_cfg['mask_line_threshold']) * + (line_sel_sem == line_sel_sem_gt)] = 1 + + objectness_label_surface_obj = proposal_objectness_label.repeat(6) + objectness_mask_surface_obj = proposal_objectness_mask.repeat(6) + objectness_label_line_obj = proposal_objectness_label.repeat(12) + objectness_mask_line_obj = proposal_objectness_mask.repeat(12) + + objectness_mask_surface = objectness_mask_surface_obj + objectness_mask_line = objectness_mask_line_obj + + cues_objectness_label = torch.cat( + (objectness_label_surface, objectness_label_line), 0) + cues_sem_label = torch.cat( + (objectness_label_surface_sem, objectness_label_line_sem), 0) + cues_mask = torch.cat((objectness_mask_surface, objectness_mask_line), + 0) + + objectness_label_surface *= objectness_label_surface_obj + objectness_label_line *= objectness_label_line_obj + cues_matching_label = torch.cat( + (objectness_label_surface, objectness_label_line), 0) + + objectness_label_surface_sem *= objectness_label_surface_obj + objectness_label_line_sem *= objectness_label_line_obj + + cues_match_mask = (torch.sum( + cues_objectness_label.view(18, num_proposals), dim=0) >= + 1).float() + + obj_surface_line_center = torch.cat( + (obj_surface_center, obj_line_center), 1).squeeze(0) + + return (cues_objectness_label, cues_sem_label, + proposal_objectness_label, cues_mask, cues_match_mask, + proposal_objectness_mask, cues_matching_label, + obj_surface_line_center) diff --git a/mmdet3d/models/roi_heads/h3d_roi_head.py b/mmdet3d/models/roi_heads/h3d_roi_head.py new file mode 100644 index 0000000000..a2aada2dd7 --- /dev/null +++ b/mmdet3d/models/roi_heads/h3d_roi_head.py @@ -0,0 +1,158 @@ +from mmdet3d.core.bbox import bbox3d2result +from mmdet.models import HEADS +from ..builder import build_head +from .base_3droi_head import Base3DRoIHead + + +@HEADS.register_module() +class H3DRoIHead(Base3DRoIHead): + """H3D roi head for H3DNet. + + Args: + primitive_list (List): Configs of primitive heads. + bbox_head (ConfigDict): Config of bbox_head. + train_cfg (ConfigDict): Training config. + test_cfg (ConfigDict): Testing config. 
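+ Note: ``primitive_list`` must contain exactly three primitive head
+ configs, in the order (z, xy, line).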
+    """
+
+    def __init__(self,
+                 primitive_list,
+                 bbox_head=None,
+                 train_cfg=None,
+                 test_cfg=None):
+        super(H3DRoIHead, self).__init__(
+            bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg)
+        # Primitive module
+        assert len(primitive_list) == 3
+        self.primitive_z = build_head(primitive_list[0])
+        self.primitive_xy = build_head(primitive_list[1])
+        self.primitive_line = build_head(primitive_list[2])
+
+    def init_weights(self, pretrained):
+        """Initialize weights, skip since ``H3DRoIHead`` does not need to
+        initialize weights."""
+        pass
+
+    def init_mask_head(self):
+        """Initialize mask head, skip since ``H3DRoIHead`` does not have
+        one."""
+        pass
+
+    def init_bbox_head(self, bbox_head):
+        """Initialize box head."""
+        bbox_head['train_cfg'] = self.train_cfg
+        bbox_head['test_cfg'] = self.test_cfg
+        self.bbox_head = build_head(bbox_head)
+
+    def init_assigner_sampler(self):
+        """Initialize assigner and sampler."""
+        pass
+
+    def forward_train(self,
+                      feats_dict,
+                      img_metas,
+                      points,
+                      gt_bboxes_3d,
+                      gt_labels_3d,
+                      pts_semantic_mask,
+                      pts_instance_mask,
+                      gt_bboxes_ignore=None):
+        """Training forward function of H3DRoIHead.
+
+        Args:
+            feats_dict (dict): Contains features from the first stage.
+            img_metas (list[dict]): Contain pcd and img's meta info.
+            points (list[torch.Tensor]): Input points.
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+                bboxes of each sample.
+            gt_labels_3d (list[torch.Tensor]): Labels of each sample.
+            pts_semantic_mask (None | list[torch.Tensor]): Point-wise
+                semantic mask.
+            pts_instance_mask (None | list[torch.Tensor]): Point-wise
+                instance mask.
+            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+                which bounding boxes can be ignored.
+
+        Returns:
+            dict: Losses from each head.
+        """
+        losses = dict()
+
+        sample_mod = self.train_cfg.sample_mod
+        assert sample_mod in ['vote', 'seed', 'random']
+        result_z = self.primitive_z(feats_dict, sample_mod)
+        feats_dict.update(result_z)
+
+        result_xy = self.primitive_xy(feats_dict, sample_mod)
+        feats_dict.update(result_xy)
+
+        result_line = self.primitive_line(feats_dict, sample_mod)
+        feats_dict.update(result_line)
+
+        primitive_loss_inputs = (feats_dict, points, gt_bboxes_3d,
+                                 gt_labels_3d, pts_semantic_mask,
+                                 pts_instance_mask, img_metas,
+                                 gt_bboxes_ignore)
+
+        loss_z = self.primitive_z.loss(*primitive_loss_inputs)
+        losses.update(loss_z)
+
+        loss_xy = self.primitive_xy.loss(*primitive_loss_inputs)
+        losses.update(loss_xy)
+
+        loss_line = self.primitive_line.loss(*primitive_loss_inputs)
+        losses.update(loss_line)
+
+        targets = feats_dict.pop('targets')
+
+        bbox_results = self.bbox_head(feats_dict, sample_mod)
+
+        feats_dict.update(bbox_results)
+        bbox_loss = self.bbox_head.loss(feats_dict, points, gt_bboxes_3d,
+                                        gt_labels_3d, pts_semantic_mask,
+                                        pts_instance_mask, img_metas, targets,
+                                        gt_bboxes_ignore)
+        losses.update(bbox_loss)
+
+        return losses
+
+    def simple_test(self, feats_dict, img_metas, points, rescale=False):
+        """Simple testing forward function of H3DRoIHead.
+
+        Note:
+            This function assumes that the batch size is 1.
+
+        Args:
+            feats_dict (dict): Contains features from the first stage.
+            img_metas (list[dict]): Contain pcd and img's meta info.
+            points (torch.Tensor): Input points.
+            rescale (bool): Whether to rescale results.
+
+        Returns:
+            dict: Bbox results of one frame.
+        """
+        sample_mod = self.test_cfg.sample_mod
+        assert sample_mod in ['vote', 'seed', 'random']
+
+        result_z = self.primitive_z(feats_dict, sample_mod)
+        feats_dict.update(result_z)
+
+        result_xy = self.primitive_xy(feats_dict, sample_mod)
+        feats_dict.update(result_xy)
+
+        result_line = self.primitive_line(feats_dict, sample_mod)
+        feats_dict.update(result_line)
+
+        bbox_preds = self.bbox_head(feats_dict, sample_mod)
+        feats_dict.update(bbox_preds)
+        bbox_list = self.bbox_head.get_bboxes(
+            points,
+            feats_dict,
+            img_metas,
+            rescale=rescale,
+            suffix='_optimized')
+        bbox_results = [
+            bbox3d2result(bboxes, scores, labels)
+            for bboxes, scores, labels in bbox_list
+        ]
+        return bbox_results[0]
diff --git a/tests/test_box3d.py b/tests/test_box3d.py
index 4369ec8123..5076b4331d 100644
--- a/tests/test_box3d.py
+++ b/tests/test_box3d.py
@@ -1135,6 +1135,54 @@ def test_depth_boxes3d():
                          dtype=torch.int32)
     assert torch.all(box_idxs_of_pts == expected_idxs_of_pts)
 
+    # test get_surface_line_center
+    boxes = torch.tensor(
+        [[0.3294, 1.0359, 0.1171, 1.0822, 1.1247, 1.3721, 0.4916],
+         [-2.4630, -2.6324, -0.1616, 0.9202, 1.7896, 0.1992, 0.3185]])
+    boxes = DepthInstance3DBoxes(
+        boxes, box_dim=boxes.shape[-1], with_yaw=True, origin=(0.5, 0.5, 0.5))
+    surface_center, line_center = boxes.get_surface_line_center()
+    expected_surface_center = torch.tensor([[0.3294, 1.0359, 0.8031],
+                                            [-2.4630, -2.6324, -0.0620],
+                                            [0.3294, 1.0359, -0.5689],
+                                            [-2.4630, -2.6324, -0.2612],
+                                            [0.5949, 1.5317, 0.1171],
+                                            [-2.1828, -1.7826, -0.1616],
+                                            [0.0640, 0.5401, 0.1171],
+                                            [-2.7432, -3.4822, -0.1616],
+                                            [0.8064, 0.7805, 0.1171],
+                                            [-2.0260, -2.7765, -0.1616],
+                                            [-0.1476, 1.2913, 0.1171],
+                                            [-2.9000, -2.4883, -0.1616]])
+
+    expected_line_center = torch.tensor([[0.8064, 0.7805, 0.8031],
+                                         [-2.0260, -2.7765, -0.0620],
+                                         [-0.1476, 1.2913, 0.8031],
+                                         [-2.9000, -2.4883, -0.0620],
+                                         [0.5949, 1.5317, 0.8031],
+                                         [-2.1828, -1.7826, -0.0620],
+                                         [0.0640, 0.5401, 0.8031],
+                                         [-2.7432, -3.4822, -0.0620],
+                                         [0.8064, 0.7805, -0.5689],
+                                         [-2.0260, -2.7765, -0.2612],
+                                         [-0.1476, 1.2913, -0.5689],
+                                         [-2.9000, -2.4883, -0.2612],
+                                         [0.5949, 1.5317, -0.5689],
+                                         [-2.1828, -1.7826, -0.2612],
+                                         [0.0640, 0.5401, -0.5689],
+                                         [-2.7432, -3.4822, -0.2612],
+                                         [1.0719, 1.2762, 0.1171],
+                                         [-1.7458, -1.9267, -0.1616],
+                                         [0.5410, 0.2847, 0.1171],
+                                         [-2.3062, -3.6263, -0.1616],
+                                         [0.1178, 1.7871, 0.1171],
+                                         [-2.6198, -1.6385, -0.1616],
+                                         [-0.4131, 0.7956, 0.1171],
+                                         [-3.1802, -3.3381, -0.1616]])
+
+    assert torch.allclose(surface_center, expected_surface_center, atol=1e-04)
+    assert torch.allclose(line_center, expected_line_center, atol=1e-04)
+
 
 def test_rotation_3d_in_axis():
     points = torch.tensor([[[-0.4599, -0.0471, 0.0000],
diff --git a/tests/test_heads.py b/tests/test_heads.py
index 637ca5751f..28232f6396 100644
--- a/tests/test_heads.py
+++ b/tests/test_heads.py
@@ -569,3 +569,91 @@ def test_primitive_head():
     with pytest.raises(AssertionError):
         primitive_head_cfg['vote_moudule_cfg']['in_channels'] = 'xyz'
         build_head(primitive_head_cfg)
+
+
+def test_h3d_head():
+    if not torch.cuda.is_available():
+        pytest.skip('test requires GPU and torch+cuda')
+    _setup_seed(0)
+
+    h3d_head_cfg = _get_roi_head_cfg('h3dnet/h3dnet_8x8_scannet-3d-18class.py')
+    self = build_head(h3d_head_cfg).cuda()
+
+    # prepare roi outputs
+    fp_xyz = [torch.rand([1, 1024, 3], dtype=torch.float32).cuda()]
+    hd_features = torch.rand([1, 256, 1024], dtype=torch.float32).cuda()
+    fp_indices = [torch.randint(0, 128, [1, 1024]).cuda()]
+    aggregated_points = torch.rand([1, 256, 3], dtype=torch.float32).cuda()
+    aggregated_features = torch.rand([1, 128, 256], dtype=torch.float32).cuda()
+    rpn_proposals = torch.cat([
+        torch.rand([1, 256, 3], dtype=torch.float32).cuda() * 4 - 2,
+        torch.rand([1, 256, 3], dtype=torch.float32).cuda() * 4,
+        torch.zeros([1, 256, 1]).cuda()
+    ],
+                              dim=-1)
+
+    input_dict = dict(
+        fp_xyz_net0=fp_xyz,
+        hd_feature=hd_features,
+        aggregated_points=aggregated_points,
+        aggregated_features=aggregated_features,
+        seed_points=fp_xyz[0],
+        seed_indices=fp_indices[0],
+        rpn_proposals=rpn_proposals)
+
+    # prepare gt label
+    from mmdet3d.core.bbox import DepthInstance3DBoxes
+    gt_bboxes_3d = [
+        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()),
+        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda())
+    ]
+    gt_labels_3d = torch.randint(0, 18, [1, 4]).cuda()
+    gt_labels_3d = [gt_labels_3d[0]]
+    pts_semantic_mask = torch.randint(0, 19, [1, 1024]).cuda()
+    pts_semantic_mask = [pts_semantic_mask[0]]
+    pts_instance_mask = torch.randint(0, 4, [1, 1024]).cuda()
+    pts_instance_mask = [pts_instance_mask[0]]
+    points = torch.rand([1, 1024, 3], dtype=torch.float32).cuda()
+
+    # prepare rpn targets
+    vote_targets = torch.rand([1, 1024, 9], dtype=torch.float32).cuda()
+    vote_target_masks = torch.rand([1, 1024], dtype=torch.float32).cuda()
+    size_class_targets = torch.rand([1, 256],
+                                    dtype=torch.float32).cuda().long()
+    size_res_targets = torch.rand([1, 256, 3], dtype=torch.float32).cuda()
+    dir_class_targets = torch.rand([1, 256], dtype=torch.float32).cuda().long()
+    dir_res_targets = torch.rand([1, 256], dtype=torch.float32).cuda()
+    center_targets = torch.rand([1, 4, 3], dtype=torch.float32).cuda()
+    mask_targets = torch.rand([1, 256], dtype=torch.float32).cuda().long()
+    valid_gt_masks = torch.rand([1, 4], dtype=torch.float32).cuda()
+    objectness_targets = torch.rand([1, 256],
+                                    dtype=torch.float32).cuda().long()
+    objectness_weights = torch.rand([1, 256], dtype=torch.float32).cuda()
+    box_loss_weights = torch.rand([1, 256], dtype=torch.float32).cuda()
+    valid_gt_weights = torch.rand([1, 4], dtype=torch.float32).cuda()
+
+    targets = (vote_targets, vote_target_masks, size_class_targets,
+               size_res_targets, dir_class_targets, dir_res_targets,
+               center_targets, mask_targets, valid_gt_masks,
+               objectness_targets, objectness_weights, box_loss_weights,
+               valid_gt_weights)
+
+    input_dict['targets'] = targets
+
+    # train forward
+    ret_dict = self.forward_train(
+        input_dict,
+        points=points,
+        gt_bboxes_3d=gt_bboxes_3d,
+        gt_labels_3d=gt_labels_3d,
+        pts_semantic_mask=pts_semantic_mask,
+        pts_instance_mask=pts_instance_mask,
+        img_metas=None)
+
+    assert ret_dict['flag_loss_z'] >= 0
+    assert ret_dict['vote_loss_z'] >= 0
+    assert ret_dict['center_loss_z'] >= 0
+    assert ret_dict['size_loss_z'] >= 0
+    assert ret_dict['sem_loss_z'] >= 0
+    assert ret_dict['objectness_loss_opt'] >= 0
+    assert ret_dict['primitive_sem_matching_loss'] >= 0