From 80469168bd70a919387e303e09e8da24f7c78bd9 Mon Sep 17 00:00:00 2001 From: liyinhao Date: Fri, 31 Jul 2020 10:02:53 +0800 Subject: [PATCH 01/15] Add centerpoint_rpn and scn, change pillar encoder and voxel_encoder --- mmdet3d/models/backbones/__init__.py | 3 +- mmdet3d/models/middle_encoders/scn.py | 148 +++++++++++++++ mmdet3d/models/necks/__init__.py | 3 +- mmdet3d/models/necks/centerpoint_rpn.py | 169 ++++++++++++++++++ .../models/voxel_encoders/pillar_encoder.py | 13 +- .../models/voxel_encoders/voxel_encoder.py | 5 +- tests/test_middle_encoders.py | 22 +++ tests/test_necks.py | 20 +++ .../test_voxel_encoders.py | 52 ++++++ 9 files changed, 424 insertions(+), 11 deletions(-) create mode 100644 mmdet3d/models/middle_encoders/scn.py create mode 100644 mmdet3d/models/necks/centerpoint_rpn.py create mode 100644 tests/test_middle_encoders.py create mode 100644 tests/test_necks.py create mode 100644 tests/test_voxel_encoders/test_voxel_encoders.py diff --git a/mmdet3d/models/backbones/__init__.py b/mmdet3d/models/backbones/__init__.py index 46c9b44aa3..76fd2c14df 100644 --- a/mmdet3d/models/backbones/__init__.py +++ b/mmdet3d/models/backbones/__init__.py @@ -1,3 +1,4 @@ +from mmdet3d.models.middle_encoders.scn import SpMiddleResNetFHD from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt from .nostem_regnet import NoStemRegNet from .pointnet2_sa_ssg import PointNet2SASSG @@ -5,5 +6,5 @@ __all__ = [ 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'NoStemRegNet', - 'SECOND', 'PointNet2SASSG' + 'SECOND', 'PointNet2SASSG', 'SpMiddleResNetFHD' ] diff --git a/mmdet3d/models/middle_encoders/scn.py b/mmdet3d/models/middle_encoders/scn.py new file mode 100644 index 0000000000..2621096f5c --- /dev/null +++ b/mmdet3d/models/middle_encoders/scn.py @@ -0,0 +1,148 @@ +import numpy as np +from mmcv.cnn import build_norm_layer +from torch import nn + +from mmdet3d.models.builder import MIDDLE_ENCODERS +from mmdet3d.ops import spconv +from 
mmdet3d.ops.spconv.conv import SparseConv3d, SubMConv3d + + +class SparseBasicBlock(spconv.SparseModule): + expansion = 1 + + def __init__( + self, + inplanes, + planes, + kernal_size=3, + stride=1, + bias=False, + norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), + downsample=None, + indice_key=None, + ): + super(SparseBasicBlock, self).__init__() + + self.conv1 = spconv.SubMConv3d( + inplanes, + planes, + kernel_size=kernal_size, + stride=stride, + padding=1, + bias=bias, + indice_key=indice_key, + ) + self.bn1 = build_norm_layer(norm_cfg, planes)[1] + self.relu = nn.ReLU() + self.conv2 = spconv.SubMConv3d( + planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias=bias, + indice_key=indice_key, + ) + self.bn2 = build_norm_layer(norm_cfg, planes)[1] + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out.features = self.bn1(out.features) + out.features = self.relu(out.features) + + out = self.conv2(out) + out.features = self.bn2(out.features) + + if self.downsample is not None: + identity = self.downsample(x) + + out.features += identity.features + out.features = self.relu(out.features) + + return out + + +@MIDDLE_ENCODERS.register_module +class SpMiddleResNetFHD(nn.Module): + """Sparse Middle ResNet FHD. + + Middle encoder used by CenterPoint. + + Args: + num_input_features (int): Number of input features. + Default: 128. + + norm_cfg (dict): Configuration of normalization. + Default: dict(type='BN1d', eps=1e-3, momentum=0.01). 
+ """ + + def __init__(self, + num_input_features=128, + norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), + **kwargs): + super(SpMiddleResNetFHD, self).__init__() + + self.dcn = None + self.zero_init_residual = False + + # input: # [1600, 1200, 41] + self.middle_conv = spconv.SparseSequential( + SubMConv3d( + num_input_features, 16, 3, bias=False, indice_key='res0'), + build_norm_layer(norm_cfg, 16)[1], + nn.ReLU(), + SparseBasicBlock(16, 16, indice_key='res0'), + SparseBasicBlock(16, 16, indice_key='res0'), + SparseConv3d(16, 32, 3, 2, padding=1, + bias=False), # [1600, 1200, 41] -> [800, 600, 21] + build_norm_layer(norm_cfg, 32)[1], + nn.ReLU(), + SparseBasicBlock(32, 32, indice_key='res1'), + SparseBasicBlock(32, 32, indice_key='res1'), + SparseConv3d(32, 64, 3, 2, padding=1, + bias=False), # [800, 600, 21] -> [400, 300, 11] + build_norm_layer(norm_cfg, 64)[1], + nn.ReLU(), + SparseBasicBlock(64, 64, indice_key='res2'), + SparseBasicBlock(64, 64, indice_key='res2'), + SparseConv3d(64, 128, 3, 2, padding=[0, 1, 1], + bias=False), # [400, 300, 11] -> [200, 150, 5] + build_norm_layer(norm_cfg, 128)[1], + nn.ReLU(), + SparseBasicBlock(128, 128, indice_key='res3'), + SparseBasicBlock(128, 128, indice_key='res3'), + SparseConv3d(128, 128, (3, 1, 1), (2, 1, 1), + bias=False), # [200, 150, 5] -> [200, 150, 2] + build_norm_layer(norm_cfg, 128)[1], + nn.ReLU(), + ) + + def forward(self, voxel_features, coors, batch_size, input_shape): + """Forward of SpMiddleResNetFHD. + + Args: + voxel_features (torch.Tensor): Voxel features with the + shape of [N, 5]. + coors (torch.Tensor): Voxel features with the shape of [N, 4]. + batch_size (int): Batch size. + input_shape (np.ndarray): Shape of input. + + Returns: + torch.Tensor: Result tensor. 
+ """ + sparse_shape = np.array(input_shape[::-1]) + [1, 0, 0] + + coors = coors.int() + ret = spconv.SparseConvTensor(voxel_features, coors, sparse_shape, + batch_size) + ret = self.middle_conv(ret) + ret = ret.dense() + + N, C, D, H, W = ret.shape + ret = ret.view(N, C * D, H, W) + + return ret diff --git a/mmdet3d/models/necks/__init__.py b/mmdet3d/models/necks/__init__.py index 85904b497c..fdb1840087 100644 --- a/mmdet3d/models/necks/__init__.py +++ b/mmdet3d/models/necks/__init__.py @@ -1,4 +1,5 @@ from mmdet.models.necks.fpn import FPN +from .centerpoint_rpn import CenterPointRPN from .second_fpn import SECONDFPN -__all__ = ['FPN', 'SECONDFPN'] +__all__ = ['FPN', 'SECONDFPN', 'CenterPointRPN'] diff --git a/mmdet3d/models/necks/centerpoint_rpn.py b/mmdet3d/models/necks/centerpoint_rpn.py new file mode 100644 index 0000000000..13c7359712 --- /dev/null +++ b/mmdet3d/models/necks/centerpoint_rpn.py @@ -0,0 +1,169 @@ +import numpy as np +import torch +from mmcv.cnn import build_norm_layer, xavier_init +from torch import nn as nn + +from mmdet.models import NECKS + + +@NECKS.register_module +class CenterPointRPN(nn.Module): + """RPN used in CenterPoint. + + Args: + layer_nums (list[int]): Number of layers for each block. + downsample_strides (list[int]): Strides used to + downsample the feature maps. + downsample_channels (list[int]): Output channels + of downsamplesample feature maps. + upsample_strides (list[float]): Strides used to + upsample the feature maps. + upsample_channels (list[int]): Output channels + of upsample feature maps. + in_channels (int): Input channels of + feature map. + norm_cfg (dict): Configuration of norm layer. 
+ """ + + def __init__( + self, + layer_nums, + downsample_strides, + downsample_channels, + upsample_strides, + upsample_channels, + input_channels, + norm_cfg, + ): + super(CenterPointRPN, self).__init__() + self._layer_strides = downsample_strides + self._num_filters = downsample_channels + self._layer_nums = layer_nums + self._upsample_strides = upsample_strides + self._num_upsample_filters = upsample_channels + self._num_input_features = input_channels + + if norm_cfg is None: + norm_cfg = dict(type='BN', eps=1e-3, momentum=0.01) + self._norm_cfg = norm_cfg + + assert len(self._layer_strides) == len(self._layer_nums) + assert len(self._num_filters) == len(self._layer_nums) + assert len(self._num_upsample_filters) == len(self._upsample_strides) + + self._upsample_start_idx = len(self._layer_nums) - len( + self._upsample_strides) + + must_equal_list = [] + for i in range(len(self._upsample_strides)): + # print(upsample_strides[i]) + must_equal_list.append(self._upsample_strides[i] / np.prod( + self._layer_strides[:i + self._upsample_start_idx + 1])) + + for val in must_equal_list: + assert val == must_equal_list[0] + + in_filters = [self._num_input_features, *self._num_filters[:-1]] + blocks = [] + deblocks = [] + + for i, layer_num in enumerate(self._layer_nums): + block, num_out_filters = self._make_layer( + in_filters[i], + self._num_filters[i], + layer_num, + stride=self._layer_strides[i], + ) + blocks.append(block) + if i - self._upsample_start_idx >= 0: + stride = (self._upsample_strides[i - self._upsample_start_idx]) + if stride > 1: + deblock = nn.Sequential( + nn.ConvTranspose2d( + num_out_filters, + self._num_upsample_filters[ + i - self._upsample_start_idx], + stride, + stride=stride, + bias=False, + ), + build_norm_layer( + self._norm_cfg, + self._num_upsample_filters[ + i - self._upsample_start_idx], + )[1], + nn.ReLU(), + ) + else: + stride = np.round(1 / stride).astype(np.int64) + deblock = nn.Sequential( + nn.Conv2d( + num_out_filters, + 
self._num_upsample_filters[ + i - self._upsample_start_idx], + stride, + stride=stride, + bias=False, + ), + build_norm_layer( + self._norm_cfg, + self._num_upsample_filters[ + i - self._upsample_start_idx], + )[1], + nn.ReLU(), + ) + deblocks.append(deblock) + self.blocks = nn.ModuleList(blocks) + self.deblocks = nn.ModuleList(deblocks) + + @property + def downsample_factor(self): + factor = np.prod(self._layer_strides) + if len(self._upsample_strides) > 0: + factor /= self._upsample_strides[-1] + return factor + + def _make_layer(self, inplanes, planes, num_blocks, stride=1): + + block_list = [ + nn.ZeroPad2d(1), + nn.Conv2d(inplanes, planes, 3, stride=stride, bias=False), + build_norm_layer(self._norm_cfg, planes)[1], + nn.ReLU() + ] + + for j in range(num_blocks): + block_list.append( + nn.Conv2d(planes, planes, 3, padding=1, bias=False)) + block_list.append( + build_norm_layer(self._norm_cfg, planes)[1], + # nn.BatchNorm2d(planes, eps=1e-3, momentum=0.01) + ) + block_list.append(nn.ReLU()) + + block = nn.Sequential(*block_list) + return block, planes + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + + def forward(self, x): + """Forward of CenterPointRPN. + + Args: + x (torch.Tensor): Input feature with the shape of [B, C, H, M]. + + Returns: + torch.Tensor: Concatenate features. 
+ """ + ups = [] + for i in range(len(self.blocks)): + x = self.blocks[i](x) + if i - self._upsample_start_idx >= 0: + ups.append(self.deblocks[i - self._upsample_start_idx](x)) + if len(ups) > 0: + x = torch.cat(ups, dim=1) + return x diff --git a/mmdet3d/models/voxel_encoders/pillar_encoder.py b/mmdet3d/models/voxel_encoders/pillar_encoder.py index 58a28cd58a..570cceac69 100644 --- a/mmdet3d/models/voxel_encoders/pillar_encoder.py +++ b/mmdet3d/models/voxel_encoders/pillar_encoder.py @@ -104,14 +104,13 @@ def forward(self, features, num_points, coors): features_ls.append(f_cluster) # Find distance of x, y, and z from pillar center + dtype = features.dtype if self._with_voxel_center: - f_center = features[:, :, :2] - f_center[:, :, 0] = f_center[:, :, 0] - ( - coors[:, 3].type_as(features).unsqueeze(1) * self.vx + - self.x_offset) - f_center[:, :, 1] = f_center[:, :, 1] - ( - coors[:, 2].type_as(features).unsqueeze(1) * self.vy + - self.y_offset) + f_center = torch.zeros_like(features[:, :, :2]) + f_center[:, :, 0] = features[:, :, 0] - ( + coors[:, 3].to(dtype).unsqueeze(1) * self.vx + self.x_offset) + f_center[:, :, 1] = features[:, :, 1] - ( + coors[:, 2].to(dtype).unsqueeze(1) * self.vy + self.y_offset) features_ls.append(f_center) if self._with_distance: diff --git a/mmdet3d/models/voxel_encoders/voxel_encoder.py b/mmdet3d/models/voxel_encoders/voxel_encoder.py index 67a4ab8162..7bd1400b53 100644 --- a/mmdet3d/models/voxel_encoders/voxel_encoder.py +++ b/mmdet3d/models/voxel_encoders/voxel_encoder.py @@ -15,8 +15,9 @@ class HardSimpleVFE(nn.Module): It simply averages the values of points in a voxel. """ - def __init__(self): + def __init__(self, num_features=4): super(HardSimpleVFE, self).__init__() + self.num_features = num_features def forward(self, features, num_points, coors): """Forward function. 
@@ -32,7 +33,7 @@ def forward(self, features, num_points, coors): Returns: torch.Tensor: Mean of points inside each voxel in shape (N, 3(4)) """ - points_mean = features[:, :, :4].sum( + points_mean = features[:, :, :self.num_features].sum( dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1) return points_mean.contiguous() diff --git a/tests/test_middle_encoders.py b/tests/test_middle_encoders.py new file mode 100644 index 0000000000..1de83d0221 --- /dev/null +++ b/tests/test_middle_encoders.py @@ -0,0 +1,22 @@ +import numpy as np +import pytest +import torch + +from mmdet3d.models.builder import build_middle_encoder + + +def test_sp_middle_resnet_FHD(): + if not torch.cuda.is_available(): + pytest.skip('test requires GPU and torch+cuda') + sp_middle_resnet_FHD_cfg = dict( + type='SpMiddleResNetFHD', num_input_features=5, ds_factor=8) + + sp_middle_resnet_FHD = build_middle_encoder( + sp_middle_resnet_FHD_cfg).cuda() + + voxel_features = torch.rand([207842, 5]).cuda() + coors = torch.randint(0, 4, [207842, 4]).cuda() + + ret = sp_middle_resnet_FHD(voxel_features, coors, 4, + np.array([1024, 1024, 40])) + assert ret.shape == torch.Size([4, 256, 128, 128]) diff --git a/tests/test_necks.py b/tests/test_necks.py new file mode 100644 index 0000000000..685dd5c65c --- /dev/null +++ b/tests/test_necks.py @@ -0,0 +1,20 @@ +import torch + +from mmdet3d.models.builder import build_neck + + +def test_centerpoint_rpn(): + centerpoint_rpn_cfg = dict( + type='CenterPointRPN', + layer_nums=[3, 5, 5], + downsample_strides=[2, 2, 2], + downsample_channels=[64, 128, 256], + upsample_strides=[0.5, 1, 2], + upsample_channels=[128, 128, 128], + input_channels=64, + norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01)) + centerpoint_rpn = build_neck(centerpoint_rpn_cfg) + centerpoint_rpn.init_weights() + input = torch.rand([4, 64, 512, 512]) + output = centerpoint_rpn(input) + assert output.shape == torch.Size([4, 384, 128, 128]) diff --git 
a/tests/test_voxel_encoders/test_voxel_encoders.py b/tests/test_voxel_encoders/test_voxel_encoders.py new file mode 100644 index 0000000000..3fedaf5a4d --- /dev/null +++ b/tests/test_voxel_encoders/test_voxel_encoders.py @@ -0,0 +1,52 @@ +import numpy as np +import torch + +from mmdet3d.models.builder import build_voxel_encoder + + +def _set_seed(): + torch.manual_seed(0) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + np.random.seed(0) + + +def test_pillar_feature_net(): + _set_seed() + pillar_feature_net_cfg = dict( + type='PillarFeatureNet', + in_channels=5, + feat_channels=[64], + with_distance=False, + voxel_size=(0.2, 0.2, 8), + point_cloud_range=(-51.2, -51.2, -5.0, 51.2, 51.2, 3.0), + norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), + ) + + pillar_feature_net = build_voxel_encoder(pillar_feature_net_cfg) + + norm_layer_weight = torch.from_numpy( + np.load('tests/test_voxel_encoders/norm_weight.npy')) + pillar_feature_net.pfn_layers[0].norm.weight = torch.nn.Parameter( + norm_layer_weight) + features = torch.from_numpy( + np.load('tests/test_voxel_encoders/input_features.npy')) + num_voxels = torch.from_numpy( + np.load('tests/test_voxel_encoders/num_voxels.npy')) + coors = torch.from_numpy(np.load('tests/test_voxel_encoders/coors.npy')) + + expected_features = torch.from_numpy( + np.load('tests/test_voxel_encoders/expected_features.npy')) + + features = pillar_feature_net(features, num_voxels, coors) + assert torch.allclose(features, expected_features) + + +def test_hard_simple_VFE(): + hard_simple_VFE_cfg = dict(type='HardSimpleVFE', num_features=5) + hard_simple_VFE = build_voxel_encoder(hard_simple_VFE_cfg) + features = torch.rand([240000, 10, 5]) + num_voxels = torch.randint(0, 100, [240000]) + + outputs = hard_simple_VFE(features, num_voxels, None) + assert outputs.shape == torch.Size([240000, 5]) From 54fe4c82e54a609e11fa80747f79f0f4cb60a833 Mon Sep 17 00:00:00 2001 From: liyinhao Date: Fri, 31 Jul 2020 
10:38:07 +0800 Subject: [PATCH 02/15] Move test_voxel_encoders. --- .../test_voxel_encoders.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) rename tests/{test_voxel_encoders => }/test_voxel_encoders.py (63%) diff --git a/tests/test_voxel_encoders/test_voxel_encoders.py b/tests/test_voxel_encoders.py similarity index 63% rename from tests/test_voxel_encoders/test_voxel_encoders.py rename to tests/test_voxel_encoders.py index 3fedaf5a4d..c2f2a82ebc 100644 --- a/tests/test_voxel_encoders/test_voxel_encoders.py +++ b/tests/test_voxel_encoders.py @@ -25,21 +25,12 @@ def test_pillar_feature_net(): pillar_feature_net = build_voxel_encoder(pillar_feature_net_cfg) - norm_layer_weight = torch.from_numpy( - np.load('tests/test_voxel_encoders/norm_weight.npy')) - pillar_feature_net.pfn_layers[0].norm.weight = torch.nn.Parameter( - norm_layer_weight) - features = torch.from_numpy( - np.load('tests/test_voxel_encoders/input_features.npy')) - num_voxels = torch.from_numpy( - np.load('tests/test_voxel_encoders/num_voxels.npy')) - coors = torch.from_numpy(np.load('tests/test_voxel_encoders/coors.npy')) - - expected_features = torch.from_numpy( - np.load('tests/test_voxel_encoders/expected_features.npy')) + features = torch.rand([97297, 20, 5]) + num_voxels = torch.randint(1, 100, [97297]) + coors = torch.randint(0, 100, [97297, 4]) features = pillar_feature_net(features, num_voxels, coors) - assert torch.allclose(features, expected_features) + assert features.shape == torch.Size([97297, 64]) def test_hard_simple_VFE(): From 878b24f50030b4ccb9c956d59a944bdaf844c658 Mon Sep 17 00:00:00 2001 From: liyinhao Date: Sat, 1 Aug 2020 22:17:53 +0800 Subject: [PATCH 03/15] Change names, add docstring. 
--- mmdet3d/models/middle_encoders/scn.py | 21 ++++-- mmdet3d/models/necks/centerpoint_rpn.py | 95 ++++++++++++------------- 2 files changed, 62 insertions(+), 54 deletions(-) diff --git a/mmdet3d/models/middle_encoders/scn.py b/mmdet3d/models/middle_encoders/scn.py index 2621096f5c..e7723a08fd 100644 --- a/mmdet3d/models/middle_encoders/scn.py +++ b/mmdet3d/models/middle_encoders/scn.py @@ -8,6 +8,22 @@ class SparseBasicBlock(spconv.SparseModule): + """Basic block for SpMiddleResNetFHD. + + Args: + inplanes (int): Number of input features. + planes (int): Number of output features. + kernel_size (int): Kernel_size of the conv layer. + Default: 3. + stride (int): Stride of the conv layer. + Default: 1. + bias (bool): Whether to use bias. + Default: False. + norm_cfg (dict): Configuration of normalization. + Default: dict(type='BN1d', eps=1e-3, momentum=0.01). + indice_key (str): Indice key for spconv. + Default: None. + """ expansion = 1 def __init__( @@ -18,7 +34,6 @@ def __init__( stride=1, bias=False, norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), - downsample=None, indice_key=None, ): super(SparseBasicBlock, self).__init__() @@ -44,7 +59,6 @@ def __init__( indice_key=indice_key, ) self.bn2 = build_norm_layer(norm_cfg, planes)[1] - self.downsample = downsample self.stride = stride def forward(self, x): @@ -57,9 +71,6 @@ def forward(self, x): out = self.conv2(out) out.features = self.bn2(out.features) - if self.downsample is not None: - identity = self.downsample(x) - out.features += identity.features out.features = self.relu(out.features) diff --git a/mmdet3d/models/necks/centerpoint_rpn.py b/mmdet3d/models/necks/centerpoint_rpn.py index 13c7359712..c6014f45cd 100644 --- a/mmdet3d/models/necks/centerpoint_rpn.py +++ b/mmdet3d/models/necks/centerpoint_rpn.py @@ -26,71 +26,68 @@ class CenterPointRPN(nn.Module): """ def __init__( - self, - layer_nums, - downsample_strides, - downsample_channels, - upsample_strides, - upsample_channels, - input_channels, - 
norm_cfg, + self, + layer_nums, + downsample_strides, + downsample_channels, + upsample_strides, + upsample_channels, + input_channels, + norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), ): super(CenterPointRPN, self).__init__() - self._layer_strides = downsample_strides - self._num_filters = downsample_channels - self._layer_nums = layer_nums - self._upsample_strides = upsample_strides - self._num_upsample_filters = upsample_channels - self._num_input_features = input_channels + self.downsample_strides = downsample_strides + self.downsample_channels = downsample_channels + self.layer_nums = layer_nums + self.upsample_strides = upsample_strides + self.upsample_channels = upsample_channels + self.input_channels = input_channels + self.norm_cfg = norm_cfg - if norm_cfg is None: - norm_cfg = dict(type='BN', eps=1e-3, momentum=0.01) - self._norm_cfg = norm_cfg + assert len(self.downsample_strides) == len(self.layer_nums) + assert len(self.downsample_channels) == len(self.layer_nums) + assert len(self.upsample_channels) == len(self.upsample_strides) - assert len(self._layer_strides) == len(self._layer_nums) - assert len(self._num_filters) == len(self._layer_nums) - assert len(self._num_upsample_filters) == len(self._upsample_strides) - - self._upsample_start_idx = len(self._layer_nums) - len( - self._upsample_strides) + self.upsample_start_idx = len(self.layer_nums) - len( + self.upsample_strides) must_equal_list = [] - for i in range(len(self._upsample_strides)): + for i in range(len(self.upsample_strides)): # print(upsample_strides[i]) - must_equal_list.append(self._upsample_strides[i] / np.prod( - self._layer_strides[:i + self._upsample_start_idx + 1])) + must_equal_list.append(self.upsample_strides[i] / np.prod( + self.downsample_strides[:i + self.upsample_start_idx + 1])) for val in must_equal_list: assert val == must_equal_list[0] - in_filters = [self._num_input_features, *self._num_filters[:-1]] + in_filters = [self.input_channels, 
*self.downsample_channels[:-1]] blocks = [] deblocks = [] - for i, layer_num in enumerate(self._layer_nums): + for i, layer_num in enumerate(self.layer_nums): block, num_out_filters = self._make_layer( in_filters[i], - self._num_filters[i], + self.downsample_channels[i], layer_num, - stride=self._layer_strides[i], + stride=self.downsample_strides[i], ) blocks.append(block) - if i - self._upsample_start_idx >= 0: - stride = (self._upsample_strides[i - self._upsample_start_idx]) + if i - self.upsample_start_idx >= 0: + stride = (self.upsample_strides[i - self.upsample_start_idx]) if stride > 1: deblock = nn.Sequential( nn.ConvTranspose2d( num_out_filters, - self._num_upsample_filters[ - i - self._upsample_start_idx], + self.upsample_channels[i - + self.upsample_start_idx], stride, stride=stride, bias=False, ), build_norm_layer( - self._norm_cfg, - self._num_upsample_filters[ - i - self._upsample_start_idx], + self.norm_cfg, + self.upsample_channels[i - + self.upsample_start_idx], )[1], nn.ReLU(), ) @@ -99,16 +96,16 @@ def __init__( deblock = nn.Sequential( nn.Conv2d( num_out_filters, - self._num_upsample_filters[ - i - self._upsample_start_idx], + self.upsample_channels[i - + self.upsample_start_idx], stride, stride=stride, bias=False, ), build_norm_layer( - self._norm_cfg, - self._num_upsample_filters[ - i - self._upsample_start_idx], + self.norm_cfg, + self.upsample_channels[i - + self.upsample_start_idx], )[1], nn.ReLU(), ) @@ -118,9 +115,9 @@ def __init__( @property def downsample_factor(self): - factor = np.prod(self._layer_strides) - if len(self._upsample_strides) > 0: - factor /= self._upsample_strides[-1] + factor = np.prod(self.downsample_strides) + if len(self.upsample_strides) > 0: + factor /= self.upsample_strides[-1] return factor def _make_layer(self, inplanes, planes, num_blocks, stride=1): @@ -128,7 +125,7 @@ def _make_layer(self, inplanes, planes, num_blocks, stride=1): block_list = [ nn.ZeroPad2d(1), nn.Conv2d(inplanes, planes, 3, stride=stride, 
bias=False), - build_norm_layer(self._norm_cfg, planes)[1], + build_norm_layer(self.norm_cfg, planes)[1], nn.ReLU() ] @@ -136,7 +133,7 @@ def _make_layer(self, inplanes, planes, num_blocks, stride=1): block_list.append( nn.Conv2d(planes, planes, 3, padding=1, bias=False)) block_list.append( - build_norm_layer(self._norm_cfg, planes)[1], + build_norm_layer(self.norm_cfg, planes)[1], # nn.BatchNorm2d(planes, eps=1e-3, momentum=0.01) ) block_list.append(nn.ReLU()) @@ -162,8 +159,8 @@ def forward(self, x): ups = [] for i in range(len(self.blocks)): x = self.blocks[i](x) - if i - self._upsample_start_idx >= 0: - ups.append(self.deblocks[i - self._upsample_start_idx](x)) + if i - self.upsample_start_idx >= 0: + ups.append(self.deblocks[i - self.upsample_start_idx](x)) if len(ups) > 0: x = torch.cat(ups, dim=1) return x From bde41e757ed49f32fbc0aba79d7d8f144e5403b0 Mon Sep 17 00:00:00 2001 From: liyinhao Date: Sun, 2 Aug 2020 11:21:49 +0800 Subject: [PATCH 04/15] Reconstruct centerpoint_rpn. --- mmdet3d/models/necks/__init__.py | 4 +- mmdet3d/models/necks/centerpoint_rpn.py | 166 ------------------------ tests/test_necks.py | 33 +++-- 3 files changed, 24 insertions(+), 179 deletions(-) delete mode 100644 mmdet3d/models/necks/centerpoint_rpn.py diff --git a/mmdet3d/models/necks/__init__.py b/mmdet3d/models/necks/__init__.py index fdb1840087..0a10a512f4 100644 --- a/mmdet3d/models/necks/__init__.py +++ b/mmdet3d/models/necks/__init__.py @@ -1,5 +1,5 @@ from mmdet.models.necks.fpn import FPN -from .centerpoint_rpn import CenterPointRPN +from .centerpoint_fpn import CenterPointFPN from .second_fpn import SECONDFPN -__all__ = ['FPN', 'SECONDFPN', 'CenterPointRPN'] +__all__ = ['FPN', 'SECONDFPN', 'CenterPointFPN'] diff --git a/mmdet3d/models/necks/centerpoint_rpn.py b/mmdet3d/models/necks/centerpoint_rpn.py deleted file mode 100644 index c6014f45cd..0000000000 --- a/mmdet3d/models/necks/centerpoint_rpn.py +++ /dev/null @@ -1,166 +0,0 @@ -import numpy as np -import torch -from 
mmcv.cnn import build_norm_layer, xavier_init -from torch import nn as nn - -from mmdet.models import NECKS - - -@NECKS.register_module -class CenterPointRPN(nn.Module): - """RPN used in CenterPoint. - - Args: - layer_nums (list[int]): Number of layers for each block. - downsample_strides (list[int]): Strides used to - downsample the feature maps. - downsample_channels (list[int]): Output channels - of downsamplesample feature maps. - upsample_strides (list[float]): Strides used to - upsample the feature maps. - upsample_channels (list[int]): Output channels - of upsample feature maps. - in_channels (int): Input channels of - feature map. - norm_cfg (dict): Configuration of norm layer. - """ - - def __init__( - self, - layer_nums, - downsample_strides, - downsample_channels, - upsample_strides, - upsample_channels, - input_channels, - norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), - ): - super(CenterPointRPN, self).__init__() - self.downsample_strides = downsample_strides - self.downsample_channels = downsample_channels - self.layer_nums = layer_nums - self.upsample_strides = upsample_strides - self.upsample_channels = upsample_channels - self.input_channels = input_channels - self.norm_cfg = norm_cfg - - assert len(self.downsample_strides) == len(self.layer_nums) - assert len(self.downsample_channels) == len(self.layer_nums) - assert len(self.upsample_channels) == len(self.upsample_strides) - - self.upsample_start_idx = len(self.layer_nums) - len( - self.upsample_strides) - - must_equal_list = [] - for i in range(len(self.upsample_strides)): - # print(upsample_strides[i]) - must_equal_list.append(self.upsample_strides[i] / np.prod( - self.downsample_strides[:i + self.upsample_start_idx + 1])) - - for val in must_equal_list: - assert val == must_equal_list[0] - - in_filters = [self.input_channels, *self.downsample_channels[:-1]] - blocks = [] - deblocks = [] - - for i, layer_num in enumerate(self.layer_nums): - block, num_out_filters = self._make_layer( - 
in_filters[i], - self.downsample_channels[i], - layer_num, - stride=self.downsample_strides[i], - ) - blocks.append(block) - if i - self.upsample_start_idx >= 0: - stride = (self.upsample_strides[i - self.upsample_start_idx]) - if stride > 1: - deblock = nn.Sequential( - nn.ConvTranspose2d( - num_out_filters, - self.upsample_channels[i - - self.upsample_start_idx], - stride, - stride=stride, - bias=False, - ), - build_norm_layer( - self.norm_cfg, - self.upsample_channels[i - - self.upsample_start_idx], - )[1], - nn.ReLU(), - ) - else: - stride = np.round(1 / stride).astype(np.int64) - deblock = nn.Sequential( - nn.Conv2d( - num_out_filters, - self.upsample_channels[i - - self.upsample_start_idx], - stride, - stride=stride, - bias=False, - ), - build_norm_layer( - self.norm_cfg, - self.upsample_channels[i - - self.upsample_start_idx], - )[1], - nn.ReLU(), - ) - deblocks.append(deblock) - self.blocks = nn.ModuleList(blocks) - self.deblocks = nn.ModuleList(deblocks) - - @property - def downsample_factor(self): - factor = np.prod(self.downsample_strides) - if len(self.upsample_strides) > 0: - factor /= self.upsample_strides[-1] - return factor - - def _make_layer(self, inplanes, planes, num_blocks, stride=1): - - block_list = [ - nn.ZeroPad2d(1), - nn.Conv2d(inplanes, planes, 3, stride=stride, bias=False), - build_norm_layer(self.norm_cfg, planes)[1], - nn.ReLU() - ] - - for j in range(num_blocks): - block_list.append( - nn.Conv2d(planes, planes, 3, padding=1, bias=False)) - block_list.append( - build_norm_layer(self.norm_cfg, planes)[1], - # nn.BatchNorm2d(planes, eps=1e-3, momentum=0.01) - ) - block_list.append(nn.ReLU()) - - block = nn.Sequential(*block_list) - return block, planes - - # default init_weights for conv(msra) and norm in ConvModule - def init_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - xavier_init(m, distribution='uniform') - - def forward(self, x): - """Forward of CenterPointRPN. 
- - Args: - x (torch.Tensor): Input feature with the shape of [B, C, H, M]. - - Returns: - torch.Tensor: Concatenate features. - """ - ups = [] - for i in range(len(self.blocks)): - x = self.blocks[i](x) - if i - self.upsample_start_idx >= 0: - ups.append(self.deblocks[i - self.upsample_start_idx](x)) - if len(ups) > 0: - x = torch.cat(ups, dim=1) - return x diff --git a/tests/test_necks.py b/tests/test_necks.py index 685dd5c65c..02b836d0a6 100644 --- a/tests/test_necks.py +++ b/tests/test_necks.py @@ -1,20 +1,31 @@ import torch -from mmdet3d.models.builder import build_neck +from mmdet3d.models.builder import build_backbone, build_neck def test_centerpoint_rpn(): - centerpoint_rpn_cfg = dict( - type='CenterPointRPN', + second_cfg = dict( + type='SECOND', + in_channels=64, + out_channels=[64, 128, 256], layer_nums=[3, 5, 5], - downsample_strides=[2, 2, 2], - downsample_channels=[64, 128, 256], + layer_strides=[2, 2, 2], + norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), + conv_cfg=dict(type='Conv2d', bias=False)) + + second = build_backbone(second_cfg) + + centerpoint_fpn_cfg = dict( + type='CenterPointFPN', + in_channels=[64, 128, 256], + out_channels=[128, 128, 128], upsample_strides=[0.5, 1, 2], - upsample_channels=[128, 128, 128], - input_channels=64, - norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01)) - centerpoint_rpn = build_neck(centerpoint_rpn_cfg) - centerpoint_rpn.init_weights() + norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), + upsample_cfg=dict(type='deconv', bias=False)) + + centerpoint_fpn = build_neck(centerpoint_fpn_cfg) + input = torch.rand([4, 64, 512, 512]) - output = centerpoint_rpn(input) + sec_output = second(input) + output = centerpoint_fpn(sec_output) assert output.shape == torch.Size([4, 384, 128, 128]) From 0ac6625d2d05fafe6b0b637540e97d90e54e3596 Mon Sep 17 00:00:00 2001 From: liyinhao Date: Sun, 2 Aug 2020 11:27:01 +0800 Subject: [PATCH 05/15] Add centerpoint_rpn. 
--- mmdet3d/models/necks/centerpoint_fpn.py | 91 +++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 mmdet3d/models/necks/centerpoint_fpn.py diff --git a/mmdet3d/models/necks/centerpoint_fpn.py b/mmdet3d/models/necks/centerpoint_fpn.py new file mode 100644 index 0000000000..88c44c23d9 --- /dev/null +++ b/mmdet3d/models/necks/centerpoint_fpn.py @@ -0,0 +1,91 @@ +import numpy as np +import torch +from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer, + constant_init, is_norm, kaiming_init) +from torch import nn as nn + +from mmdet.models import NECKS + + +@NECKS.register_module() +class CenterPointFPN(nn.Module): + """FPN used in SECOND/PointPillars/PartA2/MVXNet. + + Args: + in_channels (list[int]): Input channels of multi-scale feature maps. + out_channels (list[int]): Output channels of feature maps. + upsample_strides (list[int]): Strides used to upsample + the feature maps. + norm_cfg (dict): Config dict of normalization layers. + upsample_cfg (dict): Config dict of upsample layers. + conv_cfg (dict): Config dict of conv layers. 
+ """ + + def __init__(self, + in_channels=[128, 128, 256], + out_channels=[256, 256, 256], + upsample_strides=[1, 2, 4], + norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), + upsample_cfg=dict(type='deconv', bias=False), + conv_cfg=dict(type='Conv2d', bias=False)): + # if for GroupNorm, + # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True) + super(CenterPointFPN, self).__init__() + assert len(out_channels) == len(upsample_strides) == len(in_channels) + self.in_channels = in_channels + self.out_channels = out_channels + + deblocks = [] + for i, out_channel in enumerate(out_channels): + stride = upsample_strides[i] + if stride > 1: + upsample_layer = build_upsample_layer( + upsample_cfg, + in_channels=in_channels[i], + out_channels=out_channel, + kernel_size=stride, + stride=upsample_strides[i]) + deblock = nn.Sequential( + upsample_layer, + build_norm_layer(norm_cfg, out_channel)[1], + nn.ReLU(inplace=True)) + else: + stride = np.round(1 / stride).astype(np.int64) + upsample_layer = build_conv_layer( + conv_cfg, + in_channels=in_channels[i], + out_channels=out_channel, + kernel_size=stride, + stride=stride) + deblock = nn.Sequential( + upsample_layer, + build_norm_layer(norm_cfg, out_channel)[1], + nn.ReLU(inplace=True)) + deblocks.append(deblock) + self.deblocks = nn.ModuleList(deblocks) + + def init_weights(self): + """Initialize weights of FPN.""" + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif is_norm(m): + constant_init(m, 1) + + def forward(self, x): + """Forward function. + + Args: + x (torch.Tensor): 4D Tensor in (N, C, H, W) shape. + + Returns: + list[torch.Tensor]: Multi-level feature maps. 
+ """ + assert len(x) == len(self.in_channels) + ups = [deblock(x[i]) for i, deblock in enumerate(self.deblocks)] + + if len(ups) > 1: + out = torch.cat(ups, dim=1) + else: + out = ups[0] + return out From 25142c2ff183e6f75c94927987b1f66bb22ef5dc Mon Sep 17 00:00:00 2001 From: liyinhao Date: Mon, 3 Aug 2020 22:14:17 +0800 Subject: [PATCH 06/15] Change SECONDFPN, delete centerpoint_fpn --- mmdet3d/models/necks/__init__.py | 3 +- mmdet3d/models/necks/centerpoint_fpn.py | 91 ------------------------- mmdet3d/models/necks/second_fpn.py | 45 ++++++++---- tests/test_necks.py | 7 +- 4 files changed, 37 insertions(+), 109 deletions(-) delete mode 100644 mmdet3d/models/necks/centerpoint_fpn.py diff --git a/mmdet3d/models/necks/__init__.py b/mmdet3d/models/necks/__init__.py index 0a10a512f4..85904b497c 100644 --- a/mmdet3d/models/necks/__init__.py +++ b/mmdet3d/models/necks/__init__.py @@ -1,5 +1,4 @@ from mmdet.models.necks.fpn import FPN -from .centerpoint_fpn import CenterPointFPN from .second_fpn import SECONDFPN -__all__ = ['FPN', 'SECONDFPN', 'CenterPointFPN'] +__all__ = ['FPN', 'SECONDFPN'] diff --git a/mmdet3d/models/necks/centerpoint_fpn.py b/mmdet3d/models/necks/centerpoint_fpn.py deleted file mode 100644 index 88c44c23d9..0000000000 --- a/mmdet3d/models/necks/centerpoint_fpn.py +++ /dev/null @@ -1,91 +0,0 @@ -import numpy as np -import torch -from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer, - constant_init, is_norm, kaiming_init) -from torch import nn as nn - -from mmdet.models import NECKS - - -@NECKS.register_module() -class CenterPointFPN(nn.Module): - """FPN used in SECOND/PointPillars/PartA2/MVXNet. - - Args: - in_channels (list[int]): Input channels of multi-scale feature maps. - out_channels (list[int]): Output channels of feature maps. - upsample_strides (list[int]): Strides used to upsample - the feature maps. - norm_cfg (dict): Config dict of normalization layers. - upsample_cfg (dict): Config dict of upsample layers. 
- conv_cfg (dict): Config dict of conv layers. - """ - - def __init__(self, - in_channels=[128, 128, 256], - out_channels=[256, 256, 256], - upsample_strides=[1, 2, 4], - norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), - upsample_cfg=dict(type='deconv', bias=False), - conv_cfg=dict(type='Conv2d', bias=False)): - # if for GroupNorm, - # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True) - super(CenterPointFPN, self).__init__() - assert len(out_channels) == len(upsample_strides) == len(in_channels) - self.in_channels = in_channels - self.out_channels = out_channels - - deblocks = [] - for i, out_channel in enumerate(out_channels): - stride = upsample_strides[i] - if stride > 1: - upsample_layer = build_upsample_layer( - upsample_cfg, - in_channels=in_channels[i], - out_channels=out_channel, - kernel_size=stride, - stride=upsample_strides[i]) - deblock = nn.Sequential( - upsample_layer, - build_norm_layer(norm_cfg, out_channel)[1], - nn.ReLU(inplace=True)) - else: - stride = np.round(1 / stride).astype(np.int64) - upsample_layer = build_conv_layer( - conv_cfg, - in_channels=in_channels[i], - out_channels=out_channel, - kernel_size=stride, - stride=stride) - deblock = nn.Sequential( - upsample_layer, - build_norm_layer(norm_cfg, out_channel)[1], - nn.ReLU(inplace=True)) - deblocks.append(deblock) - self.deblocks = nn.ModuleList(deblocks) - - def init_weights(self): - """Initialize weights of FPN.""" - for m in self.modules(): - if isinstance(m, nn.Conv2d): - kaiming_init(m) - elif is_norm(m): - constant_init(m, 1) - - def forward(self, x): - """Forward function. - - Args: - x (torch.Tensor): 4D Tensor in (N, C, H, W) shape. - - Returns: - list[torch.Tensor]: Multi-level feature maps. 
- """ - assert len(x) == len(self.in_channels) - ups = [deblock(x[i]) for i, deblock in enumerate(self.deblocks)] - - if len(ups) > 1: - out = torch.cat(ups, dim=1) - else: - out = ups[0] - return out diff --git a/mmdet3d/models/necks/second_fpn.py b/mmdet3d/models/necks/second_fpn.py index 2b2a404840..b3c970f333 100644 --- a/mmdet3d/models/necks/second_fpn.py +++ b/mmdet3d/models/necks/second_fpn.py @@ -1,6 +1,7 @@ +import numpy as np import torch -from mmcv.cnn import (build_norm_layer, build_upsample_layer, constant_init, - is_norm, kaiming_init) +from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer, + constant_init, is_norm, kaiming_init) from torch import nn as nn from mmdet.models import NECKS @@ -23,7 +24,9 @@ def __init__(self, out_channels=[256, 256, 256], upsample_strides=[1, 2, 4], norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), - upsample_cfg=dict(type='deconv', bias=False)): + upsample_cfg=dict(type='deconv', bias=False), + conv_cfg=dict(type='Conv2d', bias=False), + use_conv_for_no_stride=False): # if for GroupNorm, # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True) super(SECONDFPN, self).__init__() @@ -33,16 +36,32 @@ def __init__(self, deblocks = [] for i, out_channel in enumerate(out_channels): - upsample_layer = build_upsample_layer( - upsample_cfg, - in_channels=in_channels[i], - out_channels=out_channel, - kernel_size=upsample_strides[i], - stride=upsample_strides[i]) - deblock = nn.Sequential(upsample_layer, - build_norm_layer(norm_cfg, out_channel)[1], - nn.ReLU(inplace=True)) - deblocks.append(deblock) + stride = upsample_strides[i] + if stride > 1 or (stride == 1 and not use_conv_for_no_stride): + upsample_layer = build_upsample_layer( + upsample_cfg, + in_channels=in_channels[i], + out_channels=out_channel, + kernel_size=upsample_strides[i], + stride=upsample_strides[i]) + deblock = nn.Sequential( + upsample_layer, + build_norm_layer(norm_cfg, out_channel)[1], + nn.ReLU(inplace=True)) + 
deblocks.append(deblock) + else: + stride = np.round(1 / stride).astype(np.int64) + upsample_layer = build_conv_layer( + conv_cfg, + in_channels=in_channels[i], + out_channels=out_channel, + kernel_size=stride, + stride=stride) + deblock = nn.Sequential( + upsample_layer, + build_norm_layer(norm_cfg, out_channel)[1], + nn.ReLU(inplace=True)) + deblocks.append(deblock) self.deblocks = nn.ModuleList(deblocks) def init_weights(self): diff --git a/tests/test_necks.py b/tests/test_necks.py index 02b836d0a6..d1f94a04e3 100644 --- a/tests/test_necks.py +++ b/tests/test_necks.py @@ -16,16 +16,17 @@ def test_centerpoint_rpn(): second = build_backbone(second_cfg) centerpoint_fpn_cfg = dict( - type='CenterPointFPN', + type='SECONDFPN', in_channels=[64, 128, 256], out_channels=[128, 128, 128], upsample_strides=[0.5, 1, 2], norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), - upsample_cfg=dict(type='deconv', bias=False)) + upsample_cfg=dict(type='deconv', bias=False), + use_conv_for_no_stride=True) centerpoint_fpn = build_neck(centerpoint_fpn_cfg) input = torch.rand([4, 64, 512, 512]) sec_output = second(input) output = centerpoint_fpn(sec_output) - assert output.shape == torch.Size([4, 384, 128, 128]) + assert output[0].shape == torch.Size([4, 384, 128, 128]) From 003c09e4a1eee42e288e2f2d46f4c166c2782daa Mon Sep 17 00:00:00 2001 From: liyinhao Date: Tue, 4 Aug 2020 08:26:40 +0800 Subject: [PATCH 07/15] Remove SparseBasicBlock. 
--- mmdet3d/models/middle_encoders/scn.py | 90 +++------------------------ tests/test_necks.py | 4 +- 2 files changed, 12 insertions(+), 82 deletions(-) diff --git a/mmdet3d/models/middle_encoders/scn.py b/mmdet3d/models/middle_encoders/scn.py index e7723a08fd..fe43cfdcb1 100644 --- a/mmdet3d/models/middle_encoders/scn.py +++ b/mmdet3d/models/middle_encoders/scn.py @@ -3,80 +3,10 @@ from torch import nn from mmdet3d.models.builder import MIDDLE_ENCODERS -from mmdet3d.ops import spconv +from mmdet3d.ops import SparseBasicBlock, spconv from mmdet3d.ops.spconv.conv import SparseConv3d, SubMConv3d -class SparseBasicBlock(spconv.SparseModule): - """Basic block for SpMiddleResNetFHD. - - Args: - inplanes (int): Number of input features. - planes (int): Number of output features. - kernel_size (int): Kernel_size of the conv layer. - Default: 3. - stride (int): Stride of the conv layer. - Default: 1. - bias (bool): Whether to use bias. - Default: False. - norm_cfg (dict): Configuration of normalization. - Default: dict(type='BN1d', eps=1e-3, momentum=0.01). - indice_key (str): Indice key for spconv. - Default: None. 
- """ - expansion = 1 - - def __init__( - self, - inplanes, - planes, - kernal_size=3, - stride=1, - bias=False, - norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), - indice_key=None, - ): - super(SparseBasicBlock, self).__init__() - - self.conv1 = spconv.SubMConv3d( - inplanes, - planes, - kernel_size=kernal_size, - stride=stride, - padding=1, - bias=bias, - indice_key=indice_key, - ) - self.bn1 = build_norm_layer(norm_cfg, planes)[1] - self.relu = nn.ReLU() - self.conv2 = spconv.SubMConv3d( - planes, - planes, - kernel_size=3, - stride=stride, - padding=1, - bias=bias, - indice_key=indice_key, - ) - self.bn2 = build_norm_layer(norm_cfg, planes)[1] - self.stride = stride - - def forward(self, x): - identity = x - - out = self.conv1(x) - out.features = self.bn1(out.features) - out.features = self.relu(out.features) - - out = self.conv2(out) - out.features = self.bn2(out.features) - - out.features += identity.features - out.features = self.relu(out.features) - - return out - - @MIDDLE_ENCODERS.register_module class SpMiddleResNetFHD(nn.Module): """Sparse Middle ResNet FHD. 
@@ -94,38 +24,38 @@ class SpMiddleResNetFHD(nn.Module): def __init__(self, num_input_features=128, norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), + conv_cfg=dict(type='SubMConv3d'), **kwargs): super(SpMiddleResNetFHD, self).__init__() self.dcn = None self.zero_init_residual = False - # input: # [1600, 1200, 41] self.middle_conv = spconv.SparseSequential( SubMConv3d( num_input_features, 16, 3, bias=False, indice_key='res0'), build_norm_layer(norm_cfg, 16)[1], nn.ReLU(), - SparseBasicBlock(16, 16, indice_key='res0'), - SparseBasicBlock(16, 16, indice_key='res0'), + SparseBasicBlock(16, 16, norm_cfg=norm_cfg, conv_cfg=conv_cfg), + SparseBasicBlock(16, 16, norm_cfg=norm_cfg, conv_cfg=conv_cfg), SparseConv3d(16, 32, 3, 2, padding=1, bias=False), # [1600, 1200, 41] -> [800, 600, 21] build_norm_layer(norm_cfg, 32)[1], nn.ReLU(), - SparseBasicBlock(32, 32, indice_key='res1'), - SparseBasicBlock(32, 32, indice_key='res1'), + SparseBasicBlock(32, 32, norm_cfg=norm_cfg, conv_cfg=conv_cfg), + SparseBasicBlock(32, 32, norm_cfg=norm_cfg, conv_cfg=conv_cfg), SparseConv3d(32, 64, 3, 2, padding=1, bias=False), # [800, 600, 21] -> [400, 300, 11] build_norm_layer(norm_cfg, 64)[1], nn.ReLU(), - SparseBasicBlock(64, 64, indice_key='res2'), - SparseBasicBlock(64, 64, indice_key='res2'), + SparseBasicBlock(64, 64, norm_cfg=norm_cfg, conv_cfg=conv_cfg), + SparseBasicBlock(64, 64, norm_cfg=norm_cfg, conv_cfg=conv_cfg), SparseConv3d(64, 128, 3, 2, padding=[0, 1, 1], bias=False), # [400, 300, 11] -> [200, 150, 5] build_norm_layer(norm_cfg, 128)[1], nn.ReLU(), - SparseBasicBlock(128, 128, indice_key='res3'), - SparseBasicBlock(128, 128, indice_key='res3'), + SparseBasicBlock(128, 128, norm_cfg=norm_cfg, conv_cfg=conv_cfg), + SparseBasicBlock(128, 128, norm_cfg=norm_cfg, conv_cfg=conv_cfg), SparseConv3d(128, 128, (3, 1, 1), (2, 1, 1), bias=False), # [200, 150, 5] -> [200, 150, 2] build_norm_layer(norm_cfg, 128)[1], diff --git a/tests/test_necks.py b/tests/test_necks.py index 
d1f94a04e3..4c236e99b1 100644 --- a/tests/test_necks.py +++ b/tests/test_necks.py @@ -24,9 +24,9 @@ def test_centerpoint_rpn(): upsample_cfg=dict(type='deconv', bias=False), use_conv_for_no_stride=True) - centerpoint_fpn = build_neck(centerpoint_fpn_cfg) + second_fpn = build_neck(centerpoint_fpn_cfg) input = torch.rand([4, 64, 512, 512]) sec_output = second(input) - output = centerpoint_fpn(sec_output) + output = second_fpn(sec_output) assert output[0].shape == torch.Size([4, 384, 128, 128]) From 6ce7399e03a7528f20505eae8d57caeba3515dcd Mon Sep 17 00:00:00 2001 From: liyinhao Date: Tue, 4 Aug 2020 15:28:20 +0800 Subject: [PATCH 08/15] Change SpMiddleResNetFHD to SparseEncoder. --- mmdet3d/models/middle_encoders/scn.py | 8 ++-- .../models/middle_encoders/sparse_encoder.py | 39 ++++++++++++++++--- tests/test_middle_encoders.py | 16 ++++++++ 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/mmdet3d/models/middle_encoders/scn.py b/mmdet3d/models/middle_encoders/scn.py index fe43cfdcb1..901d5f1773 100644 --- a/mmdet3d/models/middle_encoders/scn.py +++ b/mmdet3d/models/middle_encoders/scn.py @@ -16,9 +16,10 @@ class SpMiddleResNetFHD(nn.Module): Args: num_input_features (int): Number of input features. Default: 128. - - norm_cfg (dict): Configuration of normalization. + norm_cfg (dict): Config of normalization. Default: dict(type='BN1d', eps=1e-3, momentum=0.01). + conv_cfg (dict): Config of conv. + Default: dict(type='SubMConv3d'). 
""" def __init__(self, @@ -32,8 +33,7 @@ def __init__(self, self.zero_init_residual = False # input: # [1600, 1200, 41] self.middle_conv = spconv.SparseSequential( - SubMConv3d( - num_input_features, 16, 3, bias=False, indice_key='res0'), + SubMConv3d(num_input_features, 16, 3, bias=False), build_norm_layer(norm_cfg, 16)[1], nn.ReLU(), SparseBasicBlock(16, 16, norm_cfg=norm_cfg, conv_cfg=conv_cfg), diff --git a/mmdet3d/models/middle_encoders/sparse_encoder.py b/mmdet3d/models/middle_encoders/sparse_encoder.py index 7f372b60ff..6794c5e276 100644 --- a/mmdet3d/models/middle_encoders/sparse_encoder.py +++ b/mmdet3d/models/middle_encoders/sparse_encoder.py @@ -1,6 +1,6 @@ from torch import nn as nn -from mmdet3d.ops import make_sparse_convmodule +from mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule from mmdet3d.ops import spconv as spconv from ..registry import MIDDLE_ENCODERS @@ -30,7 +30,8 @@ def __init__(self, encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)), encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, - 1))): + 1)), + option=None): super().__init__() self.sparse_shape = sparse_shape self.in_channels = in_channels @@ -66,7 +67,10 @@ def __init__(self, conv_type='SubMConv3d') encoder_out_channels = self.make_encoder_layers( - make_sparse_convmodule, norm_cfg, self.base_channels) + make_sparse_convmodule, + norm_cfg, + self.base_channels, + option=option) self.conv_out = make_sparse_convmodule( encoder_out_channels, @@ -111,7 +115,12 @@ def forward(self, voxel_features, coors, batch_size): return spatial_features - def make_encoder_layers(self, make_block, norm_cfg, in_channels): + def make_encoder_layers(self, + make_block, + norm_cfg, + in_channels, + option=None, + conv_cfg=dict(type='SubMConv3d')): """make encoder layers using sparse convs. 
Args: @@ -130,7 +139,7 @@ def make_encoder_layers(self, make_block, norm_cfg, in_channels): padding = tuple(self.encoder_paddings[i])[j] # each stage started with a spconv layer # except the first stage - if i != 0 and j == 0: + if i != 0 and j == 0 and option is None: blocks_list.append( make_block( in_channels, @@ -141,6 +150,26 @@ def make_encoder_layers(self, make_block, norm_cfg, in_channels): padding=padding, indice_key=f'spconv{i + 1}', conv_type='SparseConv3d')) + elif option == 'basicblock': + if j == len(blocks) - 1 and i != len( + self.encoder_channels) - 1: + blocks_list.append( + make_block( + in_channels, + out_channels, + 3, + norm_cfg=norm_cfg, + stride=2, + padding=padding, + indice_key=f'spconv{i + 1}', + conv_type='SparseConv3d')) + else: + blocks_list.append( + SparseBasicBlock( + out_channels, + out_channels, + norm_cfg=norm_cfg, + conv_cfg=conv_cfg)) else: blocks_list.append( make_block( diff --git a/tests/test_middle_encoders.py b/tests/test_middle_encoders.py index 1de83d0221..75cfd40132 100644 --- a/tests/test_middle_encoders.py +++ b/tests/test_middle_encoders.py @@ -20,3 +20,19 @@ def test_sp_middle_resnet_FHD(): ret = sp_middle_resnet_FHD(voxel_features, coors, 4, np.array([1024, 1024, 40])) assert ret.shape == torch.Size([4, 256, 128, 128]) + + +def test_sparse_encoder(): + middle_encoder = dict( + type='SparseEncoder', + in_channels=4, + sparse_shape=[41, 1600, 1408], + order=('conv', 'norm', 'act'), + encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, + 128)), + encoder_paddings=((1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, + 1)), + option='basicblock') + + sparse_encoder = build_middle_encoder(middle_encoder) + print(sparse_encoder) From dbe07f41207856dcb1912a5a19f607a8a508b4ee Mon Sep 17 00:00:00 2001 From: liyinhao Date: Tue, 4 Aug 2020 15:46:39 +0800 Subject: [PATCH 09/15] Finish SparseEncoder unittest. 
--- mmdet3d/models/backbones/__init__.py | 3 +- mmdet3d/models/middle_encoders/scn.py | 89 --------------------------- tests/test_middle_encoders.py | 32 +++------- tests/test_voxel_encoders.py | 1 + 4 files changed, 12 insertions(+), 113 deletions(-) delete mode 100644 mmdet3d/models/middle_encoders/scn.py diff --git a/mmdet3d/models/backbones/__init__.py b/mmdet3d/models/backbones/__init__.py index 76fd2c14df..46c9b44aa3 100644 --- a/mmdet3d/models/backbones/__init__.py +++ b/mmdet3d/models/backbones/__init__.py @@ -1,4 +1,3 @@ -from mmdet3d.models.middle_encoders.scn import SpMiddleResNetFHD from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt from .nostem_regnet import NoStemRegNet from .pointnet2_sa_ssg import PointNet2SASSG @@ -6,5 +5,5 @@ __all__ = [ 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'NoStemRegNet', - 'SECOND', 'PointNet2SASSG', 'SpMiddleResNetFHD' + 'SECOND', 'PointNet2SASSG' ] diff --git a/mmdet3d/models/middle_encoders/scn.py b/mmdet3d/models/middle_encoders/scn.py deleted file mode 100644 index 901d5f1773..0000000000 --- a/mmdet3d/models/middle_encoders/scn.py +++ /dev/null @@ -1,89 +0,0 @@ -import numpy as np -from mmcv.cnn import build_norm_layer -from torch import nn - -from mmdet3d.models.builder import MIDDLE_ENCODERS -from mmdet3d.ops import SparseBasicBlock, spconv -from mmdet3d.ops.spconv.conv import SparseConv3d, SubMConv3d - - -@MIDDLE_ENCODERS.register_module -class SpMiddleResNetFHD(nn.Module): - """Sparse Middle ResNet FHD. - - Middle encoder used by CenterPoint. - - Args: - num_input_features (int): Number of input features. - Default: 128. - norm_cfg (dict): Config of normalization. - Default: dict(type='BN1d', eps=1e-3, momentum=0.01). - conv_cfg (dict): Config of conv. - Default: dict(type='SubMConv3d'). 
- """ - - def __init__(self, - num_input_features=128, - norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), - conv_cfg=dict(type='SubMConv3d'), - **kwargs): - super(SpMiddleResNetFHD, self).__init__() - - self.dcn = None - self.zero_init_residual = False - # input: # [1600, 1200, 41] - self.middle_conv = spconv.SparseSequential( - SubMConv3d(num_input_features, 16, 3, bias=False), - build_norm_layer(norm_cfg, 16)[1], - nn.ReLU(), - SparseBasicBlock(16, 16, norm_cfg=norm_cfg, conv_cfg=conv_cfg), - SparseBasicBlock(16, 16, norm_cfg=norm_cfg, conv_cfg=conv_cfg), - SparseConv3d(16, 32, 3, 2, padding=1, - bias=False), # [1600, 1200, 41] -> [800, 600, 21] - build_norm_layer(norm_cfg, 32)[1], - nn.ReLU(), - SparseBasicBlock(32, 32, norm_cfg=norm_cfg, conv_cfg=conv_cfg), - SparseBasicBlock(32, 32, norm_cfg=norm_cfg, conv_cfg=conv_cfg), - SparseConv3d(32, 64, 3, 2, padding=1, - bias=False), # [800, 600, 21] -> [400, 300, 11] - build_norm_layer(norm_cfg, 64)[1], - nn.ReLU(), - SparseBasicBlock(64, 64, norm_cfg=norm_cfg, conv_cfg=conv_cfg), - SparseBasicBlock(64, 64, norm_cfg=norm_cfg, conv_cfg=conv_cfg), - SparseConv3d(64, 128, 3, 2, padding=[0, 1, 1], - bias=False), # [400, 300, 11] -> [200, 150, 5] - build_norm_layer(norm_cfg, 128)[1], - nn.ReLU(), - SparseBasicBlock(128, 128, norm_cfg=norm_cfg, conv_cfg=conv_cfg), - SparseBasicBlock(128, 128, norm_cfg=norm_cfg, conv_cfg=conv_cfg), - SparseConv3d(128, 128, (3, 1, 1), (2, 1, 1), - bias=False), # [200, 150, 5] -> [200, 150, 2] - build_norm_layer(norm_cfg, 128)[1], - nn.ReLU(), - ) - - def forward(self, voxel_features, coors, batch_size, input_shape): - """Forward of SpMiddleResNetFHD. - - Args: - voxel_features (torch.Tensor): Voxel features with the - shape of [N, 5]. - coors (torch.Tensor): Voxel features with the shape of [N, 4]. - batch_size (int): Batch size. - input_shape (np.ndarray): Shape of input. - - Returns: - torch.Tensor: Result tensor. 
- """ - sparse_shape = np.array(input_shape[::-1]) + [1, 0, 0] - - coors = coors.int() - ret = spconv.SparseConvTensor(voxel_features, coors, sparse_shape, - batch_size) - ret = self.middle_conv(ret) - ret = ret.dense() - - N, C, D, H, W = ret.shape - ret = ret.view(N, C * D, H, W) - - return ret diff --git a/tests/test_middle_encoders.py b/tests/test_middle_encoders.py index 75cfd40132..e3ab306eb4 100644 --- a/tests/test_middle_encoders.py +++ b/tests/test_middle_encoders.py @@ -1,32 +1,16 @@ -import numpy as np import pytest import torch from mmdet3d.models.builder import build_middle_encoder -def test_sp_middle_resnet_FHD(): +def test_sparse_encoder(): if not torch.cuda.is_available(): pytest.skip('test requires GPU and torch+cuda') - sp_middle_resnet_FHD_cfg = dict( - type='SpMiddleResNetFHD', num_input_features=5, ds_factor=8) - - sp_middle_resnet_FHD = build_middle_encoder( - sp_middle_resnet_FHD_cfg).cuda() - - voxel_features = torch.rand([207842, 5]).cuda() - coors = torch.randint(0, 4, [207842, 4]).cuda() - - ret = sp_middle_resnet_FHD(voxel_features, coors, 4, - np.array([1024, 1024, 40])) - assert ret.shape == torch.Size([4, 256, 128, 128]) - - -def test_sparse_encoder(): - middle_encoder = dict( + sparse_encoder_cfg = dict( type='SparseEncoder', - in_channels=4, - sparse_shape=[41, 1600, 1408], + in_channels=5, + sparse_shape=[40, 1024, 1024], order=('conv', 'norm', 'act'), encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), @@ -34,5 +18,9 @@ def test_sparse_encoder(): 1)), option='basicblock') - sparse_encoder = build_middle_encoder(middle_encoder) - print(sparse_encoder) + sparse_encoder = build_middle_encoder(sparse_encoder_cfg).cuda() + voxel_features = torch.rand([207842, 5]).cuda() + coors = torch.randint(0, 4, [207842, 4]).cuda() + + ret = sparse_encoder(voxel_features, coors, 4) + assert ret.shape == torch.Size([4, 256, 128, 128]) diff --git a/tests/test_voxel_encoders.py b/tests/test_voxel_encoders.py index 
c2f2a82ebc..db4526e075 100644 --- a/tests/test_voxel_encoders.py +++ b/tests/test_voxel_encoders.py @@ -41,3 +41,4 @@ def test_hard_simple_VFE(): outputs = hard_simple_VFE(features, num_voxels, None) assert outputs.shape == torch.Size([240000, 5]) + assert torch.isclose(torch.mean(outputs), torch.mean(features)) From 93f5f45fffc49e4ee5f67958b62b505b13ab8baa Mon Sep 17 00:00:00 2001 From: liyinhao Date: Tue, 4 Aug 2020 16:44:38 +0800 Subject: [PATCH 10/15] Change test_hard_simple_VFE. --- tests/test_voxel_encoders.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_voxel_encoders.py b/tests/test_voxel_encoders.py index db4526e075..8c9423b9df 100644 --- a/tests/test_voxel_encoders.py +++ b/tests/test_voxel_encoders.py @@ -37,8 +37,7 @@ def test_hard_simple_VFE(): hard_simple_VFE_cfg = dict(type='HardSimpleVFE', num_features=5) hard_simple_VFE = build_voxel_encoder(hard_simple_VFE_cfg) features = torch.rand([240000, 10, 5]) - num_voxels = torch.randint(0, 100, [240000]) + num_voxels = torch.randint(1, 10, [240000]) outputs = hard_simple_VFE(features, num_voxels, None) assert outputs.shape == torch.Size([240000, 5]) - assert torch.isclose(torch.mean(outputs), torch.mean(features)) From a18ab5b2ceaddb11488f07a530adb73e93676a6c Mon Sep 17 00:00:00 2001 From: liyinhao Date: Tue, 11 Aug 2020 22:14:32 +0800 Subject: [PATCH 11/15] Change option, add legacy. 
--- .../models/middle_encoders/sparse_encoder.py | 13 +++++---- mmdet3d/models/necks/second_fpn.py | 13 +++++---- .../models/voxel_encoders/pillar_encoder.py | 27 ++++++++++++++----- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/mmdet3d/models/middle_encoders/sparse_encoder.py b/mmdet3d/models/middle_encoders/sparse_encoder.py index 6794c5e276..42d64522c0 100644 --- a/mmdet3d/models/middle_encoders/sparse_encoder.py +++ b/mmdet3d/models/middle_encoders/sparse_encoder.py @@ -31,7 +31,7 @@ def __init__(self, 64)), encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)), - option=None): + block_type='submblock'): super().__init__() self.sparse_shape = sparse_shape self.in_channels = in_channels @@ -70,7 +70,7 @@ def __init__(self, make_sparse_convmodule, norm_cfg, self.base_channels, - option=option) + block_type=block_type) self.conv_out = make_sparse_convmodule( encoder_out_channels, @@ -119,7 +119,7 @@ def make_encoder_layers(self, make_block, norm_cfg, in_channels, - option=None, + block_type='submblock', conv_cfg=dict(type='SubMConv3d')): """make encoder layers using sparse convs. @@ -127,10 +127,13 @@ def make_encoder_layers(self, make_block (method): A bounded function to build blocks. norm_cfg (dict[str]): Config of normalization layer. in_channels (int): The number of encoder input channels. + block_type (str): Type of the block to use. + conv_cfg (dict): Config of conv layer. Returns: int: The number of encoder output channels. 
""" + assert block_type in ['submblock', 'basicblock'] self.encoder_layers = spconv.SparseSequential() for i, blocks in enumerate(self.encoder_channels): @@ -139,7 +142,7 @@ def make_encoder_layers(self, padding = tuple(self.encoder_paddings[i])[j] # each stage started with a spconv layer # except the first stage - if i != 0 and j == 0 and option is None: + if i != 0 and j == 0 and block_type == 'submblock': blocks_list.append( make_block( in_channels, @@ -150,7 +153,7 @@ def make_encoder_layers(self, padding=padding, indice_key=f'spconv{i + 1}', conv_type='SparseConv3d')) - elif option == 'basicblock': + elif block_type == 'basicblock': if j == len(blocks) - 1 and i != len( self.encoder_channels) - 1: blocks_list.append( diff --git a/mmdet3d/models/necks/second_fpn.py b/mmdet3d/models/necks/second_fpn.py index b3c970f333..c9bd8bc5e9 100644 --- a/mmdet3d/models/necks/second_fpn.py +++ b/mmdet3d/models/necks/second_fpn.py @@ -12,11 +12,14 @@ class SECONDFPN(nn.Module): """FPN used in SECOND/PointPillars/PartA2/MVXNet. Args: - in_channels (list[int]): Input channels of multi-scale feature maps - out_channels (list[int]): Output channels of feature maps - upsample_strides (list[int]): Strides used to upsample the feature maps - norm_cfg (dict): Config dict of normalization layers - upsample_cfg (dict): Config dict of upsample layers + in_channels (list[int]): Input channels of multi-scale feature maps. + out_channels (list[int]): Output channels of feature maps. + upsample_strides (list[int]): Strides used to upsample the + feature maps. + norm_cfg (dict): Config dict of normalization layers. + upsample_cfg (dict): Config dict of upsample layers. + conv_cfg (dict): Config dict of conv layers. + use_conv_for_no_stride (bool): Whether to use conv when stride is 1. 
""" def __init__(self, diff --git a/mmdet3d/models/voxel_encoders/pillar_encoder.py b/mmdet3d/models/voxel_encoders/pillar_encoder.py index 570cceac69..2bb4a42a7a 100644 --- a/mmdet3d/models/voxel_encoders/pillar_encoder.py +++ b/mmdet3d/models/voxel_encoders/pillar_encoder.py @@ -82,14 +82,16 @@ def __init__(self, self.y_offset = self.vy / 2 + point_cloud_range[1] self.point_cloud_range = point_cloud_range - def forward(self, features, num_points, coors): + def forward(self, features, num_points, coors, legacy=True): """Forward function. Args: features (torch.Tensor): Point features or raw points in shape (N, M, C). num_points (torch.Tensor): Number of points in each pillar. - coors (torch.Tensor): Coordinates of each voxel + coors (torch.Tensor): Coordinates of each voxel. + legacy (bool): Whether to use the new behavior or + the original behavior. Returns: torch.Tensor: Features of pillars. @@ -106,11 +108,22 @@ def forward(self, features, num_points, coors): # Find distance of x, y, and z from pillar center dtype = features.dtype if self._with_voxel_center: - f_center = torch.zeros_like(features[:, :, :2]) - f_center[:, :, 0] = features[:, :, 0] - ( - coors[:, 3].to(dtype).unsqueeze(1) * self.vx + self.x_offset) - f_center[:, :, 1] = features[:, :, 1] - ( - coors[:, 2].to(dtype).unsqueeze(1) * self.vy + self.y_offset) + if legacy: + f_center = features[:, :, :2] + f_center[:, :, 0] = f_center[:, :, 0] - ( + coors[:, 3].type_as(features).unsqueeze(1) * self.vx + + self.x_offset) + f_center[:, :, 1] = f_center[:, :, 1] - ( + coors[:, 2].type_as(features).unsqueeze(1) * self.vy + + self.y_offset) + else: + f_center = torch.zeros_like(features[:, :, :2]) + f_center[:, :, 0] = features[:, :, 0] - ( + coors[:, 3].to(dtype).unsqueeze(1) * self.vx + + self.x_offset) + f_center[:, :, 1] = features[:, :, 1] - ( + coors[:, 2].to(dtype).unsqueeze(1) * self.vy + + self.y_offset) features_ls.append(f_center) if self._with_distance: From 
d2bb205bfab12170225fc3ef5d838d634c9f8dd0 Mon Sep 17 00:00:00 2001 From: liyinhao Date: Fri, 14 Aug 2020 22:17:15 +0800 Subject: [PATCH 12/15] Change docstring, change legacy. --- .../models/middle_encoders/sparse_encoder.py | 2 ++ mmdet3d/models/necks/second_fpn.py | 15 ++++------ .../models/voxel_encoders/pillar_encoder.py | 28 ++++++++++--------- .../models/voxel_encoders/voxel_encoder.py | 3 ++ 4 files changed, 25 insertions(+), 23 deletions(-) diff --git a/mmdet3d/models/middle_encoders/sparse_encoder.py b/mmdet3d/models/middle_encoders/sparse_encoder.py index 42d64522c0..446d74bd55 100644 --- a/mmdet3d/models/middle_encoders/sparse_encoder.py +++ b/mmdet3d/models/middle_encoders/sparse_encoder.py @@ -18,6 +18,7 @@ class SparseEncoder(nn.Module): encoder_channels (tuple[tuple[int]]): Convolutional channels of each encode block. encoder_paddings (tuple[tuple[int]]): Paddings of each encode block. + block_type (str): Type of the block to use. """ def __init__(self, @@ -33,6 +34,7 @@ def __init__(self, 1)), block_type='submblock'): super().__init__() + assert block_type in ['submblock', 'basicblock'] self.sparse_shape = sparse_shape self.in_channels = in_channels self.order = order diff --git a/mmdet3d/models/necks/second_fpn.py b/mmdet3d/models/necks/second_fpn.py index c9bd8bc5e9..ec9d40ee5c 100644 --- a/mmdet3d/models/necks/second_fpn.py +++ b/mmdet3d/models/necks/second_fpn.py @@ -47,11 +47,6 @@ def __init__(self, out_channels=out_channel, kernel_size=upsample_strides[i], stride=upsample_strides[i]) - deblock = nn.Sequential( - upsample_layer, - build_norm_layer(norm_cfg, out_channel)[1], - nn.ReLU(inplace=True)) - deblocks.append(deblock) else: stride = np.round(1 / stride).astype(np.int64) upsample_layer = build_conv_layer( @@ -60,11 +55,11 @@ def __init__(self, out_channels=out_channel, kernel_size=stride, stride=stride) - deblock = nn.Sequential( - upsample_layer, - build_norm_layer(norm_cfg, out_channel)[1], - nn.ReLU(inplace=True)) - 
deblocks.append(deblock) + + deblock = nn.Sequential(upsample_layer, + build_norm_layer(norm_cfg, out_channel)[1], + nn.ReLU(inplace=True)) + deblocks.append(deblock) self.deblocks = nn.ModuleList(deblocks) def init_weights(self): diff --git a/mmdet3d/models/voxel_encoders/pillar_encoder.py b/mmdet3d/models/voxel_encoders/pillar_encoder.py index 2bb4a42a7a..3f5e20d060 100644 --- a/mmdet3d/models/voxel_encoders/pillar_encoder.py +++ b/mmdet3d/models/voxel_encoders/pillar_encoder.py @@ -31,6 +31,8 @@ class PillarFeatureNet(nn.Module): Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01). mode (str, optional): The mode to gather point features. Options are 'max' or 'avg'. Defaults to 'max'. + legacy (bool): Whether to use the new behavior or + the original behavior. """ def __init__(self, @@ -42,9 +44,11 @@ def __init__(self, voxel_size=(0.2, 0.2, 4), point_cloud_range=(0, -40, -3, 70.4, 40, 1), norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), - mode='max'): + mode='max', + legacy=False): super(PillarFeatureNet, self).__init__() assert len(feat_channels) > 0 + self.egacy = legacy if with_cluster_center: in_channels += 3 if with_voxel_center: @@ -82,7 +86,7 @@ def __init__(self, self.y_offset = self.vy / 2 + point_cloud_range[1] self.point_cloud_range = point_cloud_range - def forward(self, features, num_points, coors, legacy=True): + def forward(self, features, num_points, coors): """Forward function. Args: @@ -90,8 +94,6 @@ def forward(self, features, num_points, coors, legacy=True): (N, M, C). num_points (torch.Tensor): Number of points in each pillar. coors (torch.Tensor): Coordinates of each voxel. - legacy (bool): Whether to use the new behavior or - the original behavior. Returns: torch.Tensor: Features of pillars. 
@@ -108,15 +110,7 @@ def forward(self, features, num_points, coors, legacy=True): # Find distance of x, y, and z from pillar center dtype = features.dtype if self._with_voxel_center: - if legacy: - f_center = features[:, :, :2] - f_center[:, :, 0] = f_center[:, :, 0] - ( - coors[:, 3].type_as(features).unsqueeze(1) * self.vx + - self.x_offset) - f_center[:, :, 1] = f_center[:, :, 1] - ( - coors[:, 2].type_as(features).unsqueeze(1) * self.vy + - self.y_offset) - else: + if self.legacy: f_center = torch.zeros_like(features[:, :, :2]) f_center[:, :, 0] = features[:, :, 0] - ( coors[:, 3].to(dtype).unsqueeze(1) * self.vx + @@ -124,6 +118,14 @@ def forward(self, features, num_points, coors, legacy=True): f_center[:, :, 1] = features[:, :, 1] - ( coors[:, 2].to(dtype).unsqueeze(1) * self.vy + self.y_offset) + else: + f_center = features[:, :, :2] + f_center[:, :, 0] = f_center[:, :, 0] - ( + coors[:, 3].type_as(features).unsqueeze(1) * self.vx + + self.x_offset) + f_center[:, :, 1] = f_center[:, :, 1] - ( + coors[:, 2].type_as(features).unsqueeze(1) * self.vy + + self.y_offset) features_ls.append(f_center) if self._with_distance: diff --git a/mmdet3d/models/voxel_encoders/voxel_encoder.py b/mmdet3d/models/voxel_encoders/voxel_encoder.py index 7bd1400b53..b647eb608f 100644 --- a/mmdet3d/models/voxel_encoders/voxel_encoder.py +++ b/mmdet3d/models/voxel_encoders/voxel_encoder.py @@ -13,6 +13,9 @@ class HardSimpleVFE(nn.Module): """Simple voxel feature encoder used in SECOND. It simply averages the values of points in a voxel. + + Args: + num_features (int): Number of features to use. Default: 4. """ def __init__(self, num_features=4): From c47e207128d97d941e0f907297394fcb34acca9e Mon Sep 17 00:00:00 2001 From: liyinhao Date: Sat, 15 Aug 2020 20:57:57 +0800 Subject: [PATCH 13/15] Fix legacy bug. 
--- mmdet3d/models/voxel_encoders/pillar_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmdet3d/models/voxel_encoders/pillar_encoder.py b/mmdet3d/models/voxel_encoders/pillar_encoder.py index 3f5e20d060..39699ff13d 100644 --- a/mmdet3d/models/voxel_encoders/pillar_encoder.py +++ b/mmdet3d/models/voxel_encoders/pillar_encoder.py @@ -48,7 +48,7 @@ def __init__(self, legacy=False): super(PillarFeatureNet, self).__init__() assert len(feat_channels) > 0 - self.egacy = legacy + self.legacy = legacy if with_cluster_center: in_channels += 3 if with_voxel_center: From df9507f0c90bd15bbf50257b50e3119ffac486f7 Mon Sep 17 00:00:00 2001 From: liyinhao Date: Tue, 18 Aug 2020 16:10:47 +0800 Subject: [PATCH 14/15] Change unittest, change docstring. --- .../models/middle_encoders/sparse_encoder.py | 18 +++++++++------- .../models/voxel_encoders/pillar_encoder.py | 6 +++--- tests/test_necks.py | 21 +++++++++++++++---- tests/test_voxel_encoders.py | 12 +---------- 4 files changed, 31 insertions(+), 26 deletions(-) diff --git a/mmdet3d/models/middle_encoders/sparse_encoder.py b/mmdet3d/models/middle_encoders/sparse_encoder.py index 446d74bd55..e6e56b4a3e 100644 --- a/mmdet3d/models/middle_encoders/sparse_encoder.py +++ b/mmdet3d/models/middle_encoders/sparse_encoder.py @@ -18,7 +18,7 @@ class SparseEncoder(nn.Module): encoder_channels (tuple[tuple[int]]): Convolutional channels of each encode block. encoder_paddings (tuple[tuple[int]]): Paddings of each encode block. - block_type (str): Type of the block to use. + block_type (str): Type of the block to use. Defaults to 'conv_module'. 
""" def __init__(self, @@ -32,9 +32,9 @@ def __init__(self, 64)), encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)), - block_type='submblock'): + block_type='conv_module'): super().__init__() - assert block_type in ['submblock', 'basicblock'] + assert block_type in ['conv_module', 'basicblock'] self.sparse_shape = sparse_shape self.in_channels = in_channels self.order = order @@ -121,7 +121,7 @@ def make_encoder_layers(self, make_block, norm_cfg, in_channels, - block_type='submblock', + block_type='conv_module', conv_cfg=dict(type='SubMConv3d')): """make encoder layers using sparse convs. @@ -129,13 +129,15 @@ def make_encoder_layers(self, make_block (method): A bounded function to build blocks. norm_cfg (dict[str]): Config of normalization layer. in_channels (int): The number of encoder input channels. - block_type (str): Type of the block to use. - conv_cfg (dict): Config of conv layer. + block_type (str): Type of the block to use. Defaults to + 'conv_module'. + conv_cfg (dict): Config of conv layer. Defaults to + dict(type='SubMConv3d'). Returns: int: The number of encoder output channels. 
""" - assert block_type in ['submblock', 'basicblock'] + assert block_type in ['conv_module', 'basicblock'] self.encoder_layers = spconv.SparseSequential() for i, blocks in enumerate(self.encoder_channels): @@ -144,7 +146,7 @@ def make_encoder_layers(self, padding = tuple(self.encoder_paddings[i])[j] # each stage started with a spconv layer # except the first stage - if i != 0 and j == 0 and block_type == 'submblock': + if i != 0 and j == 0 and block_type == 'conv_module': blocks_list.append( make_block( in_channels, diff --git a/mmdet3d/models/voxel_encoders/pillar_encoder.py b/mmdet3d/models/voxel_encoders/pillar_encoder.py index 39699ff13d..3411260971 100644 --- a/mmdet3d/models/voxel_encoders/pillar_encoder.py +++ b/mmdet3d/models/voxel_encoders/pillar_encoder.py @@ -32,7 +32,7 @@ class PillarFeatureNet(nn.Module): mode (str, optional): The mode to gather point features. Options are 'max' or 'avg'. Defaults to 'max'. legacy (bool): Whether to use the new behavior or - the original behavior. + the original behavior. Defaults to True. 
""" def __init__(self, @@ -45,7 +45,7 @@ def __init__(self, point_cloud_range=(0, -40, -3, 70.4, 40, 1), norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), mode='max', - legacy=False): + legacy=True): super(PillarFeatureNet, self).__init__() assert len(feat_channels) > 0 self.legacy = legacy @@ -110,7 +110,7 @@ def forward(self, features, num_points, coors): # Find distance of x, y, and z from pillar center dtype = features.dtype if self._with_voxel_center: - if self.legacy: + if not self.legacy: f_center = torch.zeros_like(features[:, :, :2]) f_center[:, :, 0] = features[:, :, 0] - ( coors[:, 3].to(dtype).unsqueeze(1) * self.vx + diff --git a/tests/test_necks.py b/tests/test_necks.py index 4c236e99b1..7e924cadaa 100644 --- a/tests/test_necks.py +++ b/tests/test_necks.py @@ -3,7 +3,8 @@ from mmdet3d.models.builder import build_backbone, build_neck -def test_centerpoint_rpn(): +def test_centerpoint_fpn(): + second_cfg = dict( type='SECOND', in_channels=64, @@ -15,6 +16,7 @@ def test_centerpoint_rpn(): second = build_backbone(second_cfg) + # centerpoint usage of fpn centerpoint_fpn_cfg = dict( type='SECONDFPN', in_channels=[64, 128, 256], @@ -24,9 +26,20 @@ def test_centerpoint_rpn(): upsample_cfg=dict(type='deconv', bias=False), use_conv_for_no_stride=True) - second_fpn = build_neck(centerpoint_fpn_cfg) + # original usage of fpn + fpn_cfg = dict( + type='SECONDFPN', + in_channels=[64, 128, 256], + upsample_strides=[1, 2, 4], + out_channels=[128, 128, 128]) + + second_fpn = build_neck(fpn_cfg) + + centerpoint_second_fpn = build_neck(centerpoint_fpn_cfg) input = torch.rand([4, 64, 512, 512]) sec_output = second(input) - output = second_fpn(sec_output) - assert output[0].shape == torch.Size([4, 384, 128, 128]) + centerpoint_output = centerpoint_second_fpn(sec_output) + second_output = second_fpn(sec_output) + assert centerpoint_output[0].shape == torch.Size([4, 384, 128, 128]) + assert second_output[0].shape == torch.Size([4, 384, 256, 256]) diff --git 
a/tests/test_voxel_encoders.py b/tests/test_voxel_encoders.py index 8c9423b9df..f7503a8c34 100644 --- a/tests/test_voxel_encoders.py +++ b/tests/test_voxel_encoders.py @@ -1,18 +1,9 @@ -import numpy as np import torch from mmdet3d.models.builder import build_voxel_encoder -def _set_seed(): - torch.manual_seed(0) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - np.random.seed(0) - - def test_pillar_feature_net(): - _set_seed() pillar_feature_net_cfg = dict( type='PillarFeatureNet', in_channels=5, @@ -20,8 +11,7 @@ def test_pillar_feature_net(): with_distance=False, voxel_size=(0.2, 0.2, 8), point_cloud_range=(-51.2, -51.2, -5.0, 51.2, 51.2, 3.0), - norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), - ) + norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01)) pillar_feature_net = build_voxel_encoder(pillar_feature_net_cfg) From aedad0ad85191533296d9b5ea23ebe25b7ca7a77 Mon Sep 17 00:00:00 2001 From: liyinhao Date: Tue, 18 Aug 2020 16:46:14 +0800 Subject: [PATCH 15/15] Change docstring. --- mmdet3d/models/middle_encoders/sparse_encoder.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mmdet3d/models/middle_encoders/sparse_encoder.py b/mmdet3d/models/middle_encoders/sparse_encoder.py index e6e56b4a3e..00462aad01 100644 --- a/mmdet3d/models/middle_encoders/sparse_encoder.py +++ b/mmdet3d/models/middle_encoders/sparse_encoder.py @@ -12,12 +12,18 @@ class SparseEncoder(nn.Module): Args: in_channels (int): The number of input channels. sparse_shape (list[int]): The sparse shape of input tensor. - norm_cfg (dict): Config of normalization layer. + order (list[str]): Order of conv module. Defaults to ('conv', + 'norm', 'act'). + norm_cfg (dict): Config of normalization layer. Defaults to + dict(type='BN1d', eps=1e-3, momentum=0.01). base_channels (int): Out channels for conv_input layer. + Defaults to 16. output_channels (int): Out channels for conv_out layer. + Defaults to 128. 
encoder_channels (tuple[tuple[int]]): Convolutional channels of each encode block. + Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)). encoder_paddings (tuple[tuple[int]]): Paddings of each encode block. + Defaults to ((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)). block_type (str): Type of the block to use. Defaults to 'conv_module'. """