Skip to content

Commit

Permalink
Merge 5fbf125 into 35fd839
Browse files Browse the repository at this point in the history
  • Loading branch information
sunjiahao1999 authored May 10, 2023
2 parents 35fd839 + 5fbf125 commit b934f20
Show file tree
Hide file tree
Showing 18 changed files with 2,449 additions and 7 deletions.
3 changes: 2 additions & 1 deletion mmdet3d/models/decode_heads/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .cylinder3d_head import Cylinder3DHead
from .decode_head import Base3DDecodeHead
from .dgcnn_head import DGCNNHead
from .minkunet_head import MinkUNetHead
from .paconv_head import PAConvHead
from .pointnet2_head import PointNet2Head

__all__ = [
'PointNet2Head', 'DGCNNHead', 'PAConvHead', 'Cylinder3DHead',
'MinkUNetHead'
'Base3DDecodeHead', 'MinkUNetHead'
]
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
_base_ = ['mmdet3d::_base_/default_runtime.py']
_base_ = ['../../../configs/_base_/default_runtime.py']
custom_imports = dict(
imports=['projects.CenterFormer.centerformer'], allow_failed_imports=False)

Expand Down
2 changes: 1 addition & 1 deletion projects/DETR3D/configs/detr3d_r101_gridmask.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
_base_ = [
# 'mmdet3d::_base_/datasets/nus-3d.py',
'mmdet3d::_base_/default_runtime.py'
'../../../configs/_base_/default_runtime.py'
]

custom_imports = dict(imports=['projects.DETR3D.detr3d'])
Expand Down
5 changes: 3 additions & 2 deletions projects/PETR/configs/petr_vovnet_gridmask_p4_800x320.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
_base_ = [
'mmdet3d::_base_/datasets/nus-3d.py', 'mmdet3d::_base_/default_runtime.py',
'mmdet3d::_base_/schedules/cyclic-20e.py'
'../../../configs/_base_/datasets/nus-3d.py',
'../../../configs/_base_/default_runtime.py',
'../../../configs/_base_/schedules/cyclic-20e.py'
]
backbone_norm_cfg = dict(type='LN', requires_grad=True)
custom_imports = dict(imports=['projects.PETR.petr'])
Expand Down
317 changes: 317 additions & 0 deletions projects/TPVFormer/config/tpvformer_8xb1-2x_nus-seg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,317 @@
# Inherit the shared runtime defaults from the repository-level base config.
_base_ = ['../../../configs/_base_/default_runtime.py']

# The project package must be importable: a failed import is not tolerated.
custom_imports = dict(
    imports=['projects.TPVFormer.tpvformer'],
    allow_failed_imports=False,
)

# Dataset location and type.
data_root = 'data/nuscenes/'
dataset_type = 'NuScenesSegDataset'

# Path prefix for every data source: lidar points, per-point semantic masks
# and one `samples/<CAMERA>` folder for each of the six cameras.
data_prefix = dict(
    pts='samples/LIDAR_TOP',
    pts_semantic_mask='lidarseg/v1.0-trainval',
    **{
        camera: f'samples/{camera}'
        for camera in ('CAM_FRONT', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT',
                       'CAM_BACK', 'CAM_BACK_RIGHT', 'CAM_BACK_LEFT')
    })

# Passed as `backend_args` to the two file-loading transforms of each
# pipeline below.
backend_args = None

# Training pipeline. Steps, in order: load the camera images (num_views=6),
# load the lidar points, load the per-point segmentation annotations, run
# PhotoMetricDistortion3D through MultiViewWrapper, run SegLabelMapping and
# pack the listed keys.
train_pipeline = [
    dict(
        type='BEVLoadMultiViewImageFromFiles',
        to_float32=False,
        color_type='unchanged',
        num_views=6,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=3,
        backend_args=backend_args),
    # Only the 3D segmentation annotations are requested; boxes, labels and
    # attribute labels are switched off.
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        with_attr_label=False,
        seg_3d_dtype='np.uint8'),
    dict(
        type='MultiViewWrapper',
        transforms=dict(type='PhotoMetricDistortion3D')),
    dict(type='SegLabelMapping'),
    dict(
        type='Pack3DDetInputs',
        keys=['img', 'points', 'pts_semantic_mask'],
        meta_keys=['lidar2img'])
]

# Validation pipeline: the same steps as `train_pipeline` except that the
# MultiViewWrapper / PhotoMetricDistortion3D step is left out.
val_pipeline = [
    dict(
        type='BEVLoadMultiViewImageFromFiles',
        to_float32=False,
        color_type='unchanged',
        num_views=6,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=3,
        backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        with_attr_label=False,
        seg_3d_dtype='np.uint8'),
    dict(type='SegLabelMapping'),
    dict(
        type='Pack3DDetInputs',
        keys=['img', 'points', 'pts_semantic_mask'],
        meta_keys=['lidar2img'])
]

# Testing reuses the validation pipeline (same list object).
test_pipeline = val_pipeline

# Training loader: one sample per batch, shuffled, incomplete last batch
# dropped; reads `nuscenes_infos_train.pkl` through `train_pipeline`.
train_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    drop_last=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=data_prefix,
        ann_file='nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        test_mode=False))

# Validation loader: same dataset type, root and prefixes, but unshuffled,
# keeps the last batch, and reads `nuscenes_infos_val.pkl` through
# `val_pipeline` with test_mode=True.
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=data_prefix,
        ann_file='nuscenes_infos_val.pkl',
        pipeline=val_pipeline,
        test_mode=True))

# Testing reuses the validation loader (same dict object).
test_dataloader = val_dataloader

# Validation and testing share one SegMetric evaluator config.
val_evaluator = dict(type='SegMetric')

test_evaluator = val_evaluator

# Epoch-based training with validation after every epoch.
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# Checkpoint hook configured with interval=1.
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=1))

# Visualizer writing through a single local backend.
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
)

# AdamW with gradient-norm clipping; parameters matched by the 'backbone'
# key get a 0.1 learning-rate multiplier.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=2e-4, weight_decay=0.01),
    paramwise_cfg=dict(custom_keys=dict(backbone=dict(lr_mult=0.1))),
    clip_grad=dict(max_norm=35, norm_type=2),
)

# Iteration-based linear schedule over the first 500 iterations, plus an
# epoch-based cosine schedule whose period equals the training length.
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1e-5,
        by_epoch=False,
        begin=0,
        end=500),
    dict(
        type='CosineAnnealingLR',
        begin=0,
        T_max=train_cfg['max_epochs'],
        by_epoch=True,
        eta_min=1e-6,
        convert_to_iter_based=True),
]

# Range handed to the voxel layer, attention modules and encoder below;
# presumably [x_min, y_min, z_min, x_max, y_max, z_max] -- TODO confirm.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]

# Channel widths: embedding size and the feed-forward size (twice as wide).
_dim_ = 128
_ffn_dim_ = 2 * _dim_
num_heads = 8

# Size of the three TPV axes and the per-axis scale factors.
tpv_h_, tpv_w_, tpv_z_ = 200, 200, 16
scale_h = scale_w = scale_z = 1

# Per-plane sampling settings used by the attention modules.
num_points_in_pillar = [4, 32, 32]
num_points = [8, 64, 64]
hybrid_attn_anchors, hybrid_attn_points, hybrid_attn_init = 16, 32, 0

# Voxel grid shape: each TPV axis size multiplied by its scale factor.
grid_shape = [
    size * scale
    for size, scale in zip((tpv_h_, tpv_w_, tpv_z_),
                           (scale_h, scale_w, scale_z))
]

# Encoder layer with two attention modules: a TPVCrossViewHybridAttention
# followed by a TPVImageCrossAttention. Its operation order is
# self_attn -> norm -> cross_attn -> norm -> ffn -> norm.
self_cross_layer = dict(
    type='TPVFormerLayer',
    attn_cfgs=[
        dict(
            type='TPVCrossViewHybridAttention',
            tpv_h=tpv_h_,
            tpv_w=tpv_w_,
            tpv_z=tpv_z_,
            num_anchors=hybrid_attn_anchors,
            embed_dims=_dim_,
            num_heads=num_heads,
            num_points=hybrid_attn_points,
            init_mode=hybrid_attn_init,
            dropout=0.1),
        # Cross attention configured for 6 cameras, wrapping a
        # TPVMSDeformableAttention3D with num_levels=4.
        dict(
            type='TPVImageCrossAttention',
            pc_range=point_cloud_range,
            num_cams=6,
            dropout=0.1,
            deformable_attention=dict(
                type='TPVMSDeformableAttention3D',
                embed_dims=_dim_,
                num_heads=num_heads,
                num_points=num_points,
                num_z_anchors=num_points_in_pillar,
                num_levels=4,
                floor_sampling_offset=False,
                tpv_h=tpv_h_,
                tpv_w=tpv_w_,
                tpv_z=tpv_z_),
            embed_dims=_dim_,
            tpv_h=tpv_h_,
            tpv_w=tpv_w_,
            tpv_z=tpv_z_)
    ],
    feedforward_channels=_ffn_dim_,
    ffn_dropout=0.1,
    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm'))

# Encoder layer with only the TPVCrossViewHybridAttention module (same
# settings as the first attention of `self_cross_layer`); no cross_attn step
# in its operation order.
self_layer = dict(
    type='TPVFormerLayer',
    attn_cfgs=[
        dict(
            type='TPVCrossViewHybridAttention',
            tpv_h=tpv_h_,
            tpv_w=tpv_w_,
            tpv_z=tpv_z_,
            num_anchors=hybrid_attn_anchors,
            embed_dims=_dim_,
            num_heads=num_heads,
            num_points=hybrid_attn_points,
            init_mode=hybrid_attn_init,
            dropout=0.1)
    ],
    feedforward_channels=_ffn_dim_,
    ffn_dropout=0.1,
    operation_order=('self_attn', 'norm', 'ffn', 'norm'))

# Full TPVFormer model: data preprocessor, ResNet-101 backbone, FPN neck,
# TPVFormer encoder and TPVFormer decode head.
model = dict(
    type='TPVFormer',
    # Image normalisation values, voxelisation settings and a GridMask batch
    # augmentation.
    # NOTE(review): voxel_type is 'cylindrical' while `point_cloud_range`
    # holds symmetric +/-51.2 bounds that look Cartesian -- confirm how
    # TPVFormerDataPreprocessor interprets this combination.
    data_preprocessor=dict(
        type='TPVFormerDataPreprocessor',
        pad_size_divisor=32,
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        voxel=True,
        voxel_type='cylindrical',
        voxel_layer=dict(
            grid_shape=grid_shape,
            point_cloud_range=point_cloud_range,
            max_num_points=-1,
            max_voxels=-1,
        ),
        batch_augments=[
            dict(
                type='GridMask',
                use_h=True,
                use_w=True,
                rotate=1,
                offset=False,
                ratio=0.5,
                mode=1,
                prob=0.7)
        ]),
    # Caffe-style ResNet-101 with DCNv2 enabled on the last two stages;
    # weights come from the `backbone.` entries of the pretrain checkpoint.
    backbone=dict(
        type='mmdet.ResNet',
        depth=101,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN2d', requires_grad=False),
        norm_eval=True,
        style='caffe',
        dcn=dict(
            type='DCNv2', deform_groups=1, fallback_on_stride=False
        ),  # the original DCNv2 prints a log when load_state_dict is performed
        stage_with_dcn=(False, False, True, True),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='checkpoints/tpvformer_r101_dcn_fcos3d_pretrain.pth',
            prefix='backbone.')),
    # FPN over three input levels producing four `_dim_`-channel outputs;
    # weights come from the `neck.` entries of the same checkpoint.
    neck=dict(
        type='mmdet.FPN',
        in_channels=[512, 1024, 2048],
        out_channels=_dim_,
        start_level=0,
        add_extra_convs='on_output',
        num_outs=4,
        relu_before_extra_convs=True,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='checkpoints/tpvformer_r101_dcn_fcos3d_pretrain.pth',
            prefix='neck.')),
    # Five encoder layers: three `self_cross_layer` followed by two
    # `self_layer` (the same dict objects are referenced repeatedly).
    encoder=dict(
        type='TPVFormerEncoder',
        tpv_h=tpv_h_,
        tpv_w=tpv_w_,
        tpv_z=tpv_z_,
        num_layers=5,
        pc_range=point_cloud_range,
        num_points_in_pillar=num_points_in_pillar,
        num_points_in_pillar_cross_view=[16, 16, 16],
        return_intermediate=False,
        transformerlayers=[
            self_cross_layer, self_cross_layer, self_cross_layer, self_layer,
            self_layer
        ],
        embed_dims=_dim_,
        # num_feats sums to 128, the value of `_dim_`.
        positional_encoding=dict(
            type='TPVFormerPositionalEncoding',
            num_feats=[48, 48, 32],
            h=tpv_h_,
            w=tpv_w_,
            z=tpv_z_)),
    # Decode head for 17 classes with index 0 ignored; cross-entropy takes
    # the 'voxel' input and Lovasz takes the 'points' input.
    decode_head=dict(
        type='TPVFormerDecoder',
        tpv_h=tpv_h_,
        tpv_w=tpv_w_,
        tpv_z=tpv_z_,
        num_classes=17,
        in_dims=_dim_,
        hidden_dims=2 * _dim_,
        out_dims=_dim_,
        scale_h=scale_h,
        scale_w=scale_w,
        scale_z=scale_z,
        loss_ce=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,
            avg_non_ignore=True,
            loss_weight=1.0),
        loss_lovasz=dict(type='LovaszLoss', loss_weight=1.0, reduction='none'),
        lovasz_input='points',
        ce_input='voxel',
        ignore_index=0))
17 changes: 17 additions & 0 deletions projects/TPVFormer/tpvformer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from .cross_view_hybrid_attention import TPVCrossViewHybridAttention
from .data_preprocessor import TPVFormerDataPreprocessor
from .image_cross_attention import TPVImageCrossAttention
from .loading import BEVLoadMultiViewImageFromFiles, SegLabelMapping
from .nuscenes_dataset import NuScenesSegDataset
from .positional_encoding import TPVFormerPositionalEncoding
from .tpvformer import TPVFormer
from .tpvformer_encoder import TPVFormerEncoder
from .tpvformer_head import TPVFormerDecoder
from .tpvformer_layer import TPVFormerLayer

# Names exported by the TPVFormer project package.
__all__ = [
    'TPVCrossViewHybridAttention',
    'TPVImageCrossAttention',
    'TPVFormerPositionalEncoding',
    'TPVFormer',
    'TPVFormerEncoder',
    'TPVFormerLayer',
    'NuScenesSegDataset',
    'BEVLoadMultiViewImageFromFiles',
    'SegLabelMapping',
    'TPVFormerDecoder',
    'TPVFormerDataPreprocessor',
]
Loading

0 comments on commit b934f20

Please sign in to comment.