Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add TPVFormer Projects #2399

Merged
merged 31 commits into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
8a629f5
fix polarmix UT
sunjiahao1999 Feb 27, 2023
10bdefe
Merge branch 'dev-1.x' of github.com:open-mmlab/mmdetection3d into de…
sunjiahao1999 Feb 28, 2023
e66e5a7
Merge branch 'dev-1.x' of github.com:open-mmlab/mmdetection3d into de…
sunjiahao1999 Mar 1, 2023
372ecba
Merge branch 'dev-1.x' of github.com:open-mmlab/mmdetection3d into de…
sunjiahao1999 Mar 6, 2023
e78b860
Merge branch 'dev-1.x' of github.com:open-mmlab/mmdetection3d into de…
sunjiahao1999 Mar 8, 2023
e74d324
init tpvformer
sunjiahao1999 Mar 14, 2023
cbd6b80
add nus seg
sunjiahao1999 Mar 22, 2023
90ff44e
add nus seg
sunjiahao1999 Mar 22, 2023
851a7c9
Merge branch 'dev-1.x' of github.com:open-mmlab/mmdetection3d into de…
sunjiahao1999 Mar 22, 2023
94c8d89
Merge branch 'dev-1.x' into tpvformer
sunjiahao1999 Mar 22, 2023
6f19324
merge from dev-1.x
sunjiahao1999 Mar 28, 2023
f37db85
test done
sunjiahao1999 Mar 29, 2023
5bf1961
Merge branch 'dev-1.x' into tpvformer
sunjiahao1999 Mar 29, 2023
dfdb70f
Delete change_key.py
sunjiahao1999 Mar 29, 2023
175fe18
Delete test_dcn.py
sunjiahao1999 Mar 29, 2023
03ca29c
remove seg eval
sunjiahao1999 Mar 29, 2023
64be4a9
fix encoder
sunjiahao1999 Mar 29, 2023
9bd5d93
init train
sunjiahao1999 Apr 9, 2023
79923c1
train ready
sunjiahao1999 Apr 12, 2023
1a52343
Merge branch 'dev-1.x' into tpvformer
sunjiahao1999 Apr 13, 2023
0c337a5
remove asynctest
sunjiahao1999 Apr 19, 2023
7782c5d
change test.yml
sunjiahao1999 Apr 19, 2023
ee69b5f
pr_stage_test.yml & merge_stage_test.yml
sunjiahao1999 Apr 19, 2023
0b3342a
pip install wheel
sunjiahao1999 Apr 19, 2023
baa35c9
pip install wheel all
sunjiahao1999 Apr 19, 2023
71159c0
Merge branch 'dev-1.x' into tpvformer
sunjiahao1999 Apr 25, 2023
5893451
check type hint
sunjiahao1999 Apr 25, 2023
3369fd6
check comments
sunjiahao1999 Apr 25, 2023
8a06062
remove Photo aug
sunjiahao1999 Apr 25, 2023
8414600
fix p2v
sunjiahao1999 Apr 25, 2023
5fbf125
fix docsting & fix config filepath
sunjiahao1999 May 10, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion mmdet3d/models/decode_heads/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .cylinder3d_head import Cylinder3DHead
from .decode_head import Base3DDecodeHead
from .dgcnn_head import DGCNNHead
from .minkunet_head import MinkUNetHead
from .paconv_head import PAConvHead
from .pointnet2_head import PointNet2Head

# Public names exported by the decode_heads package.
# The superseded entry line (a bare 'MinkUNetHead' with no trailing comma) is
# dropped: left in place it would be implicitly concatenated with the next
# string literal into a single bogus name.
__all__ = [
    'PointNet2Head', 'DGCNNHead', 'PAConvHead', 'Cylinder3DHead',
    'Base3DDecodeHead', 'MinkUNetHead'
]
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Inherit the shared runtime settings through a repository-relative path.
# Only one `_base_` assignment is kept; the earlier `mmdet3d::` one was
# immediately overwritten and therefore dead.
_base_ = ['../../../configs/_base_/default_runtime.py']
# Modules to import when the config is loaded; import failures are not
# tolerated.
custom_imports = dict(
    imports=['projects.CenterFormer.centerformer'], allow_failed_imports=False)

Expand Down
2 changes: 1 addition & 1 deletion projects/DETR3D/configs/detr3d_r101_gridmask.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Base configs inherited by this file. Exactly one runtime base is listed:
# two adjacent string literals without a comma would be concatenated into a
# single, non-existent path.
_base_ = [
    # 'mmdet3d::_base_/datasets/nus-3d.py',
    '../../../configs/_base_/default_runtime.py'
]

# Modules to import when the config is loaded.
custom_imports = dict(imports=['projects.DETR3D.detr3d'])
Expand Down
5 changes: 3 additions & 2 deletions projects/PETR/configs/petr_vovnet_gridmask_p4_800x320.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Base configs inherited by this file, all given as repository-relative
# paths. The superseded `mmdet3d::` entries are removed; the last of them had
# no trailing comma and would have been concatenated with the next literal.
_base_ = [
    '../../../configs/_base_/datasets/nus-3d.py',
    '../../../configs/_base_/default_runtime.py',
    '../../../configs/_base_/schedules/cyclic-20e.py'
]
# Normalisation config named for the backbone (used further down the file,
# outside this view).
backbone_norm_cfg = dict(type='LN', requires_grad=True)
# Modules to import when the config is loaded.
custom_imports = dict(imports=['projects.PETR.petr'])
Expand Down
317 changes: 317 additions & 0 deletions projects/TPVFormer/config/tpvformer_8xb1-2x_nus-seg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,317 @@
# Inherit the shared runtime settings through a repository-relative path.
_base_ = ['../../../configs/_base_/default_runtime.py']

# Modules to import when the config is loaded; import failures are not
# tolerated.
custom_imports = dict(
    imports=['projects.TPVFormer.tpvformer'],
    allow_failed_imports=False,
)

# Dataset class name and the root directory of the nuScenes data.
dataset_type = 'NuScenesSegDataset'
data_root = 'data/nuscenes/'

# Sub-directories for the LiDAR points, the per-point semantic masks and the
# six camera views (presumably resolved relative to `data_root` -- confirm
# against the dataset class).
data_prefix = dict(
    pts='samples/LIDAR_TOP',
    pts_semantic_mask='lidarseg/v1.0-trainval',
    CAM_FRONT='samples/CAM_FRONT',
    CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
    CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
    CAM_BACK='samples/CAM_BACK',
    CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
    CAM_BACK_LEFT='samples/CAM_BACK_LEFT',
)

# File-backend options handed to every loading transform below; None leaves
# the choice to the transforms' own default.
backend_args = None

# Training pipeline: load images, points and segmentation labels, apply
# photometric augmentation to the images, remap labels, then pack the sample.
train_pipeline = [
    dict(
        type='BEVLoadMultiViewImageFromFiles',
        to_float32=False,
        color_type='unchanged',
        num_views=6,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=3,
        backend_args=backend_args),
    # Only the 3D segmentation mask is requested; box, label and attribute
    # annotations are switched off.
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        with_attr_label=False,
        seg_3d_dtype='np.uint8'),
    dict(
        type='MultiViewWrapper',
        transforms=dict(type='PhotoMetricDistortion3D')),
    dict(type='SegLabelMapping'),
    dict(
        type='Pack3DDetInputs',
        keys=['img', 'points', 'pts_semantic_mask'],
        meta_keys=['lidar2img'])
]

# Validation pipeline: identical to the training one except that the
# photometric augmentation step is left out.
val_pipeline = [
    dict(
        type='BEVLoadMultiViewImageFromFiles',
        to_float32=False,
        color_type='unchanged',
        num_views=6,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=3,
        backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        with_attr_label=False,
        seg_3d_dtype='np.uint8'),
    dict(type='SegLabelMapping'),
    dict(
        type='Pack3DDetInputs',
        keys=['img', 'points', 'pts_semantic_mask'],
        meta_keys=['lidar2img'])
]

# Testing reuses the validation pipeline (same list object).
test_pipeline = val_pipeline

# Training loader: one sample per batch, shuffled, incomplete batches dropped.
train_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    drop_last=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=data_prefix,
        ann_file='nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        test_mode=False,
    ),
)

# Validation loader: fixed order, every sample kept.
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=data_prefix,
        ann_file='nuscenes_infos_val.pkl',
        pipeline=val_pipeline,
        test_mode=True,
    ),
)

# Testing reuses the validation loader (same dict object).
test_dataloader = val_dataloader

# The same segmentation metric is used for validation and testing.
val_evaluator = dict(type='SegMetric')

test_evaluator = val_evaluator

# Visualisation results go to the local backend only.
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
)

# AdamW with gradient-norm clipping; parameters matched by the 'backbone'
# key get a 0.1 learning-rate multiplier.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=2e-4, weight_decay=0.01),
    paramwise_cfg=dict(custom_keys={'backbone': dict(lr_mult=0.1)}),
    clip_grad=dict(max_norm=35, norm_type=2),
)

# Learning-rate schedule: an iteration-based linear ramp over the first 500
# iterations plus a cosine annealing defined over 24 epochs and converted to
# iteration granularity.
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1e-5,
        by_epoch=False,
        begin=0,
        end=500),
    dict(
        type='CosineAnnealingLR',
        begin=0,
        T_max=24,
        by_epoch=True,
        eta_min=1e-6,
        convert_to_iter_based=True),
]

# Train for 24 epochs, validating after every epoch.
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# Checkpoint hook configured with interval=1.
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=1))

# Shared hyper-parameters referenced by the layer and model definitions.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
_dim_ = 128
num_heads = 8
_ffn_dim_ = _dim_ * 2  # feed-forward width is twice the embedding width

# Tri-perspective-view grid size and per-axis scale factors.
tpv_h_ = 200
tpv_w_ = 200
tpv_z_ = 16
scale_h = 1
scale_w = 1
scale_z = 1
# Three-element settings consumed by the attention modules below
# (presumably one entry per TPV plane -- confirm against the modules).
num_points_in_pillar = [4, 32, 32]
num_points = [8, 64, 64]
hybrid_attn_anchors = 16
hybrid_attn_points = 32
hybrid_attn_init = 0

# Voxel grid shape: the TPV size multiplied by the matching scale factor.
grid_shape = [tpv_h_ * scale_h, tpv_w_ * scale_w, tpv_z_ * scale_z]

# Layer that runs cross-view hybrid self-attention, then image
# cross-attention, then an FFN, with a norm after each step.
self_cross_layer = dict(
    type='TPVFormerLayer',
    attn_cfgs=[
        dict(
            type='TPVCrossViewHybridAttention',
            tpv_h=tpv_h_,
            tpv_w=tpv_w_,
            tpv_z=tpv_z_,
            num_anchors=hybrid_attn_anchors,
            embed_dims=_dim_,
            num_heads=num_heads,
            num_points=hybrid_attn_points,
            init_mode=hybrid_attn_init,
            dropout=0.1),
        dict(
            type='TPVImageCrossAttention',
            pc_range=point_cloud_range,
            num_cams=6,
            dropout=0.1,
            deformable_attention=dict(
                type='TPVMSDeformableAttention3D',
                embed_dims=_dim_,
                num_heads=num_heads,
                num_points=num_points,
                num_z_anchors=num_points_in_pillar,
                num_levels=4,
                floor_sampling_offset=False,
                tpv_h=tpv_h_,
                tpv_w=tpv_w_,
                tpv_z=tpv_z_),
            embed_dims=_dim_,
            tpv_h=tpv_h_,
            tpv_w=tpv_w_,
            tpv_z=tpv_z_)
    ],
    feedforward_channels=_ffn_dim_,
    ffn_dropout=0.1,
    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm'))

# Layer that runs cross-view hybrid self-attention followed by an FFN, with a
# norm after each step (no image cross-attention).
self_layer = dict(
    type='TPVFormerLayer',
    attn_cfgs=[
        dict(
            type='TPVCrossViewHybridAttention',
            tpv_h=tpv_h_,
            tpv_w=tpv_w_,
            tpv_z=tpv_z_,
            num_anchors=hybrid_attn_anchors,
            embed_dims=_dim_,
            num_heads=num_heads,
            num_points=hybrid_attn_points,
            init_mode=hybrid_attn_init,
            dropout=0.1,
        ),
    ],
    feedforward_channels=_ffn_dim_,
    ffn_dropout=0.1,
    operation_order=('self_attn', 'norm', 'ffn', 'norm'),
)

# Full TPVFormer model definition: data preprocessor, image backbone and
# neck, TPV encoder and segmentation decode head.
model = dict(
    type='TPVFormer',
    data_preprocessor=dict(
        type='TPVFormerDataPreprocessor',
        pad_size_divisor=32,
        # Per-channel mean subtraction with unit std.
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        voxel=True,
        voxel_type='cylindrical',
        voxel_layer=dict(
            grid_shape=grid_shape,
            point_cloud_range=point_cloud_range,
            max_num_points=-1,
            max_voxels=-1,
        ),
        batch_augments=[
            dict(
                type='GridMask',
                use_h=True,
                use_w=True,
                rotate=1,
                offset=False,
                ratio=0.5,
                mode=1,
                prob=0.7),
        ]),
    backbone=dict(
        type='mmdet.ResNet',
        depth=101,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN2d', requires_grad=False),
        norm_eval=True,
        style='caffe',
        # NOTE: the original DCNv2 prints a log message when load_state_dict
        # is performed.
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        stage_with_dcn=(False, False, True, True),
        # Backbone weights are read from the 'backbone.' prefix of the
        # pretrained checkpoint.
        init_cfg=dict(
            type='Pretrained',
            checkpoint='checkpoints/tpvformer_r101_dcn_fcos3d_pretrain.pth',
            prefix='backbone.')),
    neck=dict(
        type='mmdet.FPN',
        in_channels=[512, 1024, 2048],
        out_channels=_dim_,
        start_level=0,
        add_extra_convs='on_output',
        num_outs=4,
        relu_before_extra_convs=True,
        # Neck weights are read from the 'neck.' prefix of the same
        # checkpoint.
        init_cfg=dict(
            type='Pretrained',
            checkpoint='checkpoints/tpvformer_r101_dcn_fcos3d_pretrain.pth',
            prefix='neck.')),
    encoder=dict(
        type='TPVFormerEncoder',
        tpv_h=tpv_h_,
        tpv_w=tpv_w_,
        tpv_z=tpv_z_,
        num_layers=5,
        pc_range=point_cloud_range,
        num_points_in_pillar=num_points_in_pillar,
        num_points_in_pillar_cross_view=[16, 16, 16],
        return_intermediate=False,
        # Three self+cross attention layers followed by two self-attention
        # only layers (five entries, matching num_layers).
        transformerlayers=[self_cross_layer] * 3 + [self_layer] * 2,
        embed_dims=_dim_,
        positional_encoding=dict(
            type='TPVFormerPositionalEncoding',
            num_feats=[48, 48, 32],
            h=tpv_h_,
            w=tpv_w_,
            z=tpv_z_)),
    decode_head=dict(
        type='TPVFormerDecoder',
        tpv_h=tpv_h_,
        tpv_w=tpv_w_,
        tpv_z=tpv_z_,
        num_classes=17,
        in_dims=_dim_,
        hidden_dims=_dim_ * 2,
        out_dims=_dim_,
        scale_h=scale_h,
        scale_w=scale_w,
        scale_z=scale_z,
        # Cross-entropy and Lovasz losses, both with weight 1.0; they take
        # different inputs ('voxel' and 'points' respectively).
        loss_ce=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,
            avg_non_ignore=True,
            loss_weight=1.0),
        loss_lovasz=dict(
            type='LovaszLoss', loss_weight=1.0, reduction='none'),
        lovasz_input='points',
        ce_input='voxel',
        ignore_index=0))
17 changes: 17 additions & 0 deletions projects/TPVFormer/tpvformer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from .cross_view_hybrid_attention import TPVCrossViewHybridAttention
from .data_preprocessor import TPVFormerDataPreprocessor
from .image_cross_attention import TPVImageCrossAttention
from .loading import BEVLoadMultiViewImageFromFiles, SegLabelMapping
from .nuscenes_dataset import NuScenesSegDataset
from .positional_encoding import TPVFormerPositionalEncoding
from .tpvformer import TPVFormer
from .tpvformer_encoder import TPVFormerEncoder
from .tpvformer_head import TPVFormerDecoder
from .tpvformer_layer import TPVFormerLayer

# Public names exported by the tpvformer package.
__all__ = [
    'TPVCrossViewHybridAttention',
    'TPVImageCrossAttention',
    'TPVFormerPositionalEncoding',
    'TPVFormer',
    'TPVFormerEncoder',
    'TPVFormerLayer',
    'NuScenesSegDataset',
    'BEVLoadMultiViewImageFromFiles',
    'SegLabelMapping',
    'TPVFormerDecoder',
    'TPVFormerDataPreprocessor',
]
Loading