55 commits
50f2b76
feat: MVXNet_voxelnext_rest
aravindbisht Apr 20, 2025
4e83dc5
feat:voxelNext_backbone
aravindbisht Apr 20, 2025
a9c24b5
Update __init__.py
aravindbisht Apr 20, 2025
dc1e7db
Create voxelnext_head.py
aravindbisht Apr 20, 2025
3d8af38
Update __init__.py
aravindbisht Apr 20, 2025
ea8f248
Create sparse_conv.py
aravindbisht Apr 20, 2025
f44e8f5
Update __init__.py
aravindbisht Apr 20, 2025
d5e1f41
Create voxelnext_neck.py
aravindbisht Apr 20, 2025
f0b4d9e
Update __init__.py
aravindbisht Apr 20, 2025
5920b02
Rename mvxnet_voxelnext_optimized_kitti.py to mvxnet_voxelnext_improv…
aravindbisht Apr 20, 2025
f8b1057
Create mvxnet_voxelnext_optimized_kitti.py
aravindbisht Apr 21, 2025
ffb7469
Update __init__.py
aravindbisht Apr 21, 2025
b678fb0
Create optimized_voxelnext_backbone.py
aravindbisht Apr 21, 2025
6209f18
Update __init__.py
aravindbisht Apr 21, 2025
818b1e0
Create optimized_voxelnext_head.py
aravindbisht Apr 21, 2025
0539def
Update __init__.py
aravindbisht Apr 21, 2025
d778053
Create optimized_sparse_conv.py
aravindbisht Apr 21, 2025
d636b97
Update __init__.py
aravindbisht Apr 21, 2025
e06145b
Create optimized_voxelnext_neck.py
aravindbisht Apr 21, 2025
6e2fed1
Update __init__.py
aravindbisht Apr 21, 2025
a5d0bbb
Create attention_fusion.py
aravindbisht Apr 21, 2025
3277577
Update mvxnet_voxelnext_improved_kitti.py
aravindbisht Apr 22, 2025
aa1457e
Update voxelnext_backbone.py
aravindbisht Apr 22, 2025
9c48ec5
Update voxelnext_head.py
aravindbisht Apr 22, 2025
bb604e6
Update voxelnext_head.py
aravindbisht Apr 22, 2025
b9fde0c
Update voxelnext_head.py
aravindbisht Apr 22, 2025
9c471fb
Update mvxnet_voxelnext_improved_kitti.py
aravindbisht Apr 22, 2025
95504e7
Update mvxnet_voxelnext_improved_kitti.py
aravindbisht Apr 22, 2025
d05c490
Update voxelnext_head.py
aravindbisht Apr 22, 2025
f84b148
Update voxelnext_head.py
aravindbisht Apr 22, 2025
4d8aa30
Update voxelnext_head.py
aravindbisht Apr 22, 2025
cb873ef
Update voxelnext_head.py
aravindbisht Apr 22, 2025
0361bf7
Update voxelnext_head.py
aravindbisht Apr 22, 2025
609b8c2
Update voxelnext_head.py
aravindbisht Apr 22, 2025
a564db4
Update voxelnext_head.py
aravindbisht Apr 22, 2025
742b868
Update voxelnext_head.py
aravindbisht Apr 22, 2025
404acd0
Update voxelnext_head.py
aravindbisht Apr 22, 2025
9138f13
Update voxelnext_head.py
aravindbisht Apr 22, 2025
3ebbd14
Update voxelnext_head.py
aravindbisht Apr 22, 2025
d9abde3
Update voxelnext_head.py
aravindbisht Apr 22, 2025
8ded876
Update voxelnext_head.py
aravindbisht Apr 22, 2025
680e943
Update voxelnext_head.py
aravindbisht Apr 23, 2025
efae020
Update mvxnet_voxelnext_improved_kitti.py
aravindbisht Apr 23, 2025
c2ea196
Update voxelnext_head.py
aravindbisht Apr 23, 2025
68121a6
Update voxelnext_head.py
aravindbisht Apr 23, 2025
4900784
Update transforms_3d.py
aravindbisht Apr 23, 2025
a6a2703
Create lightweight_voxelnext_backbone.py
aravindbisht Apr 23, 2025
bf06543
Update __init__.py
aravindbisht Apr 23, 2025
538a393
Update voxelnext_head.py
aravindbisht Apr 23, 2025
3f7c877
Create lightweight_attention_fusion.py
aravindbisht Apr 23, 2025
ff80bfb
Update __init__.py
aravindbisht Apr 23, 2025
21f7f52
Create mvxnet_voxelnext_improved_kitti-01.py
aravindbisht Apr 23, 2025
5429895
Update lightweight_voxelnext_backbone.py
aravindbisht Apr 23, 2025
fa032df
Update voxelnext_head.py
aravindbisht Apr 23, 2025
5b56a81
Update mvxnet_voxelnext_improved_kitti-01.py
aravindbisht Apr 23, 2025
293 changes: 293 additions & 0 deletions configs/mvxnet/mvxnet_voxelnext_improved_kitti-01.py
@@ -0,0 +1,293 @@
_base_ = ['../_base_/schedules/cosine.py', '../_base_/default_runtime.py']

# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]

model = dict(
type='DynamicMVXFasterRCNN',
data_preprocessor=dict(
type='Det3DDataPreprocessor',
voxel=True,
voxel_type='dynamic',
voxel_layer=dict(
max_num_points=-1,  # dynamic voxelization: keep all points per voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(-1, -1)),
mean=[102.9801, 115.9465, 122.7717],
std=[1.0, 1.0, 1.0],
bgr_to_rgb=False,
pad_size_divisor=32),
img_backbone=dict(
type='mmdet.ResNet',
depth=34,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
img_neck=dict(
type='mmdet.FPN',
in_channels=[64, 128, 256, 512],  # ResNet-34 uses BasicBlock, so stage outputs are 64/128/256/512
out_channels=128,
norm_cfg=dict(type='BN', requires_grad=False),
num_outs=5),
pts_voxel_encoder=dict(
type='DynamicVFE',
in_channels=4,
feat_channels=[32, 32],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
fusion_layer=dict(
type='LightweightAttentionFusion',
img_channels=128,
pts_channels=128,
mid_channels=64,
out_channels=128,
num_heads=2,
dropout=0.0,
use_sparse_attention=True)),
pts_middle_encoder=dict(
type='SparseEncoder',
in_channels=64,
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
pts_backbone=dict(
type='LightweightVoxelNeXtBackbone',
in_channels=64,
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 64, 64],
sparse_shape=[41, 1600, 1408],
with_cp=True,
use_sparse_conv=True,
groups=4),
pts_neck=dict(
type='VoxelNeXtNeck',
in_channels=[64, 64, 64],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128],
use_sparse_conv=True),
pts_bbox_head=dict(
type='VoxelNeXtHead',
num_classes=3,
in_channels=128,
feat_channels=128,
use_direction_classifier=True,
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss',
beta=1.0 / 9.0,
loss_weight=2.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
loss_weight=0.2)),
train_cfg=dict(
pts=dict(
assigner=[
dict( # for Pedestrian
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False)),
test_cfg=dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
nms_type='sparse', # Options: 'default', 'rotated', 'sparse'
score_thr=0.1,
min_bbox_size=0,
nms_pre=100,
max_num=50)))

# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=True, use_camera=True)
backend_args = None
train_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4, backend_args=backend_args),
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='GlobalRotScaleTrans', rot_range=[-0.78539816, 0.78539816], scale_ratio_range=[0.95, 1.05]),
dict(type='LightweightPointAugmentation',
drop_ratio=0.1,
jitter_std=0.01,
rot_range=[-0.78539816, 0.78539816],
sample_ratio=0.9,
prob=0.5),
dict(type='SparseImageAugmentation',
drop_ratio=0.05,
contrast_range=[0.8, 1.2],
color_jitter=[0.0, 0.1],
prob=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='Pack3DDetInputs', keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1280, 384),
pts_scale_ratio=1,
flip=False,
transforms=[
# Temporary solution, fix this after refactor the augtest
dict(type='Resize', scale=0, keep_ratio=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
]),
dict(type='Pack3DDetInputs', keys=['points', 'img'])
]

modality = dict(use_lidar=True, use_camera=True)

train_dataloader = dict(
batch_size=2,
num_workers=2,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
modality=modality,
ann_file='kitti_infos_train.pkl',
data_prefix=dict(
pts='training/velodyne_reduced', img='training/image_2'),
pipeline=train_pipeline,
filter_empty_gt=False,
metainfo=metainfo,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR',
backend_args=backend_args)))

val_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
modality=modality,
ann_file='kitti_infos_val.pkl',
data_prefix=dict(
pts='training/velodyne_reduced', img='training/image_2'),
pipeline=test_pipeline,
metainfo=metainfo,
test_mode=True,
box_type_3d='LiDAR',
backend_args=backend_args))

test_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='kitti_infos_val.pkl',
modality=modality,
data_prefix=dict(
pts='training/velodyne_reduced', img='training/image_2'),
pipeline=test_pipeline,
metainfo=metainfo,
test_mode=True,
box_type_3d='LiDAR',
backend_args=backend_args))

# optim_wrapper = dict(
# optimizer=dict(weight_decay=0.01),
# clip_grad=dict(max_norm=35, norm_type=2),
# )

# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.01),
clip_grad=dict(max_norm=35, norm_type=2))

# learning policy
param_scheduler = [
dict(
type='LinearLR',
start_factor=0.001,
by_epoch=False,
begin=0,
end=1000),
dict(
type='MultiStepLR',
begin=0,
end=24,
by_epoch=True,
milestones=[20, 23],
gamma=0.1)
]

val_evaluator = dict(
type='KittiMetric', ann_file='data/kitti/kitti_infos_val.pkl')
test_evaluator = val_evaluator

# training schedule
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')

# Default setting for scaling LR automatically
auto_scale_lr = dict(base_batch_size=16)
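
For reviewers who want to try the config locally, here is a minimal sketch of loading and launching it with MMEngine's Runner. It assumes an mmdetection3d 1.x environment in which the new modules from this PR (LightweightVoxelNeXtBackbone, VoxelNeXtNeck, VoxelNeXtHead, LightweightAttentionFusion, and the new pipeline transforms) are importable and registered; the work_dir path is only illustrative.

# Sketch: build and train from the new config (work_dir is an assumption, not part of the PR)
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile('configs/mvxnet/mvxnet_voxelnext_improved_kitti-01.py')
cfg.work_dir = 'work_dirs/mvxnet_voxelnext_improved_kitti-01'  # assumed output directory
runner = Runner.from_cfg(cfg)  # builds the model, dataloaders, optim wrapper and schedulers
runner.train()                 # EpochBasedTrainLoop: max_epochs=5, validation every epoch

The same config should also work with the standard tools/train.py entry point, provided the custom modules resolve in the registry.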