-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AttributeError: 'list' object has no attribute 'new_zeros' #22
Comments
And this is my config: _base_ = [
'../../../../configs/_base_/datasets/kitti-3d-3class.py',
'../../../../configs/_base_/schedules/cyclic_40e.py',
'../../../../configs/_base_/default_runtime.py'
]
plugin=True
plugin_dir='projects_uvtr/mmdet3d_plugin/'
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
voxel_size = [0.05, 0.05, 0.1]
fp16_enabled = True
bev_stride = 4
sample_num = 5
# For nuScenes we usually do 10-class detection
class_names = ['Pedestrian', 'Cyclist', 'Car']
input_modality = dict(
use_lidar=True,
use_camera=False,
use_radar=False,
use_map=False,
use_external=False)
model = dict(
type='UVTR',
pts_voxel_layer=dict(
max_num_points=5, voxel_size=voxel_size, max_voxels=(16000, 40000),
point_cloud_range=point_cloud_range),
pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=4),
pts_middle_encoder=dict(
type='SparseEncoderHD',
in_channels=4,
sparse_shape=[41, 1600, 1408],
output_channels=256,
order=('conv', 'norm', 'act'),
encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),
encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
block_type='basicblock',
fp16_enabled=False), # not enable FP16 here
pts_backbone=dict(
type='SECOND3D',
in_channels=[256, 256, 256],
out_channels=[128, 256, 512],
layer_nums=[5, 5, 5],
layer_strides=[1, 2, 4],
is_cascade=False,
norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
conv_cfg=dict(type='Conv3d', kernel=(1,3,3), bias=False)),
pts_neck=dict(
type='SECOND3DFPN',
in_channels=[128, 256, 512],
out_channels=[256, 256, 256],
upsample_strides=[1, 2, 4],
norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
upsample_cfg=dict(type='deconv3d', bias=False),
extra_conv=dict(type='Conv3d', num_conv=3, bias=False),
use_conv_for_no_stride=True),
pts_bbox_head=dict(
type='UVTRHead',
# transformer_cfg
num_query=300,
num_classes=3,
in_channels=256,
sync_cls_avg_factor=True,
with_box_refine=True,
as_two_stage=False,
transformer=dict(
type='Uni3DDETR',
fp16_enabled=fp16_enabled,
decoder=dict(
type='UniTransformerDecoder',
num_layers=3,
return_intermediate=True,
transformerlayers=dict(
type='BaseTransformerLayer',
attn_cfgs=[
dict(
type='MultiheadAttention',
embed_dims=256,
num_heads=8,
dropout=0.1),
dict(
type='UniCrossAtten',
num_points=1,
embed_dims=256,
num_sweeps=1,
fp16_enabled=fp16_enabled)
],
ffn_cfgs=dict(
type='FFN',
embed_dims=256,
feedforward_channels=512,
num_fcs=2,
ffn_drop=0.1,
act_cfg=dict(type='ReLU', inplace=True),
),
norm_cfg=dict(type='LN'),
operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
'ffn', 'norm'))
)
),
bbox_coder=dict(
type='NMSFreeCoder',
post_center_range=[0, -40, -3, 70.4, 40, 1],
pc_range=point_cloud_range,
max_num=100,
voxel_size=voxel_size,
num_classes=3),
positional_encoding=dict(
type='SinePositionalEncoding',
num_feats=128,
normalize=True,
offset=-0.5),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=2.0),
loss_bbox=dict(type='L1Loss', loss_weight=0.25),
loss_iou=dict(type='GIoULoss', loss_weight=0.0),
code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
),
# model training and testing settings
train_cfg=dict(pts=dict(
grid_size=[1408, 1600, 40],
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
out_size_factor=bev_stride,
assigner=dict(
type='HungarianAssigner3D',
cls_cost=dict(type='FocalLossCost', weight=2.0),
reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head.
pc_range=point_cloud_range))))
dataset_type = 'KittiDataset'
data_root = '/share/home/scz6240/openmmlab0171/DAIR-V2X-Dataset/single-infrastructure-side/'
file_client_args = dict(backend='disk')
db_sampler = dict(
type='UnifiedDataBaseSampler',
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl', # please change to your own database file
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
classes=class_names,
sample_groups=dict(Car=12, Pedestrian=10, Cyclist=10),
points_loader=dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=[0, 1, 2, 3],
file_client_args=file_client_args))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=dict(backend='disk')),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=dict(backend='disk')),
dict(
type='ObjectSample',
db_sampler=dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
classes=['Pedestrian', 'Cyclist', 'Car'],
sample_groups=dict(Car=12, Pedestrian=10, Cyclist=10))),
dict(
type='ObjectNoise',
num_try=100,
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(
type='PointsRangeFilter', point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
dict(
type='ObjectRangeFilter', point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
dict(type='PointShuffle'),
dict(
type='DefaultFormatBundle3D',
class_names=['Pedestrian', 'Cyclist', 'Car']),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1.0, 1.0],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter',
point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
dict(
type='DefaultFormatBundle3D',
class_names=['Pedestrian', 'Cyclist', 'Car'],
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=dict(backend='disk')),
dict(
type='DefaultFormatBundle3D',
class_names=['Pedestrian', 'Cyclist', 'Car'],
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=8,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type='KittiDataset',
data_root='/share/home/scz6240/openmmlab0171/DAIR-V2X-Dataset/single-infrastructure-side/',
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=dict(backend='disk')),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=dict(backend='disk')),
dict(
type='ObjectSample',
db_sampler=dict(
data_root='/share/home/scz6240/openmmlab0171/DAIR-V2X-Dataset/single-infrastructure-side/',
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(
Car=5, Pedestrian=10, Cyclist=10)),
classes=['Pedestrian', 'Cyclist', 'Car'],
sample_groups=dict(Car=12, Pedestrian=10,
Cyclist=10))),
dict(
type='ObjectNoise',
num_try=100,
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(
type='PointsRangeFilter',
point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
dict(
type='ObjectRangeFilter',
point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
dict(type='PointShuffle'),
dict(
type='DefaultFormatBundle3D',
class_names=['Pedestrian', 'Cyclist', 'Car']),
dict(
type='Collect3D',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
],
modality=dict(use_lidar=True, use_camera=False),
classes=['Pedestrian', 'Cyclist', 'Car'],
test_mode=False,
box_type_3d='LiDAR')),
val=dict(
type='KittiDataset',
data_root='/share/home/scz6240/openmmlab0171/DAIR-V2X-Dataset/single-infrastructure-side/',
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1.0, 1.0],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter',
point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
dict(
type='DefaultFormatBundle3D',
class_names=['Pedestrian', 'Cyclist', 'Car'],
with_label=False),
dict(type='Collect3D', keys=['points'])
])
],
modality=dict(use_lidar=True, use_camera=False),
classes=['Pedestrian', 'Cyclist', 'Car'],
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type='KittiDataset',
data_root='/share/home/scz6240/openmmlab0171/DAIR-V2X-Dataset/single-infrastructure-side/',
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1.0, 1.0],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter',
point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
dict(
type='DefaultFormatBundle3D',
class_names=['Pedestrian', 'Cyclist', 'Car'],
with_label=False),
dict(type='Collect3D', keys=['points'])
])
],
modality=dict(use_lidar=True, use_camera=False),
classes=['Pedestrian', 'Cyclist', 'Car'],
test_mode=True,
box_type_3d='LiDAR'))
evaluation = dict(
interval=1,
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=dict(backend='disk')),
dict(
type='DefaultFormatBundle3D',
class_names=['Pedestrian', 'Cyclist', 'Car'],
with_label=False),
dict(type='Collect3D', keys=['points'])
])
checkpoint_config = dict(interval=1)
runner = dict(type='EpochBasedRunner', max_epochs=40)
optimizer = dict(type='AdamW', lr=2e-5, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
work_dir = '/share/home/scz6240/openmmlab0171/mmdetection3d/work_dir/uvtr_dair'
find_unused_parameters = True
workflow = [('train', 1)]
gpu_ids = range(0, 1)
dist_params = dict(backend='nccl')
log_level = 'INFO'
# fp16 setting
fp16 = dict(loss_scale=32.)
It seems like an fp16 error, but I don't know how to resolve this issue. I look forward to your suggestions. Many thanks!
Hi, I guess you should check the data format of the points in this function.
Excuse me, I don't know how to modify the data format of the input points. Could you give me some suggestions, please?
Sorry to bother you again. I have tried to reimplement your LiDAR-based model on a KITTI-like dataset, but after the first epoch the following error occurs:
The text was updated successfully, but these errors were encountered: