diff --git a/configs/_base_/datasets/nus-seg.py b/configs/_base_/datasets/nus-seg.py
new file mode 100644
index 000000000..16e4ac399
--- /dev/null
+++ b/configs/_base_/datasets/nus-seg.py
@@ -0,0 +1,209 @@
+# For nuScenes we usually do 16-class segmentation.
+# For labels_map we follow the uniform format of MMDetection & MMSegmentation,
+# i.e. we treat the unlabeled class as the last one (index 16), which differs
+# from the original implementation of some methods, e.g. Cylinder3D.
+
+dataset_type = 'NuScenesSegDataset'
+data_root = 'data/nuscenes/'
+class_names = [
+    'barrier', 'bicycle', 'bus', 'car', 'construction_vehicle', 'motorcycle',
+    'pedestrian', 'traffic_cone', 'trailer', 'truck', 'driveable_surface',
+    'other_flat', 'sidewalk', 'terrain', 'manmade', 'vegetation'
+]
+labels_map = {  # old label id -> new label id (16 = ignore_index below)
+    0: 16,
+    1: 16,
+    2: 6,
+    3: 6,
+    4: 6,
+    5: 16,
+    6: 6,
+    7: 16,
+    8: 16,
+    9: 0,
+    10: 16,
+    11: 16,
+    12: 7,
+    13: 16,
+    14: 1,
+    15: 2,
+    16: 2,
+    17: 3,
+    18: 4,
+    19: 16,
+    20: 16,
+    21: 5,
+    22: 8,
+    23: 9,
+    24: 10,
+    25: 11,
+    26: 12,
+    27: 13,
+    28: 14,
+    29: 16,
+    30: 15,
+    31: 16
+}
+
+metainfo = dict(  # max_label is the largest key of labels_map
+    classes=class_names, seg_label_mapping=labels_map, max_label=31)
+
+input_modality = dict(use_lidar=True, use_camera=False)
+data_prefix = dict(
+    pts='samples/LIDAR_TOP',
+    img='',
+    pts_semantic_mask='lidarseg/v1.0-trainval')
+
+# Example of using a different file client
+# Method 1: simply set the data root and let the file I/O module
+# automatically infer from prefix (LMDB and Memcache are not supported yet)
+
+# data_root = 's3://openmmlab/datasets/detection3d/nuscenes/'
+
+# Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+# backend_args = dict(
+#     backend='petrel',
+#     path_mapping=dict({
+#         './data/': 's3://openmmlab/datasets/detection3d/',
+#         'data/': 's3://openmmlab/datasets/detection3d/'
+#     }))
+backend_args = None
+
+train_pipeline = [
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=4,  # NOTE(review): presumably x, y, z, intensity - confirm
+        backend_args=backend_args),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=False,
+        with_label_3d=False,
+        with_seg_3d=True,
+        seg_3d_dtype='np.uint8',
+        backend_args=backend_args),
+    dict(type='PointSegClassMapping'),
+    dict(
+        type='RandomFlip3D',
+        sync_2d=False,
+        flip_ratio_bev_horizontal=0.5,
+        flip_ratio_bev_vertical=0.5),
+    dict(
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.78539816, 0.78539816],  # +/- pi/4
+        scale_ratio_range=[0.95, 1.05],
+        translation_std=[0.1, 0.1, 0.1]),
+    dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
+]
+test_pipeline = [
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=4,
+        backend_args=backend_args),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=False,
+        with_label_3d=False,
+        with_seg_3d=True,
+        seg_3d_dtype='np.uint8',
+        backend_args=backend_args),
+    dict(type='PointSegClassMapping'),
+    dict(type='Pack3DDetInputs', keys=['points'])  # only points are packed
+]
+tta_pipeline = [
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=4,
+        backend_args=backend_args),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=False,
+        with_label_3d=False,
+        with_seg_3d=True,
+        seg_3d_dtype='np.uint8',
+        backend_args=backend_args),
+    dict(type='PointSegClassMapping'),
+    dict(
+        type='TestTimeAug',
+        transforms=[[  # 4 flip settings, 9 rot/scale settings, then packing
+            dict(
+                type='RandomFlip3D',
+                sync_2d=False,
+                flip_ratio_bev_horizontal=0.,
+                flip_ratio_bev_vertical=0.),
+            dict(
+                type='RandomFlip3D',
+                sync_2d=False,
+                flip_ratio_bev_horizontal=0.,
+                flip_ratio_bev_vertical=1.),
+            dict(
+                type='RandomFlip3D',
+                sync_2d=False,
+                flip_ratio_bev_horizontal=1.,
+                flip_ratio_bev_vertical=0.),
+            dict(
+                type='RandomFlip3D',
+                sync_2d=False,
+                flip_ratio_bev_horizontal=1.,
+                flip_ratio_bev_vertical=1.)
+        ],
+                    [
+                        dict(
+                            type='GlobalRotScaleTrans',
+                            rot_range=[pcd_rotate_range, pcd_rotate_range],
+                            scale_ratio_range=[
+                                pcd_scale_factor, pcd_scale_factor
+                            ],
+                            translation_std=[0, 0, 0])
+                        for pcd_rotate_range in [-0.78539816, 0.0, 0.78539816]
+                        for pcd_scale_factor in [0.95, 1.0, 1.05]
+                    ], [dict(type='Pack3DDetInputs', keys=['points'])]])
+]
+
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file='nuscenes_infos_train.pkl',
+        data_prefix=data_prefix,
+        pipeline=train_pipeline,
+        metainfo=metainfo,
+        modality=input_modality,
+        ignore_index=16,  # id 16 in labels_map, one past the 16 classes
+        backend_args=backend_args))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file='nuscenes_infos_val.pkl',
+        data_prefix=data_prefix,
+        pipeline=test_pipeline,
+        metainfo=metainfo,
+        modality=input_modality,
+        ignore_index=16,  # id 16 in labels_map, one past the 16 classes
+        test_mode=True,
+        backend_args=backend_args))
+test_dataloader = val_dataloader  # testing reuses the validation settings
+
+val_evaluator = dict(type='SegMetric')
+test_evaluator = val_evaluator
+
+vis_backends = [dict(type='LocalVisBackend')]
+visualizer = dict(
+    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+
+tta_model = dict(type='Seg3DTTAModel')
diff --git a/mmdet3d/datasets/__init__.py b/mmdet3d/datasets/__init__.py
index d573ca4ed..e071989fa 100644
--- a/mmdet3d/datasets/__init__.py
+++ b/mmdet3d/datasets/__init__.py
@@ -3,7 +3,7 @@
 from .det3d_dataset import Det3DDataset
 from .kitti_dataset import KittiDataset
 from .lyft_dataset import LyftDataset
-from .nuscenes_dataset import NuScenesDataset
+from .nuscenes_dataset import NuScenesDataset, NuScenesSegDataset
 # yapf: enable
 from .s3dis_dataset import S3DISDataset, S3DISSegDataset
 from .scannet_dataset import
(ScanNetDataset, ScanNetInstanceSegDataset,
@@ -38,4 +38,5 @@
     'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',
     'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',
     'RandomShiftScale', 'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
+    'NuScenesSegDataset'
 ]
diff --git a/mmdet3d/datasets/nuscenes_dataset.py b/mmdet3d/datasets/nuscenes_dataset.py
index 553480a58..490344ff6 100644
--- a/mmdet3d/datasets/nuscenes_dataset.py
+++ b/mmdet3d/datasets/nuscenes_dataset.py
@@ -1,6 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from os import path as osp
-from typing import Callable, List, Union
+from typing import Callable, List, Optional, Union
 
 import numpy as np
 
@@ -8,6 +8,7 @@
 from mmdet3d.structures import LiDARInstance3DBoxes
 from mmdet3d.structures.bbox_3d.cam_box3d import CameraInstance3DBoxes
 from .det3d_dataset import Det3DDataset
+from .seg3d_dataset import Seg3DDataset
 
 
 @DATASETS.register_module()
@@ -246,3 +247,105 @@ def parse_data_info(self, info: dict) -> Union[List[dict], dict]:
         else:
             data_info = super().parse_data_info(info)
         return data_info
+
+
+@DATASETS.register_module()
+class NuScenesSegDataset(Seg3DDataset):
+    """NuScenes LiDAR semantic segmentation dataset.
+
+    This class serves as the API for experiments on the NuScenes Seg Dataset.
+
+    Please refer to `NuScenes Dataset <https://www.nuscenes.org/download>`_
+    for data downloading.
+
+    Args:
+        data_root (str, optional): Path of dataset root. Defaults to None.
+        ann_file (str): Path of annotation file. Defaults to ''.
+        metainfo (dict, optional): Meta information for dataset, such as class
+            information. Defaults to None.
+        data_prefix (dict): Prefix for training data. Defaults to
+            dict(pts='', img='', pts_instance_mask='', pts_semantic_mask='').
+        pipeline (List[dict or Callable]): Pipeline used for data
+            preprocessing. Defaults to [].
+        modality (dict): Modality to specify the sensor data used as input,
+            it usually has following keys:
+
+                - use_camera: bool
+                - use_lidar: bool
+
+            Defaults to dict(use_lidar=True, use_camera=False).
+        ignore_index (int, optional): The label index to be ignored, e.g.
+            unannotated points. If None is given, set to len(self.classes) to
+            be consistent with PointSegClassMapping function in pipeline.
+            Defaults to None.
+        scene_idxs (str or np.ndarray, optional): Precomputed index to load
+            data. For scenes with many points, we may sample it several times.
+            Defaults to None.
+        test_mode (bool): Whether the dataset is in test mode.
+            Defaults to False.
+    """
+    METAINFO = {
+        'classes': ('barrier', 'bicycle', 'bus', 'car', 'construction_vehicle',
+                    'motorcycle', 'pedestrian', 'traffic_cone', 'trailer',
+                    'truck', 'driveable_surface', 'other_flat', 'sidewalk',
+                    'terrain', 'manmade', 'vegetation'),
+        'palette': [[255, 120, 50], [255, 192, 203], [255, 255, 0],
+                    [0, 150, 245], [0, 255, 255], [255, 127, 0], [255, 0, 0],
+                    [255, 240, 150], [135, 60, 0], [160, 32,
+                                                    240], [255, 0, 255],
+                    [139, 137, 137], [75, 0, 75], [150, 240, 80],
+                    [230, 230, 250], [0, 175, 0]],
+        'seg_valid_class_ids':
+        tuple(range(16)),
+        'seg_all_class_ids':
+        tuple(range(16)),
+    }
+
+    def __init__(self,
+                 data_root: Optional[str] = None,
+                 ann_file: str = '',
+                 metainfo: Optional[dict] = None,
+                 data_prefix: dict = dict(
+                     pts='',
+                     img='',
+                     pts_instance_mask='',
+                     pts_semantic_mask=''),
+                 pipeline: List[Union[dict, Callable]] = [],
+                 modality: dict = dict(use_lidar=True, use_camera=False),
+                 ignore_index: Optional[int] = None,
+                 scene_idxs: Optional[Union[str, np.ndarray]] = None,
+                 test_mode: bool = False,
+                 **kwargs) -> None:
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            metainfo=metainfo,
+            data_prefix=data_prefix,
+            pipeline=pipeline,
+            modality=modality,
+            ignore_index=ignore_index,
+            scene_idxs=scene_idxs,
+            test_mode=test_mode,
+            **kwargs)
+
+    def get_seg_label_mapping(self, metainfo: dict) -> np.ndarray:
+        """Get segmentation label mapping.
+
+        The ``seg_label_mapping`` is an array, its indices are the old label
+        ids and its values are the new label ids, and is specifically used for
+        changing point labels in PointSegClassMapping.
+
+        Args:
+            metainfo (dict): Meta information to set seg_label_mapping. It
+                must provide ``max_label`` (the array has ``max_label + 1``
+                entries) and ``seg_label_mapping`` (dict: old id -> new id).
+
+        Returns:
+            np.ndarray: The mapping from old classes to new classes. Old ids
+            that are not keys of ``metainfo['seg_label_mapping']`` keep the
+            initial value 0.
+        """
+        seg_label_mapping = np.zeros(metainfo['max_label'] + 1, dtype=np.int64)
+        for old_id, new_id in metainfo['seg_label_mapping'].items():
+            seg_label_mapping[old_id] = new_id
+        return seg_label_mapping
diff --git a/mmdet3d/datasets/seg3d_dataset.py b/mmdet3d/datasets/seg3d_dataset.py
index 5c4de10d7..239c4946a 100644
--- a/mmdet3d/datasets/seg3d_dataset.py
+++ b/mmdet3d/datasets/seg3d_dataset.py
@@ -263,8 +263,14 @@ def parse_data_info(self, info: dict) -> dict:
         if self.modality['use_camera']:
             for cam_id, img_info in info['images'].items():
                 if 'img_path' in img_info:
-                    img_info['img_path'] = osp.join(
-                        self.data_prefix.get('img', ''), img_info['img_path'])
+                    # Use the camera-specific prefix when one is configured,
+                    # otherwise fall back to the shared ``img`` prefix.
+                    if cam_id in self.data_prefix:
+                        cam_prefix = self.data_prefix[cam_id]
+                    else:
+                        cam_prefix = self.data_prefix.get('img', '')
+                    img_info['img_path'] = osp.join(cam_prefix,
+                                                    img_info['img_path'])
 
         if 'pts_instance_mask_path' in info:
             info['pts_instance_mask_path'] = \
diff --git a/tests/data/nuscenes/lidarseg/v1.0-trainval/5f6c1e0a0c3b444b97e78d2aa3fa34d2_lidarseg.bin b/tests/data/nuscenes/lidarseg/v1.0-trainval/5f6c1e0a0c3b444b97e78d2aa3fa34d2_lidarseg.bin
new file mode 100644
index 000000000..f47cb92a8
--- /dev/null
+++ b/tests/data/nuscenes/lidarseg/v1.0-trainval/5f6c1e0a0c3b444b97e78d2aa3fa34d2_lidarseg.bin
@@ -0,0 +1 @@
+ 
\ No newline at end of file
diff --git a/tests/data/nuscenes/nus_info.pkl b/tests/data/nuscenes/nus_info.pkl
index e1976f200..031ed5cf5 100644
Binary files a/tests/data/nuscenes/nus_info.pkl and b/tests/data/nuscenes/nus_info.pkl differ
diff --git
a/tests/data/nuscenes/samples/LIDAR_TOP/n015-2018-08-02-17-16-37+0800__LIDAR_TOP__1533201470948018.pcd.bin b/tests/data/nuscenes/samples/LIDAR_TOP/n015-2018-08-02-17-16-37+0800__LIDAR_TOP__1533201470948018.pcd.bin
index 5c278a6c8..91b419a6b 100644
Binary files a/tests/data/nuscenes/samples/LIDAR_TOP/n015-2018-08-02-17-16-37+0800__LIDAR_TOP__1533201470948018.pcd.bin and b/tests/data/nuscenes/samples/LIDAR_TOP/n015-2018-08-02-17-16-37+0800__LIDAR_TOP__1533201470948018.pcd.bin differ
diff --git a/tests/test_datasets/test_nuscenes_dataset.py b/tests/test_datasets/test_nuscenes_dataset.py
index 4b85f34f4..8c558316e 100644
--- a/tests/test_datasets/test_nuscenes_dataset.py
+++ b/tests/test_datasets/test_nuscenes_dataset.py
@@ -1,11 +1,14 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+import unittest
+
 import numpy as np
 from mmcv.transforms.base import BaseTransform
 from mmengine.registry import TRANSFORMS
 from mmengine.structures import InstanceData
 
-from mmdet3d.datasets import NuScenesDataset
+from mmdet3d.datasets import NuScenesDataset, NuScenesSegDataset
 from mmdet3d.structures import Det3DDataSample, LiDARInstance3DBoxes
+from mmdet3d.utils import register_all_modules
 
 
 def _generate_nus_dataset_config():
@@ -41,41 +44,148 @@ def transform(self, info):
     return data_root, ann_file, classes, data_prefix, pipeline, modality
 
 
-def test_getitem():
-    np.random.seed(0)
-    data_root, ann_file, classes, data_prefix, pipeline, modality = \
-        _generate_nus_dataset_config()
-
-    nus_dataset = NuScenesDataset(
-        data_root=data_root,
-        ann_file=ann_file,
-        data_prefix=data_prefix,
-        pipeline=pipeline,
-        metainfo=dict(classes=classes),
-        modality=modality)
-
-    nus_dataset.prepare_data(0)
-    input_dict = nus_dataset.get_data_info(0)
-    # assert the the path should contains data_prefix and data_root
-    assert data_prefix['pts'] in input_dict['lidar_points']['lidar_path']
-    assert data_root in input_dict['lidar_points']['lidar_path']
-
-    for cam_id, img_info in input_dict['images'].items():
-        if 'img_path' in img_info:
-            assert data_prefix['img'] in img_info['img_path']
-            assert data_root in img_info['img_path']
-
-    ann_info = nus_dataset.parse_ann_info(input_dict)
-
-    # assert the keys in ann_info and the type
-    assert 'gt_labels_3d' in ann_info
-    assert ann_info['gt_labels_3d'].dtype == np.int64
-    assert len(ann_info['gt_labels_3d']) == 37
-
-    assert 'gt_bboxes_3d' in ann_info
-    assert isinstance(ann_info['gt_bboxes_3d'], LiDARInstance3DBoxes)
-
-    assert len(nus_dataset.metainfo['classes']) == 10
-
-    assert input_dict['token'] == 'fd8420396768425eabec9bdddf7e64b6'
-    assert input_dict['timestamp'] == 1533201470.448696
+def _generate_nus_seg_dataset_config():
+    """Build the constructor arguments for the NuScenesSegDataset test."""
+    data_root = './tests/data/nuscenes'
+    ann_file = 'nus_info.pkl'
+    classes = ('barrier', 'bicycle', 'bus', 'car', 'construction_vehicle',
+               'motorcycle', 'pedestrian', 'traffic_cone', 'trailer', 'truck',
+               'driveable_surface', 'other_flat', 'sidewalk', 'terrain',
+               'manmade', 'vegetation')
+    seg_label_mapping = {
+        0: 16,
+        1: 16,
+        2: 6,
+        3: 6,
+        4: 6,
+        5: 16,
+        6: 6,
+        7: 16,
+        8: 16,
+        9: 0,
+        10: 16,
+        11: 16,
+        12: 7,
+        13: 16,
+        14: 1,
+        15: 2,
+        16: 2,
+        17: 3,
+        18: 4,
+        19: 16,
+        20: 16,
+        21: 5,
+        22: 8,
+        23: 9,
+        24: 10,
+        25: 11,
+        26: 12,
+        27: 13,
+        28: 14,
+        29: 16,
+        30: 15,
+        31: 16
+    }
+    max_label = 31
+    modality = dict(use_lidar=True, use_camera=False)
+    pipeline = [
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            shift_height=True,
+            load_dim=5,
+            use_dim=4),
+        dict(
+            type='LoadAnnotations3D',
+            with_bbox_3d=False,
+            with_label_3d=False,
+            with_mask_3d=False,
+            with_seg_3d=True,
+            seg_3d_dtype='np.uint8'),
+        dict(type='PointSegClassMapping'),
+        dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
+    ]
+    data_prefix = dict(
+        pts='samples/LIDAR_TOP', pts_semantic_mask='lidarseg/v1.0-trainval')
+
+    return (data_root, ann_file, classes, data_prefix, pipeline, modality,
+            seg_label_mapping, max_label)
+
+
+class TestNuScenesDataset(unittest.TestCase):
+
+    def test_nuscenes(self):
+        """NuScenesDataset exposes prefixed paths and parsed annotations."""
+        np.random.seed(0)
+        data_root, ann_file, classes, data_prefix, pipeline, modality = \
+            _generate_nus_dataset_config()
+
+        nus_dataset = NuScenesDataset(
+            data_root=data_root,
+            ann_file=ann_file,
+            data_prefix=data_prefix,
+            pipeline=pipeline,
+            metainfo=dict(classes=classes),
+            modality=modality)
+
+        nus_dataset.prepare_data(0)
+        input_dict = nus_dataset.get_data_info(0)
+        # the path should contain both data_prefix and data_root
+        self.assertIn(data_prefix['pts'],
+                      input_dict['lidar_points']['lidar_path'])
+        self.assertIn(data_root, input_dict['lidar_points']['lidar_path'])
+
+        for img_info in input_dict['images'].values():
+            if 'img_path' in img_info:
+                self.assertIn(data_prefix['img'], img_info['img_path'])
+                self.assertIn(data_root, img_info['img_path'])
+
+        ann_info = nus_dataset.parse_ann_info(input_dict)
+
+        # assert the keys in ann_info and the type
+        self.assertIn('gt_labels_3d', ann_info)
+        self.assertEqual(ann_info['gt_labels_3d'].dtype, np.int64)
+        self.assertEqual(len(ann_info['gt_labels_3d']), 37)
+
+        self.assertIn('gt_bboxes_3d', ann_info)
+        self.assertIsInstance(ann_info['gt_bboxes_3d'], LiDARInstance3DBoxes)
+
+        self.assertEqual(len(nus_dataset.metainfo['classes']), 10)
+
+        self.assertEqual(input_dict['token'],
+                         'fd8420396768425eabec9bdddf7e64b6')
+        self.assertEqual(input_dict['timestamp'], 1533201470.448696)
+
+    def test_nuscenes_seg(self):
+        """NuScenesSegDataset yields points with remapped semantic labels."""
+        data_root, ann_file, classes, data_prefix, pipeline, modality, \
+            seg_label_mapping, max_label = _generate_nus_seg_dataset_config()
+
+        register_all_modules()
+        np.random.seed(0)
+
+        nus_seg_dataset = NuScenesSegDataset(
+            data_root=data_root,
+            ann_file=ann_file,
+            data_prefix=data_prefix,
+            pipeline=pipeline,
+            metainfo=dict(
+                classes=classes,
+                seg_label_mapping=seg_label_mapping,
+                max_label=max_label),
+            modality=modality)
+
+        # Expected remapped labels of the 40 points in the test sample.
+        expected_pts_semantic_mask = np.array([
+            10, 10, 14, 14, 10, 16, 14, 10, 16, 14, 10, 10, 10, 10, 13, 10, 14,
+            14, 10, 16, 14, 3, 16, 14, 16, 10, 10, 16, 16, 10, 10, 14, 16, 10,
+            15, 14, 14, 14, 16, 3
+        ])
+
+        input_dict = nus_seg_dataset.prepare_data(0)
+        points = input_dict['inputs']['points']
+        data_sample = input_dict['data_samples']
+        pts_semantic_mask = data_sample.gt_pts_seg.pts_semantic_mask
+        self.assertEqual(points.shape[0], pts_semantic_mask.shape[0])
+        np.testing.assert_array_equal(pts_semantic_mask.numpy(),
+                                      expected_pts_semantic_mask)