diff --git a/.gitignore b/.gitignore index 912d430003..0aa45b3f13 100644 --- a/.gitignore +++ b/.gitignore @@ -123,6 +123,7 @@ exps/ # demo *.jpg *.png +/data/s3dis/Stanford3dDataset_v1.2_Aligned_Version/ /data/scannet/scans/ /data/sunrgbd/OFFICIAL_SUNRGBD/ *.obj diff --git a/configs/_base_/datasets/s3dis_seg-3d-13class.py b/configs/_base_/datasets/s3dis_seg-3d-13class.py new file mode 100644 index 0000000000..1fb7d4b7e7 --- /dev/null +++ b/configs/_base_/datasets/s3dis_seg-3d-13class.py @@ -0,0 +1,116 @@ +# dataset settings +dataset_type = 'S3DISSegDataset' +data_root = './data/s3dis/' +class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', + 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter') +num_points = 4096 +train_area = [1, 2, 3, 4, 6] +test_area = 5 +train_pipeline = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=False, + use_color=True, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict( + type='LoadAnnotations3D', + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=False, + with_seg_3d=True), + dict( + type='PointSegClassMapping', + valid_cat_ids=tuple(range(len(class_names)))), + dict( + type='IndoorPatchPointSample', + num_points=num_points, + block_size=1.0, + sample_rate=1.0, + ignore_index=len(class_names), + use_normalized_coord=True), + dict(type='NormalizePointsColor', color_mean=None), + dict(type='DefaultFormatBundle3D', class_names=class_names), + dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) +] +test_pipeline = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=False, + use_color=True, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict(type='NormalizePointsColor', color_mean=None), + dict(type='DefaultFormatBundle3D', class_names=class_names), + dict(type='Collect3D', keys=['points']) +] +# construct a pipeline for data and gt loading in show function +# please keep its loading function consistent with test_pipeline (e.g. client) +# we need to load gt seg_mask! 
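+# the pipeline below loads points and gt masks for the whole scene without sampling or color normalization, so evaluation and visualization run on complete rooms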
+eval_pipeline = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=False, + use_color=True, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict( + type='LoadAnnotations3D', + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=False, + with_seg_3d=True), + dict( + type='PointSegClassMapping', + valid_cat_ids=tuple(range(len(class_names)))), + dict( + type='DefaultFormatBundle3D', + with_label=False, + class_names=class_names), + dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) +] + +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + # train on area 1, 2, 3, 4, 6 + # test on area 5 + train=dict( + type=dataset_type, + data_root=data_root, + ann_files=[ + data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area + ], + pipeline=train_pipeline, + classes=class_names, + test_mode=False, + ignore_index=len(class_names), + scene_idxs=[ + data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy' + for i in train_area + ], + label_weights=[ + data_root + f'seg_info/Area_{i}_label_weight.npy' + for i in train_area + ]), + val=dict( + type=dataset_type, + data_root=data_root, + ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', + pipeline=test_pipeline, + classes=class_names, + test_mode=True, + ignore_index=len(class_names)), + test=dict( + type=dataset_type, + data_root=data_root, + ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', + pipeline=test_pipeline, + classes=class_names, + test_mode=True, + ignore_index=len(class_names))) + +evaluation = dict(pipeline=eval_pipeline) diff --git a/data/s3dis/README.md b/data/s3dis/README.md new file mode 100644 index 0000000000..25802ccc7d --- /dev/null +++ b/data/s3dis/README.md @@ -0,0 +1,58 @@ +### Prepare S3DIS Data + +We follow the procedure in [pointnet](https://github.com/charlesq34/pointnet). + +1. Download S3DIS data by filling in this [Google form](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1). Download the ```Stanford3dDataset_v1.2_Aligned_Version.zip``` file and unzip it. Link or move the folder to this directory. + +2. In this directory, extract point clouds and annotations by running `python collect_indoor3d_data.py`. + +3. Enter the project root directory and generate training data by running + +```bash +python tools/create_data.py s3dis --root-path ./data/s3dis --out-dir ./data/s3dis --extra-tag s3dis +``` + +The overall process can be achieved with the following script + +```bash +python collect_indoor3d_data.py +cd ../..
+python tools/create_data.py s3dis --root-path ./data/s3dis --out-dir ./data/s3dis --extra-tag s3dis +``` + +The directory structure after pre-processing should be as follows + +``` +s3dis +├── indoor3d_util.py +├── collect_indoor3d_data.py +├── README.md +├── Stanford3dDataset_v1.2_Aligned_Version +├── s3dis_data +├── points +│ ├── xxxxx.bin +├── instance_mask +│ ├── xxxxx.bin +├── semantic_mask +│ ├── xxxxx.bin +├── seg_info +│ ├── Area_1_label_weight.npy +│ ├── Area_1_resampled_scene_idxs.npy +│ ├── Area_2_label_weight.npy +│ ├── Area_2_resampled_scene_idxs.npy +│ ├── Area_3_label_weight.npy +│ ├── Area_3_resampled_scene_idxs.npy +│ ├── Area_4_label_weight.npy +│ ├── Area_4_resampled_scene_idxs.npy +│ ├── Area_5_label_weight.npy +│ ├── Area_5_resampled_scene_idxs.npy +│ ├── Area_6_label_weight.npy +│ ├── Area_6_resampled_scene_idxs.npy +├── s3dis_infos_Area_1.pkl +├── s3dis_infos_Area_2.pkl +├── s3dis_infos_Area_3.pkl +├── s3dis_infos_Area_4.pkl +├── s3dis_infos_Area_5.pkl +├── s3dis_infos_Area_6.pkl + +``` diff --git a/data/s3dis/collect_indoor3d_data.py b/data/s3dis/collect_indoor3d_data.py new file mode 100644 index 0000000000..00102cee88 --- /dev/null +++ b/data/s3dis/collect_indoor3d_data.py @@ -0,0 +1,49 @@ +import argparse +import mmcv +from os import path as osp + +from indoor3d_util import export + +parser = argparse.ArgumentParser() +parser.add_argument( + '--output-folder', + default='./s3dis_data', + help='output folder of the result.') +parser.add_argument( + '--data-dir', + default='Stanford3dDataset_v1.2_Aligned_Version', + help='s3dis data directory.') +parser.add_argument( + '--ann-file', + default='meta_data/anno_paths.txt', + help='The path of the file that stores the annotation names.') +args = parser.parse_args() + +anno_paths = [line.rstrip() for line in open(args.ann_file)] +anno_paths = [osp.join(args.data_dir, p) for p in anno_paths] + +output_folder = args.output_folder +mmcv.mkdir_or_exist(output_folder) + +# Note: there is an extra character in the v1.2 data in Area_5/hallway_6. +# It's fixed manually here. +# Refer to https://github.com/AnTao97/dgcnn.pytorch/blob/843abe82dd731eb51a4b3f70632c2ed3c60560e9/prepare_data/collect_indoor3d_data.py#L18 # noqa +revise_file = osp.join(args.data_dir, + 'Area_5/hallway_6/Annotations/ceiling_1.txt') +with open(revise_file, 'r') as f: + data = f.read() + # replace that extra character with blank space to separate data + data = data[:5545347] + ' ' + data[5545348:] +with open(revise_file, 'w') as f: + f.write(data) + +for anno_path in anno_paths: + print(f'Exporting data from annotation file: {anno_path}') + elements = anno_path.split('/') + out_filename = \ + elements[-3] + '_' + elements[-2] # Area_1_hallway_1 + out_filename = osp.join(output_folder, out_filename) + if osp.isfile(f'{out_filename}_point.npy'): + print('File already exists,
skipping.') + continue + export(anno_path, out_filename) diff --git a/data/s3dis/indoor3d_util.py b/data/s3dis/indoor3d_util.py new file mode 100644 index 0000000000..a500a1d4e0 --- /dev/null +++ b/data/s3dis/indoor3d_util.py @@ -0,0 +1,53 @@ +import glob +import numpy as np +from os import path as osp + +# ----------------------------------------------------------------------------- +# CONSTANTS +# ----------------------------------------------------------------------------- + +BASE_DIR = osp.dirname(osp.abspath(__file__)) + +class_names = [ + x.rstrip() for x in open(osp.join(BASE_DIR, 'meta_data/class_names.txt')) +] +class2label = {one_class: i for i, one_class in enumerate(class_names)} + +# ----------------------------------------------------------------------------- +# CONVERT ORIGINAL DATA TO POINTS, SEM_LABEL AND INS_LABEL FILES +# ----------------------------------------------------------------------------- + + +def export(anno_path, out_filename): + """Convert original dataset files to points, instance mask and semantic + mask files. We aggregate all the points from each instance in the room. + + Args: + anno_path (str): path to annotations. e.g. Area_1/office_2/Annotations/ + out_filename (str): path to save collected points and labels + + Note: + the points are shifted before saving, so that the most negative + point lies at the origin. + """ + points_list = [] + ins_idx = 1 # instance ids should be indexed from 1, so 0 is unannotated + + for f in glob.glob(osp.join(anno_path, '*.txt')): + one_class = osp.basename(f).split('_')[0] + if one_class not in class_names: # some rooms have a 'stairs' class + one_class = 'clutter' + points = np.loadtxt(f) + labels = np.ones((points.shape[0], 1)) * class2label[one_class] + ins_labels = np.ones((points.shape[0], 1)) * ins_idx + ins_idx += 1 + points_list.append(np.concatenate([points, labels, ins_labels], 1)) + + data_label = np.concatenate(points_list, 0) # [N, 8], (pts, rgb, sem, ins) + xyz_min = np.amin(data_label, axis=0)[0:3] + data_label[:, 0:3] -= xyz_min + + np.save(f'{out_filename}_point.npy', data_label[:, :6].astype(np.float32)) + np.save(f'{out_filename}_sem_label.npy', data_label[:, 6].astype(np.int)) + np.save(f'{out_filename}_ins_label.npy', data_label[:, 7].astype(np.int)) diff --git a/data/s3dis/meta_data/anno_paths.txt b/data/s3dis/meta_data/anno_paths.txt new file mode 100644 index 0000000000..0ad2f25994 --- /dev/null +++ b/data/s3dis/meta_data/anno_paths.txt @@ -0,0 +1,272 @@ +Area_1/conferenceRoom_1/Annotations +Area_1/conferenceRoom_2/Annotations +Area_1/copyRoom_1/Annotations +Area_1/hallway_1/Annotations +Area_1/hallway_2/Annotations +Area_1/hallway_3/Annotations +Area_1/hallway_4/Annotations +Area_1/hallway_5/Annotations +Area_1/hallway_6/Annotations +Area_1/hallway_7/Annotations +Area_1/hallway_8/Annotations +Area_1/office_10/Annotations +Area_1/office_11/Annotations +Area_1/office_12/Annotations +Area_1/office_13/Annotations +Area_1/office_14/Annotations +Area_1/office_15/Annotations +Area_1/office_16/Annotations +Area_1/office_17/Annotations +Area_1/office_18/Annotations +Area_1/office_19/Annotations +Area_1/office_1/Annotations +Area_1/office_20/Annotations +Area_1/office_21/Annotations +Area_1/office_22/Annotations +Area_1/office_23/Annotations +Area_1/office_24/Annotations +Area_1/office_25/Annotations +Area_1/office_26/Annotations +Area_1/office_27/Annotations +Area_1/office_28/Annotations +Area_1/office_29/Annotations +Area_1/office_2/Annotations
+Area_1/office_30/Annotations +Area_1/office_31/Annotations +Area_1/office_3/Annotations +Area_1/office_4/Annotations +Area_1/office_5/Annotations +Area_1/office_6/Annotations +Area_1/office_7/Annotations +Area_1/office_8/Annotations +Area_1/office_9/Annotations +Area_1/pantry_1/Annotations +Area_1/WC_1/Annotations +Area_2/auditorium_1/Annotations +Area_2/auditorium_2/Annotations +Area_2/conferenceRoom_1/Annotations +Area_2/hallway_10/Annotations +Area_2/hallway_11/Annotations +Area_2/hallway_12/Annotations +Area_2/hallway_1/Annotations +Area_2/hallway_2/Annotations +Area_2/hallway_3/Annotations +Area_2/hallway_4/Annotations +Area_2/hallway_5/Annotations +Area_2/hallway_6/Annotations +Area_2/hallway_7/Annotations +Area_2/hallway_8/Annotations +Area_2/hallway_9/Annotations +Area_2/office_10/Annotations +Area_2/office_11/Annotations +Area_2/office_12/Annotations +Area_2/office_13/Annotations +Area_2/office_14/Annotations +Area_2/office_1/Annotations +Area_2/office_2/Annotations +Area_2/office_3/Annotations +Area_2/office_4/Annotations +Area_2/office_5/Annotations +Area_2/office_6/Annotations +Area_2/office_7/Annotations +Area_2/office_8/Annotations +Area_2/office_9/Annotations +Area_2/storage_1/Annotations +Area_2/storage_2/Annotations +Area_2/storage_3/Annotations +Area_2/storage_4/Annotations +Area_2/storage_5/Annotations +Area_2/storage_6/Annotations +Area_2/storage_7/Annotations +Area_2/storage_8/Annotations +Area_2/storage_9/Annotations +Area_2/WC_1/Annotations +Area_2/WC_2/Annotations +Area_3/conferenceRoom_1/Annotations +Area_3/hallway_1/Annotations +Area_3/hallway_2/Annotations +Area_3/hallway_3/Annotations +Area_3/hallway_4/Annotations +Area_3/hallway_5/Annotations +Area_3/hallway_6/Annotations +Area_3/lounge_1/Annotations +Area_3/lounge_2/Annotations +Area_3/office_10/Annotations +Area_3/office_1/Annotations +Area_3/office_2/Annotations +Area_3/office_3/Annotations +Area_3/office_4/Annotations +Area_3/office_5/Annotations +Area_3/office_6/Annotations +Area_3/office_7/Annotations +Area_3/office_8/Annotations +Area_3/office_9/Annotations +Area_3/storage_1/Annotations +Area_3/storage_2/Annotations +Area_3/WC_1/Annotations +Area_3/WC_2/Annotations +Area_4/conferenceRoom_1/Annotations +Area_4/conferenceRoom_2/Annotations +Area_4/conferenceRoom_3/Annotations +Area_4/hallway_10/Annotations +Area_4/hallway_11/Annotations +Area_4/hallway_12/Annotations +Area_4/hallway_13/Annotations +Area_4/hallway_14/Annotations +Area_4/hallway_1/Annotations +Area_4/hallway_2/Annotations +Area_4/hallway_3/Annotations +Area_4/hallway_4/Annotations +Area_4/hallway_5/Annotations +Area_4/hallway_6/Annotations +Area_4/hallway_7/Annotations +Area_4/hallway_8/Annotations +Area_4/hallway_9/Annotations +Area_4/lobby_1/Annotations +Area_4/lobby_2/Annotations +Area_4/office_10/Annotations +Area_4/office_11/Annotations +Area_4/office_12/Annotations +Area_4/office_13/Annotations +Area_4/office_14/Annotations +Area_4/office_15/Annotations +Area_4/office_16/Annotations +Area_4/office_17/Annotations +Area_4/office_18/Annotations +Area_4/office_19/Annotations +Area_4/office_1/Annotations +Area_4/office_20/Annotations +Area_4/office_21/Annotations +Area_4/office_22/Annotations +Area_4/office_2/Annotations +Area_4/office_3/Annotations +Area_4/office_4/Annotations +Area_4/office_5/Annotations +Area_4/office_6/Annotations +Area_4/office_7/Annotations +Area_4/office_8/Annotations +Area_4/office_9/Annotations +Area_4/storage_1/Annotations +Area_4/storage_2/Annotations +Area_4/storage_3/Annotations +Area_4/storage_4/Annotations 
+Area_4/WC_1/Annotations +Area_4/WC_2/Annotations +Area_4/WC_3/Annotations +Area_4/WC_4/Annotations +Area_5/conferenceRoom_1/Annotations +Area_5/conferenceRoom_2/Annotations +Area_5/conferenceRoom_3/Annotations +Area_5/hallway_10/Annotations +Area_5/hallway_11/Annotations +Area_5/hallway_12/Annotations +Area_5/hallway_13/Annotations +Area_5/hallway_14/Annotations +Area_5/hallway_15/Annotations +Area_5/hallway_1/Annotations +Area_5/hallway_2/Annotations +Area_5/hallway_3/Annotations +Area_5/hallway_4/Annotations +Area_5/hallway_5/Annotations +Area_5/hallway_6/Annotations +Area_5/hallway_7/Annotations +Area_5/hallway_8/Annotations +Area_5/hallway_9/Annotations +Area_5/lobby_1/Annotations +Area_5/office_10/Annotations +Area_5/office_11/Annotations +Area_5/office_12/Annotations +Area_5/office_13/Annotations +Area_5/office_14/Annotations +Area_5/office_15/Annotations +Area_5/office_16/Annotations +Area_5/office_17/Annotations +Area_5/office_18/Annotations +Area_5/office_19/Annotations +Area_5/office_1/Annotations +Area_5/office_20/Annotations +Area_5/office_21/Annotations +Area_5/office_22/Annotations +Area_5/office_23/Annotations +Area_5/office_24/Annotations +Area_5/office_25/Annotations +Area_5/office_26/Annotations +Area_5/office_27/Annotations +Area_5/office_28/Annotations +Area_5/office_29/Annotations +Area_5/office_2/Annotations +Area_5/office_30/Annotations +Area_5/office_31/Annotations +Area_5/office_32/Annotations +Area_5/office_33/Annotations +Area_5/office_34/Annotations +Area_5/office_35/Annotations +Area_5/office_36/Annotations +Area_5/office_37/Annotations +Area_5/office_38/Annotations +Area_5/office_39/Annotations +Area_5/office_3/Annotations +Area_5/office_40/Annotations +Area_5/office_41/Annotations +Area_5/office_42/Annotations +Area_5/office_4/Annotations +Area_5/office_5/Annotations +Area_5/office_6/Annotations +Area_5/office_7/Annotations +Area_5/office_8/Annotations +Area_5/office_9/Annotations +Area_5/pantry_1/Annotations +Area_5/storage_1/Annotations +Area_5/storage_2/Annotations +Area_5/storage_3/Annotations +Area_5/storage_4/Annotations +Area_5/WC_1/Annotations +Area_5/WC_2/Annotations +Area_6/conferenceRoom_1/Annotations +Area_6/copyRoom_1/Annotations +Area_6/hallway_1/Annotations +Area_6/hallway_2/Annotations +Area_6/hallway_3/Annotations +Area_6/hallway_4/Annotations +Area_6/hallway_5/Annotations +Area_6/hallway_6/Annotations +Area_6/lounge_1/Annotations +Area_6/office_10/Annotations +Area_6/office_11/Annotations +Area_6/office_12/Annotations +Area_6/office_13/Annotations +Area_6/office_14/Annotations +Area_6/office_15/Annotations +Area_6/office_16/Annotations +Area_6/office_17/Annotations +Area_6/office_18/Annotations +Area_6/office_19/Annotations +Area_6/office_1/Annotations +Area_6/office_20/Annotations +Area_6/office_21/Annotations +Area_6/office_22/Annotations +Area_6/office_23/Annotations +Area_6/office_24/Annotations +Area_6/office_25/Annotations +Area_6/office_26/Annotations +Area_6/office_27/Annotations +Area_6/office_28/Annotations +Area_6/office_29/Annotations +Area_6/office_2/Annotations +Area_6/office_30/Annotations +Area_6/office_31/Annotations +Area_6/office_32/Annotations +Area_6/office_33/Annotations +Area_6/office_34/Annotations +Area_6/office_35/Annotations +Area_6/office_36/Annotations +Area_6/office_37/Annotations +Area_6/office_3/Annotations +Area_6/office_4/Annotations +Area_6/office_5/Annotations +Area_6/office_6/Annotations +Area_6/office_7/Annotations +Area_6/office_8/Annotations +Area_6/office_9/Annotations +Area_6/openspace_1/Annotations 
+Area_6/pantry_1/Annotations diff --git a/data/s3dis/meta_data/class_names.txt b/data/s3dis/meta_data/class_names.txt new file mode 100644 index 0000000000..ca1d178828 --- /dev/null +++ b/data/s3dis/meta_data/class_names.txt @@ -0,0 +1,13 @@ +ceiling +floor +wall +beam +column +window +door +table +chair +sofa +bookcase +board +clutter diff --git a/docs/data_preparation.md b/docs/data_preparation.md index c1dbf8d5f0..79e613f5d3 100644 --- a/docs/data_preparation.md +++ b/docs/data_preparation.md @@ -51,6 +51,12 @@ mmdetection3d │ │ ├── val.txt │ │ ├── test.txt │ │ ├── sample_submission.csv +│ ├── s3dis +│ │ ├── meta_data +│ │ ├── Stanford3dDataset_v1.2_Aligned_Version +│ │ ├── collect_indoor3d_data.py +│ │ ├── indoor3d_util.py +│ │ ├── README.md │ ├── scannet │ │ ├── meta_data │ │ ├── scans @@ -113,7 +119,9 @@ python tools/create_data.py lyft --root-path ./data/lyft --out-dir ./data/lyft - Note that we follow the original folder names for clear organization. Please rename the raw folders as shown above. -### ScanNet and SUN RGB-D +### S3DIS, ScanNet and SUN RGB-D + +To prepare s3dis data, please see [s3dis](https://github.com/open-mmlab/mmdetection3d/blob/master/data/s3dis/README.md). To prepare scannet data, please see [scannet](https://github.com/open-mmlab/mmdetection3d/blob/master/data/scannet/README.md). diff --git a/mmdet3d/datasets/__init__.py b/mmdet3d/datasets/__init__.py index a6f1bcbf91..df07e91831 100644 --- a/mmdet3d/datasets/__init__.py +++ b/mmdet3d/datasets/__init__.py @@ -13,6 +13,7 @@ NormalizePointsColor, ObjectNoise, ObjectRangeFilter, ObjectSample, PointShuffle, PointsRangeFilter, RandomFlip3D, VoxelBasedPointSampler) +from .s3dis_dataset import S3DISSegDataset from .scannet_dataset import ScanNetDataset, ScanNetSegDataset from .semantickitti_dataset import SemanticKITTIDataset from .sunrgbd_dataset import SUNRGBDDataset @@ -27,7 +28,7 @@ 'ObjectNoise', 'GlobalRotScaleTrans', 'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D', 'LoadPointsFromFile', 'NormalizePointsColor', 'IndoorPointSample', 'LoadAnnotations3D', - 'SUNRGBDDataset', 'ScanNetDataset', 'ScanNetSegDataset', + 'SUNRGBDDataset', 'ScanNetDataset', 'ScanNetSegDataset', 'S3DISSegDataset', 'SemanticKITTIDataset', 'Custom3DDataset', 'Custom3DSegDataset', 'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter', 'VoxelBasedPointSampler', 'get_loading_pipeline' diff --git a/mmdet3d/datasets/custom_3d_seg.py b/mmdet3d/datasets/custom_3d_seg.py index b4f7284e7e..b976e5f55d 100644 --- a/mmdet3d/datasets/custom_3d_seg.py +++ b/mmdet3d/datasets/custom_3d_seg.py @@ -268,6 +268,8 @@ def get_scene_idxs_and_label_weight(self, scene_idxs, label_weight): return np.arange(len(self.data_infos)).astype(np.int32), \ np.ones(len(self.CLASSES)).astype(np.float32) + # we may need to re-sample different scenes according to scene_idxs + # this is necessary for indoor scene segmentation such as ScanNet if scene_idxs is None: scene_idxs = np.arange(len(self.data_infos)) if isinstance(scene_idxs, str): @@ -461,6 +463,10 @@ def __len__(self): def __getitem__(self, idx): """Get item from infos according to the given index. + In the indoor scene segmentation task, each scene contains millions of + points. However, we only sample fewer than 10k points within a patch + each time. Therefore, we use `scene_idxs` to re-sample different rooms. + Returns: dict: Data dictionary of the corresponding index.
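+ + Example: + With a `scene_idxs` of [0, 0, 1], the room with index 0 is sampled + twice and room 1 once in each epoch (illustrative values).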
""" diff --git a/mmdet3d/datasets/s3dis_dataset.py b/mmdet3d/datasets/s3dis_dataset.py new file mode 100644 index 0000000000..c8a4f077ae --- /dev/null +++ b/mmdet3d/datasets/s3dis_dataset.py @@ -0,0 +1,333 @@ +import numpy as np +from os import path as osp + +from mmdet3d.core import show_seg_result +from mmdet.datasets import DATASETS +from .custom_3d_seg import Custom3DSegDataset +from .pipelines import Compose + + +@DATASETS.register_module() +class _S3DISSegDataset(Custom3DSegDataset): + r"""S3DIS Dataset for Semantic Segmentation Task. + + This class is the inner dataset for S3DIS. Since S3DIS has 6 areas, we + often train on 5 of them and test on the remaining one. + However, there is not a fixed train-test split of S3DIS. People often test + on Area_5 as suggested by `SEGCloud `_. + But many papers also report the average results of 6-fold cross validation + over the 6 areas (e.g. `DGCNN `_). + Therefore, we use an inner dataset for one area, and further use a dataset + wrapper to concat all the provided data in different areas. + + Args: + data_root (str): Path of dataset root. + ann_file (str): Path of annotation file. + pipeline (list[dict], optional): Pipeline used for data processing. + Defaults to None. + classes (tuple[str], optional): Classes used in the dataset. + Defaults to None. + palette (list[list[int]], optional): The palette of segmentation map. + Defaults to None. + modality (dict, optional): Modality to specify the sensor data used + as input. Defaults to None. + test_mode (bool, optional): Whether the dataset is in test mode. + Defaults to False. + ignore_index (int, optional): The label index to be ignored, e.g. \ + unannotated points. If None is given, set to len(self.CLASSES). + Defaults to None. + scene_idxs (np.ndarray | str, optional): Precomputed index to load + data. For scenes with many points, we may sample it several times. + Defaults to None. + label_weight (np.ndarray | str, optional): Precomputed weight to \ + balance loss calculation. If None is given, compute from data. + Defaults to None. + """ + CLASSES = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', + 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter') + + VALID_CLASS_IDS = tuple(range(13)) + + ALL_CLASS_IDS = tuple(range(14)) # possibly with 'stair' class + + PALETTE = [[0, 255, 0], [0, 0, 255], [0, 255, 255], [255, 255, 0], + [255, 0, 255], [100, 100, 255], [200, 200, 100], + [170, 120, 200], [255, 0, 0], [200, 100, 100], [10, 200, 100], + [200, 200, 200], [50, 50, 50]] + + def __init__(self, + data_root, + ann_file, + pipeline=None, + classes=None, + palette=None, + modality=None, + test_mode=False, + ignore_index=None, + scene_idxs=None, + label_weight=None): + + super().__init__( + data_root=data_root, + ann_file=ann_file, + pipeline=pipeline, + classes=classes, + palette=palette, + modality=modality, + test_mode=test_mode, + ignore_index=ignore_index, + scene_idxs=scene_idxs, + label_weight=label_weight) + + def get_ann_info(self, index): + """Get annotation info according to the given index. + + Args: + index (int): Index of the annotation data to get. + + Returns: + dict: annotation information consists of the following keys: + + - pts_semantic_mask_path (str): Path of semantic masks. 
+        """ + # Use index to get the annos, so that the evalhook can also use this api + info = self.data_infos[index] + + pts_semantic_mask_path = osp.join(self.data_root, + info['pts_semantic_mask_path']) + + anns_results = dict(pts_semantic_mask_path=pts_semantic_mask_path) + return anns_results + + def _build_default_pipeline(self): + """Build the default pipeline for this dataset.""" + pipeline = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=False, + use_color=True, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict( + type='LoadAnnotations3D', + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=False, + with_seg_3d=True), + dict( + type='PointSegClassMapping', + valid_cat_ids=self.VALID_CLASS_IDS), + dict( + type='DefaultFormatBundle3D', + with_label=False, + class_names=self.CLASSES), + dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) + ] + return Compose(pipeline) + + def show(self, results, out_dir, show=True, pipeline=None): + """Results visualization. + + Args: + results (list[dict]): List of semantic segmentation results. + out_dir (str): Output directory of visualization result. + show (bool): Whether to visualize the results online. + pipeline (list[dict], optional): Pipeline for loading raw data, + used in the show function. Default: None. + """ + assert out_dir is not None, 'Expect out_dir, got none.' + pipeline = self._get_pipeline(pipeline) + for i, result in enumerate(results): + data_info = self.data_infos[i] + pts_path = data_info['pts_path'] + file_name = osp.split(pts_path)[-1].split('.')[0] + points, gt_sem_mask = self._extract_data( + i, pipeline, ['points', 'pts_semantic_mask'], load_annos=True) + points = points.numpy() + pred_sem_mask = result['semantic_mask'].numpy() + show_seg_result(points, gt_sem_mask, + pred_sem_mask, out_dir, file_name, + np.array(self.PALETTE), self.ignore_index, show) + + def get_scene_idxs_and_label_weight(self, scene_idxs, label_weight): + """Compute scene_idxs for data sampling and label weight for loss \ + calculation. + + We sample more times for scenes with more points. The label weight is + inversely proportional to the number of points of each class. + """ + # when testing, we load one whole scene every time + # and we don't need label weight for loss calculation + if not self.test_mode and scene_idxs is None: + raise NotImplementedError( + 'please provide re-sampled scene indexes for training') + + return super().get_scene_idxs_and_label_weight(scene_idxs, + label_weight) + + +@DATASETS.register_module() +class S3DISSegDataset(_S3DISSegDataset): + r"""S3DIS Dataset for Semantic Segmentation Task. + + This class serves as the API for experiments on the S3DIS Dataset. + It wraps the provided datasets of different areas. + We don't use `mmdet.datasets.dataset_wrappers.ConcatDataset` because we + need to concat the `scene_idxs` and `label_weights` of different areas. + + Please refer to the `google form <https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1>`_ for + data downloading. + + Args: + data_root (str): Path of dataset root. + ann_files (list[str]): Path of several annotation files. + pipeline (list[dict], optional): Pipeline used for data processing. + Defaults to None. + classes (tuple[str], optional): Classes used in the dataset. + Defaults to None. + palette (list[list[int]], optional): The palette of segmentation map. + Defaults to None. + modality (dict, optional): Modality to specify the sensor data used + as input. Defaults to None. + test_mode (bool, optional): Whether the dataset is in test mode. + Defaults to False. + ignore_index (int, optional): The label index to be ignored, e.g.
\ + unannotated points. If None is given, set to len(self.CLASSES). + Defaults to None. + scene_idxs (list[np.ndarray] | list[str], optional): Precomputed index + to load data. For scenes with many points, we may sample them several + times. Defaults to None. + label_weights (list[np.ndarray] | list[str], optional): Precomputed + weight to balance loss calculation. If None is given, compute from + data. Defaults to None. + """ + + def __init__(self, + data_root, + ann_files, + pipeline=None, + classes=None, + palette=None, + modality=None, + test_mode=False, + ignore_index=None, + scene_idxs=None, + label_weights=None): + + # make sure that ann_files, scene_idxs and label_weights have the same length + ann_files = self._check_ann_files(ann_files) + scene_idxs = self._check_scene_idxs(scene_idxs, len(ann_files)) + label_weights = self._check_label_weights(label_weights, + len(ann_files)) + + # initialize some attributes from datasets[0] + super().__init__( + data_root=data_root, + ann_file=ann_files[0], + pipeline=pipeline, + classes=classes, + palette=palette, + modality=modality, + test_mode=test_mode, + ignore_index=ignore_index, + scene_idxs=scene_idxs[0], + label_weight=label_weights[0]) + + datasets = [ + _S3DISSegDataset( + data_root=data_root, + ann_file=ann_files[i], + pipeline=pipeline, + classes=classes, + palette=palette, + modality=modality, + test_mode=test_mode, + ignore_index=ignore_index, + scene_idxs=scene_idxs[i], + label_weight=label_weights[i]) for i in range(len(ann_files)) + ] + + # data_infos, scene_idxs and label_weight need to be concatenated + self.concat_data_infos([dst.data_infos for dst in datasets]) + self.concat_scene_idxs([dst.scene_idxs for dst in datasets]) + self.concat_label_weight([dst.label_weight for dst in datasets]) + + # set group flag for the sampler + if not self.test_mode: + self._set_group_flag() + + def concat_data_infos(self, data_infos): + """Concat data_infos from several datasets to form self.data_infos. + + Args: + data_infos (list[list[dict]]): Data infos of the wrapped datasets. + """ + self.data_infos = [ + info for one_data_infos in data_infos for info in one_data_infos + ] + + def concat_scene_idxs(self, scene_idxs): + """Concat scene_idxs from several datasets to form self.scene_idxs. + + We need to manually add an offset to the scene_idxs of datasets[1:]. + + Args: + scene_idxs (list[np.ndarray]): Scene indexes of the wrapped + datasets. + """ + self.scene_idxs = np.array([], dtype=np.int32) + offset = 0 + for one_scene_idxs in scene_idxs: + self.scene_idxs = np.concatenate( + [self.scene_idxs, one_scene_idxs + offset]).astype(np.int32) + offset = np.unique(self.scene_idxs).max() + 1 + + def concat_label_weight(self, label_weights): + """Concat label_weight from several datasets to form self.label_weight. + + Args: + label_weights (list[np.ndarray]): Label weights of the wrapped + datasets. + """ + # TODO: simply average them?
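+        # for now a plain unweighted mean over areas is taken as a reasonable + # approximation; every area counts equally regardless of its point count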
+ self.label_weight = np.array(label_weights).mean(0).astype(np.float32) + + @staticmethod + def _duplicate_to_list(x, num): + """Repeat x `num` times to form a list.""" + return [x for _ in range(num)] + + def _check_ann_files(self, ann_file): + """Make ann_files as list/tuple.""" + # ann_file could be str + if not isinstance(ann_file, (list, tuple)): + ann_file = self._duplicate_to_list(ann_file, 1) + return ann_file + + def _check_scene_idxs(self, scene_idx, num): + """Make scene_idxs as list/tuple.""" + if scene_idx is None: + return self._duplicate_to_list(scene_idx, num) + # scene_idx could be str, np.ndarray, list or tuple + if isinstance(scene_idx, str): # str + return self._duplicate_to_list(scene_idx, num) + if isinstance(scene_idx[0], str): # list of str + return scene_idx + if isinstance(scene_idx[0], (list, tuple, np.ndarray)): # list of idx + return scene_idx + # single idx + return self._duplicate_to_list(scene_idx, num) + + def _check_label_weights(self, label_weight, num): + """Make label_weights as list/tuple.""" + if label_weight is None: + return self._duplicate_to_list(label_weight, num) + # label_weight could be str, np.ndarray, list or tuple + if isinstance(label_weight, str): # str + return self._duplicate_to_list(label_weight, num) + if isinstance(label_weight[0], str): # list of str + return label_weight + if isinstance(label_weight[0], (list, tuple, np.ndarray)): # list of w + return label_weight + # single weight + return self._duplicate_to_list(label_weight, num) diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py index 29837cf3af..ef7f61d641 100644 --- a/mmdet3d/datasets/scannet_dataset.py +++ b/mmdet3d/datasets/scannet_dataset.py @@ -276,8 +276,7 @@ def _build_default_pipeline(self): with_seg_3d=True), dict( type='PointSegClassMapping', - valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, - 24, 28, 33, 34, 36, 39)), + valid_cat_ids=self.VALID_CLASS_IDS), dict( type='DefaultFormatBundle3D', with_label=False, diff --git a/tests/data/s3dis/instance_mask/Area_1_office_2.bin b/tests/data/s3dis/instance_mask/Area_1_office_2.bin new file mode 100644 index 0000000000..94ecf2b91d Binary files /dev/null and b/tests/data/s3dis/instance_mask/Area_1_office_2.bin differ diff --git a/tests/data/s3dis/points/Area_1_office_2.bin b/tests/data/s3dis/points/Area_1_office_2.bin new file mode 100644 index 0000000000..260ca5a258 Binary files /dev/null and b/tests/data/s3dis/points/Area_1_office_2.bin differ diff --git a/tests/data/s3dis/s3dis_infos.pkl b/tests/data/s3dis/s3dis_infos.pkl new file mode 100644 index 0000000000..668e277cf4 Binary files /dev/null and b/tests/data/s3dis/s3dis_infos.pkl differ diff --git a/tests/data/s3dis/semantic_mask/Area_1_office_2.bin b/tests/data/s3dis/semantic_mask/Area_1_office_2.bin new file mode 100644 index 0000000000..982299112a Binary files /dev/null and b/tests/data/s3dis/semantic_mask/Area_1_office_2.bin differ diff --git a/tests/test_data/test_datasets/test_s3dis_dataset.py b/tests/test_data/test_datasets/test_s3dis_dataset.py new file mode 100644 index 0000000000..cd295f3471 --- /dev/null +++ b/tests/test_data/test_datasets/test_s3dis_dataset.py @@ -0,0 +1,263 @@ +import numpy as np +import pytest +import torch + +from mmdet3d.datasets import S3DISSegDataset + + +def test_seg_getitem(): + np.random.seed(0) + root_path = './tests/data/s3dis/' + ann_file = './tests/data/s3dis/s3dis_infos.pkl' + class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', + 'door', 'table', 'chair', 
'sofa', 'bookcase', 'board', + 'clutter') + palette = [[0, 255, 0], [0, 0, 255], [0, 255, 255], [255, 255, 0], + [255, 0, 255], [100, 100, 255], [200, 200, 100], + [170, 120, 200], [255, 0, 0], [200, 100, 100], [10, 200, 100], + [200, 200, 200], [50, 50, 50]] + scene_idxs = [0 for _ in range(20)] + label_weight = [ + 3.0441623, 3.3606708, 2.6408234, 4.5086737, 4.8403897, 4.7637715, + 4.4685297, 4.7051463, 4.9190116, 5.3899403, 4.6436925, 5.0669650, + 3.6270046 + ] + + pipelines = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=False, + use_color=True, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict( + type='LoadAnnotations3D', + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=False, + with_seg_3d=True), + dict( + type='PointSegClassMapping', + valid_cat_ids=tuple(range(len(class_names)))), + dict( + type='IndoorPatchPointSample', + num_points=5, + block_size=1.0, + sample_rate=1.0, + ignore_index=len(class_names), + use_normalized_coord=True), + dict(type='NormalizePointsColor', color_mean=None), + dict(type='DefaultFormatBundle3D', class_names=class_names), + dict( + type='Collect3D', + keys=['points', 'pts_semantic_mask'], + meta_keys=['file_name', 'sample_idx']) + ] + + s3dis_dataset = S3DISSegDataset( + data_root=root_path, + ann_files=ann_file, + pipeline=pipelines, + classes=None, + palette=None, + modality=None, + test_mode=False, + ignore_index=None, + scene_idxs=scene_idxs, + label_weights=label_weight) + + data = s3dis_dataset[0] + points = data['points']._data + pts_semantic_mask = data['pts_semantic_mask']._data + + file_name = data['img_metas']._data['file_name'] + sample_idx = data['img_metas']._data['sample_idx'] + + assert file_name == './tests/data/s3dis/points/Area_1_office_2.bin' + assert sample_idx == 'Area_1_office_2' + expected_points = torch.tensor([[ + 0.0000, 0.0000, 3.1720, 0.4706, 0.4431, 0.3725, 0.4624, 0.7502, 0.9543 + ], [ + 0.2880, -0.5900, 0.0650, 0.3451, 0.3373, 0.3490, 0.5119, 0.5518, 0.0196 + ], [ + 0.1570, 0.6000, 3.1700, 0.4941, 0.4667, 0.3569, 0.4893, 0.9519, 0.9537 + ], [ + -0.1320, 0.3950, 0.2720, 0.3216, 0.2863, 0.2275, 0.4397, 0.8830, 0.0818 + ], + [ + -0.4860, -0.0640, 3.1710, 0.3843, + 0.3725, 0.3059, 0.3789, 0.7286, 0.9540 + ]]) + expected_pts_semantic_mask = np.array([0, 1, 0, 8, 0]) + original_classes = s3dis_dataset.CLASSES + original_palette = s3dis_dataset.PALETTE + + assert s3dis_dataset.CLASSES == class_names + assert s3dis_dataset.ignore_index == 13 + assert torch.allclose(points, expected_points, 1e-2) + assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask) + assert original_classes == class_names + assert original_palette == palette + assert s3dis_dataset.scene_idxs.dtype == np.int32 + assert np.all(s3dis_dataset.scene_idxs == np.array(scene_idxs)) + assert np.allclose(s3dis_dataset.label_weight, np.array(label_weight), + 1e-5) + + # test dataset with selected classes + s3dis_dataset = S3DISSegDataset( + data_root=root_path, + ann_files=ann_file, + pipeline=None, + classes=['beam', 'window'], + scene_idxs=scene_idxs) + + label_map = {i: 13 for i in range(14)} + label_map.update({3: 0, 5: 1}) + + assert s3dis_dataset.CLASSES != original_classes + assert s3dis_dataset.CLASSES == ['beam', 'window'] + assert s3dis_dataset.PALETTE == [palette[3], palette[5]] + assert s3dis_dataset.VALID_CLASS_IDS == [3, 5] + assert s3dis_dataset.label_map == label_map + assert s3dis_dataset.label2cat == {0: 'beam', 1: 'window'} + assert np.all(s3dis_dataset.label_weight == np.ones(2)) + + # test 
load classes from file + import tempfile + tmp_file = tempfile.NamedTemporaryFile() + with open(tmp_file.name, 'w') as f: + f.write('beam\nwindow\n') + + s3dis_dataset = S3DISSegDataset( + data_root=root_path, + ann_files=ann_file, + pipeline=None, + classes=tmp_file.name, + scene_idxs=scene_idxs) + assert s3dis_dataset.CLASSES != original_classes + assert s3dis_dataset.CLASSES == ['beam', 'window'] + assert s3dis_dataset.PALETTE == [palette[3], palette[5]] + assert s3dis_dataset.VALID_CLASS_IDS == [3, 5] + assert s3dis_dataset.label_map == label_map + assert s3dis_dataset.label2cat == {0: 'beam', 1: 'window'} + + # test scene_idxs in dataset + # we should input scene_idxs in train mode + with pytest.raises(NotImplementedError): + s3dis_dataset = S3DISSegDataset( + data_root=root_path, + ann_files=ann_file, + pipeline=None, + scene_idxs=None) + + # test mode + s3dis_dataset = S3DISSegDataset( + data_root=root_path, + ann_files=ann_file, + pipeline=None, + test_mode=True, + scene_idxs=scene_idxs) + assert np.all(s3dis_dataset.scene_idxs == np.array([0])) + assert np.all(s3dis_dataset.label_weight == np.ones(len(class_names))) + + +def test_seg_evaluate(): + if not torch.cuda.is_available(): + pytest.skip() + root_path = './tests/data/s3dis' + ann_file = './tests/data/s3dis/s3dis_infos.pkl' + s3dis_dataset = S3DISSegDataset( + data_root=root_path, ann_files=ann_file, test_mode=True) + results = [] + pred_sem_mask = dict( + semantic_mask=torch.tensor([ + 2, 3, 1, 2, 2, 6, 1, 0, 1, 1, 9, 12, 3, 0, 2, 0, 2, 0, 8, 3, 1, 2, + 0, 2, 1, 7, 2, 10, 2, 0, 0, 0, 2, 3, 2, 2, 2, 2, 2, 3, 0, 0, 4, 6, + 7, 2, 1, 2, 0, 1, 7, 0, 2, 2, 2, 0, 2, 2, 1, 12, 0, 2, 2, 2, 2, 7, + 2, 2, 0, 2, 6, 2, 12, 6, 3, 12, 2, 1, 6, 1, 2, 6, 8, 2, 10, 1, 11, + 0, 6, 9, 4, 3, 0, 0, 12, 1, 1, 5, 3, 2 + ]).long()) + results.append(pred_sem_mask) + ret_dict = s3dis_dataset.evaluate(results) + assert abs(ret_dict['miou'] - 0.7625) < 0.01 + assert abs(ret_dict['acc'] - 0.9) < 0.01 + assert abs(ret_dict['acc_cls'] - 0.9074) < 0.01 + + +def test_seg_show(): + import mmcv + import tempfile + from os import path as osp + + tmp_dir = tempfile.TemporaryDirectory() + temp_dir = tmp_dir.name + root_path = './tests/data/s3dis' + ann_file = './tests/data/s3dis/s3dis_infos.pkl' + s3dis_dataset = S3DISSegDataset( + data_root=root_path, ann_files=ann_file, scene_idxs=[0]) + result = dict( + semantic_mask=torch.tensor([ + 2, 2, 1, 2, 2, 5, 1, 0, 1, 1, 9, 12, 3, 0, 2, 0, 2, 0, 8, 2, 0, 2, + 0, 2, 1, 7, 2, 10, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 2, 2, 0, 0, 4, 6, + 7, 2, 1, 2, 0, 1, 7, 0, 2, 2, 2, 0, 2, 2, 1, 12, 0, 2, 2, 2, 2, 7, + 2, 2, 0, 2, 6, 2, 12, 6, 2, 12, 2, 1, 6, 1, 2, 6, 8, 2, 10, 1, 10, + 0, 6, 9, 4, 3, 0, 0, 12, 1, 1, 5, 2, 2 + ]).long()) + results = [result] + s3dis_dataset.show(results, temp_dir, show=False) + pts_file_path = osp.join(temp_dir, 'Area_1_office_2', + 'Area_1_office_2_points.obj') + gt_file_path = osp.join(temp_dir, 'Area_1_office_2', + 'Area_1_office_2_gt.obj') + pred_file_path = osp.join(temp_dir, 'Area_1_office_2', + 'Area_1_office_2_pred.obj') + mmcv.check_file_exist(pts_file_path) + mmcv.check_file_exist(gt_file_path) + mmcv.check_file_exist(pred_file_path) + tmp_dir.cleanup() + + +def test_multi_areas(): + # S3DIS dataset has 6 areas, we often train on several of them + # need to verify the concat function of S3DISSegDataset + root_path = './tests/data/s3dis' + ann_file = './tests/data/s3dis/s3dis_infos.pkl' + class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', + 'door', 'table', 'chair', 'sofa', 
'bookcase', 'board', + 'clutter') + palette = [[0, 255, 0], [0, 0, 255], [0, 255, 255], [255, 255, 0], + [255, 0, 255], [100, 100, 255], [200, 200, 100], + [170, 120, 200], [255, 0, 0], [200, 100, 100], [10, 200, 100], + [200, 200, 200], [50, 50, 50]] + scene_idxs = [0 for _ in range(20)] + label_weight = [ + 3.0441623, 3.3606708, 2.6408234, 4.5086737, 4.8403897, 4.7637715, + 4.4685297, 4.7051463, 4.9190116, 5.3899403, 4.6436925, 5.0669650, + 3.6270046 + ] + + # repeat + repeat_num = 3 + s3dis_dataset = S3DISSegDataset( + data_root=root_path, + ann_files=[ann_file for _ in range(repeat_num)], + scene_idxs=scene_idxs, + label_weights=label_weight) + assert s3dis_dataset.CLASSES == class_names + assert s3dis_dataset.PALETTE == palette + assert len(s3dis_dataset.data_infos) == repeat_num + assert np.all(s3dis_dataset.scene_idxs == np.concatenate( + [np.array(scene_idxs) + i for i in range(repeat_num)])) + assert np.allclose(s3dis_dataset.label_weight, np.array(label_weight)) + + # different scene_idxs and label_weight input + label_weights = np.random.rand(repeat_num, len(class_names)) + s3dis_dataset = S3DISSegDataset( + data_root=root_path, + ann_files=[ann_file for _ in range(repeat_num)], + scene_idxs=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 3, 4], [0, 1, 1, 2, 2, 2]], + label_weights=label_weights) + assert np.all(s3dis_dataset.scene_idxs == np.array( + [0, 0, 1, 2, 2, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 10, 10])) + assert np.allclose(s3dis_dataset.label_weight, label_weights.mean(0)) diff --git a/tests/test_data/test_pipelines/test_indoor_pipeline.py b/tests/test_data/test_pipelines/test_indoor_pipeline.py index 23d9e6f249..66015e681b 100644 --- a/tests/test_data/test_pipelines/test_indoor_pipeline.py +++ b/tests/test_data/test_pipelines/test_indoor_pipeline.py @@ -175,6 +175,75 @@ def test_scannet_seg_pipeline(): assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask) +def test_s3dis_seg_pipeline(): + class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', + 'door', 'table', 'chair', 'sofa', 'bookcase', 'board', + 'clutter') + + np.random.seed(0) + pipelines = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=False, + use_color=True, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict( + type='LoadAnnotations3D', + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=False, + with_seg_3d=True), + dict( + type='PointSegClassMapping', + valid_cat_ids=tuple(range(len(class_names)))), + dict( + type='IndoorPatchPointSample', + num_points=5, + block_size=1.0, + sample_rate=1.0, + ignore_index=len(class_names), + use_normalized_coord=True), + dict(type='NormalizePointsColor', color_mean=None), + dict(type='DefaultFormatBundle3D', class_names=class_names), + dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) + ] + pipeline = Compose(pipelines) + info = mmcv.load('./tests/data/s3dis/s3dis_infos.pkl')[0] + results = dict() + data_path = './tests/data/s3dis' + results['pts_filename'] = osp.join(data_path, info['pts_path']) + results['ann_info'] = dict() + results['ann_info']['pts_semantic_mask_path'] = osp.join( + data_path, info['pts_semantic_mask_path']) + + results['pts_seg_fields'] = [] + + results = pipeline(results) + + points = results['points']._data + pts_semantic_mask = results['pts_semantic_mask']._data + + # build sampled points + s3dis_points = np.fromfile( + osp.join(data_path, info['pts_path']), dtype=np.float32).reshape( + (-1, 6)) + s3dis_choices = np.array([87, 37, 60, 18, 31]) + s3dis_center = np.array([2.691, 2.231, 
3.172]) + s3dis_center[2] = 0.0 + s3dis_coord_max = np.amax(s3dis_points[:, :3], axis=0) + expected_points = np.concatenate([ + s3dis_points[s3dis_choices, :3] - s3dis_center, + s3dis_points[s3dis_choices, 3:] / 255., + s3dis_points[s3dis_choices, :3] / s3dis_coord_max + ], + axis=1) + expected_pts_semantic_mask = np.array([0, 1, 0, 8, 0]) + assert np.allclose(points.numpy(), expected_points, atol=1e-6) + assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask) + + def test_sunrgbd_pipeline(): class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 'night_stand', 'bookshelf', 'bathtub') diff --git a/tests/test_data/test_pipelines/test_indoor_sample.py b/tests/test_data/test_pipelines/test_indoor_sample.py index 594dade877..5c79f6c6d0 100644 --- a/tests/test_data/test_pipelines/test_indoor_sample.py +++ b/tests/test_data/test_pipelines/test_indoor_sample.py @@ -96,8 +96,7 @@ def test_indoor_seg_sample(): scannet_points[scannet_choices, :3] - scannet_center, scannet_points[scannet_choices, 3:], scannet_points[scannet_choices, :3] / scannet_coord_max - ], - axis=1) + ], 1) assert scannet_points_result.points_dim == 9 assert scannet_points_result.attribute_dims == dict( @@ -115,3 +114,39 @@ def test_indoor_seg_sample(): 'use_normalized_coord=True, ' \ 'num_try=10)' assert repr_str == expected_repr_str + + # test on S3DIS dataset + np.random.seed(0) + s3dis_patch_sample_points = IndoorPatchPointSample(5, 1.0, 1.0, None, True) + s3dis_results = dict() + s3dis_points = np.fromfile( + './tests/data/s3dis/points/Area_1_office_2.bin', + dtype=np.float32).reshape((-1, 6)) + s3dis_results['points'] = DepthPoints( + s3dis_points, points_dim=6, attribute_dims=dict(color=[3, 4, 5])) + + s3dis_pts_semantic_mask = np.fromfile( + './tests/data/s3dis/semantic_mask/Area_1_office_2.bin', dtype=np.long) + s3dis_results['pts_semantic_mask'] = s3dis_pts_semantic_mask + + s3dis_results = s3dis_patch_sample_points(s3dis_results) + s3dis_points_result = s3dis_results['points'] + s3dis_semantic_labels_result = s3dis_results['pts_semantic_mask'] + + # manually constructed sampled points + s3dis_choices = np.array([87, 37, 60, 18, 31]) + s3dis_center = np.array([2.691, 2.231, 3.172]) + s3dis_center[2] = 0.0 + s3dis_coord_max = np.amax(s3dis_points[:, :3], axis=0) + s3dis_input_points = np.concatenate([ + s3dis_points[s3dis_choices, :3] - s3dis_center, + s3dis_points[s3dis_choices, + 3:], s3dis_points[s3dis_choices, :3] / s3dis_coord_max + ], 1) + + assert s3dis_points_result.points_dim == 9 + assert s3dis_points_result.attribute_dims == dict( + color=[3, 4, 5], normalized_coord=[6, 7, 8]) + s3dis_points_result = s3dis_points_result.tensor.numpy() + assert np.allclose(s3dis_input_points, s3dis_points_result, atol=1e-6) + assert np.all(np.array([0, 1, 0, 8, 0]) == s3dis_semantic_labels_result) diff --git a/tests/test_data/test_pipelines/test_loadings/test_loading.py b/tests/test_data/test_pipelines/test_loadings/test_loading.py index d838ab8b73..bd9090bee5 100644 --- a/tests/test_data/test_pipelines/test_loadings/test_loading.py +++ b/tests/test_data/test_pipelines/test_loadings/test_loading.py @@ -12,6 +12,7 @@ def test_load_points_from_indoor_file(): + # test on SUN RGB-D dataset with shifted height sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl') sunrgbd_load_points_from_file = LoadPointsFromFile( coord_type='DEPTH', load_dim=6, shift_height=True) @@ -31,6 +32,7 @@ def test_load_points_from_indoor_file(): data_path = './tests/data/scannet' scannet_info = 
scannet_info[0] + # test on ScanNet dataset with shifted height scannet_results['pts_filename'] = osp.join(data_path, scannet_info['pts_path']) scannet_results = scannet_load_data(scannet_results) @@ -64,6 +66,28 @@ def test_load_points_from_indoor_file(): scannet_point_cloud = scannet_point_cloud.tensor.numpy() assert scannet_point_cloud.shape == (100, 7) + # test load point cloud on S3DIS with color + data_path = './tests/data/s3dis' + s3dis_info = mmcv.load('./tests/data/s3dis/s3dis_infos.pkl') + s3dis_info = s3dis_info[0] + s3dis_load_data = LoadPointsFromFile( + coord_type='DEPTH', + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5], + shift_height=False, + use_color=True) + + s3dis_results = dict() + + s3dis_results['pts_filename'] = osp.join(data_path, s3dis_info['pts_path']) + s3dis_results = s3dis_load_data(s3dis_results) + s3dis_point_cloud = s3dis_results['points'] + assert s3dis_point_cloud.points_dim == 6 + assert s3dis_point_cloud.attribute_dims == dict(color=[3, 4, 5]) + + s3dis_point_cloud = s3dis_point_cloud.tensor.numpy() + assert s3dis_point_cloud.shape == (100, 6) + def test_load_points_from_outdoor_file(): data_path = 'tests/data/kitti/a.bin' @@ -141,6 +165,33 @@ def test_load_annotations3D(): assert scannet_pts_instance_mask.shape == (100, ) assert scannet_pts_semantic_mask.shape == (100, ) + # Test s3dis LoadAnnotations3D + s3dis_info = mmcv.load('./tests/data/s3dis/s3dis_infos.pkl')[0] + s3dis_load_annotations3D = LoadAnnotations3D( + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=True, + with_seg_3d=True) + s3dis_results = dict() + data_path = './tests/data/s3dis' + + # prepare input of loading pipeline + s3dis_results['ann_info'] = dict() + s3dis_results['ann_info']['pts_instance_mask_path'] = osp.join( + data_path, s3dis_info['pts_instance_mask_path']) + s3dis_results['ann_info']['pts_semantic_mask_path'] = osp.join( + data_path, s3dis_info['pts_semantic_mask_path']) + + s3dis_results['pts_mask_fields'] = [] + s3dis_results['pts_seg_fields'] = [] + + s3dis_results = s3dis_load_annotations3D(s3dis_results) + + s3dis_pts_instance_mask = s3dis_results['pts_instance_mask'] + s3dis_pts_semantic_mask = s3dis_results['pts_semantic_mask'] + assert s3dis_pts_instance_mask.shape == (100, ) + assert s3dis_pts_semantic_mask.shape == (100, ) + def test_load_segmentation_mask(): # Test loading semantic segmentation mask on ScanNet dataset @@ -178,6 +229,39 @@ def test_load_segmentation_mask(): 12, 2, 20, 0, 0, 13, 20, 1, 20, 5, 3, 0, 13, 1, 2, 2, 2, 1 ])) + # Test on S3DIS dataset + s3dis_info = mmcv.load('./tests/data/s3dis/s3dis_infos.pkl')[0] + s3dis_load_annotations3D = LoadAnnotations3D( + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=False, + with_seg_3d=True) + s3dis_results = dict() + data_path = './tests/data/s3dis' + + # prepare input of loading pipeline + s3dis_results['ann_info'] = dict() + s3dis_results['ann_info']['pts_semantic_mask_path'] = osp.join( + data_path, s3dis_info['pts_semantic_mask_path']) + s3dis_results['pts_seg_fields'] = [] + + s3dis_results = s3dis_load_annotations3D(s3dis_results) + s3dis_pts_semantic_mask = s3dis_results['pts_semantic_mask'] + assert s3dis_pts_semantic_mask.shape == (100, ) + + # Convert class_id to label and assign ignore_index + s3dis_seg_class_mapping = PointSegClassMapping(tuple(range(13))) + s3dis_results = s3dis_seg_class_mapping(s3dis_results) + s3dis_pts_semantic_mask = s3dis_results['pts_semantic_mask'] + + assert np.all(s3dis_pts_semantic_mask == np.array([ + 2, 2, 1, 2, 2, 5, 1, 0, 1, 1, 9, 12, 3, 0, 2, 
0, 2, 0, 8, 2, 0, 2, 0, + 2, 1, 7, 2, 10, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 2, 2, 0, 0, 4, 6, 7, 2, + 1, 2, 0, 1, 7, 0, 2, 2, 2, 0, 2, 2, 1, 12, 0, 2, 2, 2, 2, 7, 2, 2, 0, + 2, 6, 2, 12, 6, 2, 12, 2, 1, 6, 1, 2, 6, 8, 2, 10, 1, 10, 0, 6, 9, 4, + 3, 0, 0, 12, 1, 1, 5, 2, 2 + ])) + def test_load_points_from_multi_sweeps(): load_points_from_multi_sweeps = LoadPointsFromMultiSweeps() diff --git a/tools/create_data.py b/tools/create_data.py index f180d86ff5..b761f3e377 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -134,6 +134,19 @@ def scannet_data_prep(root_path, info_prefix, out_dir, workers): root_path, info_prefix, out_dir, workers=workers) +def s3dis_data_prep(root_path, info_prefix, out_dir, workers): + """Prepare the info file for s3dis dataset. + + Args: + root_path (str): Path of dataset root. + info_prefix (str): The prefix of info filenames. + out_dir (str): Output directory of the generated info file. + workers (int): Number of threads to be used. + """ + indoor.create_indoor_info_file( + root_path, info_prefix, out_dir, workers=workers) + + def sunrgbd_data_prep(root_path, info_prefix, out_dir, workers): """Prepare the info file for sunrgbd dataset. @@ -285,6 +298,12 @@ def waymo_data_prep(root_path, info_prefix=args.extra_tag, out_dir=args.out_dir, workers=args.workers) + elif args.dataset == 's3dis': + s3dis_data_prep( + root_path=args.root_path, + info_prefix=args.extra_tag, + out_dir=args.out_dir, + workers=args.workers) elif args.dataset == 'sunrgbd': sunrgbd_data_prep( root_path=args.root_path, diff --git a/tools/data_converter/indoor_converter.py b/tools/data_converter/indoor_converter.py index 8563362ec6..7654f75e86 100644 --- a/tools/data_converter/indoor_converter.py +++ b/tools/data_converter/indoor_converter.py @@ -2,6 +2,7 @@ import numpy as np import os +from tools.data_converter.s3dis_data_utils import S3DISData, S3DISSegData from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData from tools.data_converter.sunrgbd_data_utils import SUNRGBDData @@ -23,30 +24,38 @@ def create_indoor_info_file(data_path, workers (int): Number of threads to be used. Default: 4. 
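+ + Example: + >>> # an illustrative call; with pkl_prefix='s3dis' this writes + >>> # s3dis_infos_Area_{1..6}.pkl under the root path + >>> create_indoor_info_file('./data/s3dis', pkl_prefix='s3dis')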
""" assert os.path.exists(data_path) - assert pkl_prefix in ['sunrgbd', 'scannet'] + assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \ + f'unsupported indoor dataset {pkl_prefix}' save_path = data_path if save_path is None else save_path assert os.path.exists(save_path) - train_filename = os.path.join(save_path, f'{pkl_prefix}_infos_train.pkl') - val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl') - if pkl_prefix == 'sunrgbd': - train_dataset = SUNRGBDData( - root_path=data_path, split='train', use_v1=use_v1) - val_dataset = SUNRGBDData( - root_path=data_path, split='val', use_v1=use_v1) - else: - train_dataset = ScanNetData(root_path=data_path, split='train') - val_dataset = ScanNetData(root_path=data_path, split='val') - test_dataset = ScanNetData(root_path=data_path, split='test') - test_filename = os.path.join(save_path, f'{pkl_prefix}_infos_test.pkl') + # generate infos for both detection and segmentation task + if pkl_prefix in ['sunrgbd', 'scannet']: + train_filename = os.path.join(save_path, + f'{pkl_prefix}_infos_train.pkl') + val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl') + if pkl_prefix == 'sunrgbd': + # SUN RGB-D has a train-val split + train_dataset = SUNRGBDData( + root_path=data_path, split='train', use_v1=use_v1) + val_dataset = SUNRGBDData( + root_path=data_path, split='val', use_v1=use_v1) + else: + # ScanNet has a train-val-test split + train_dataset = ScanNetData(root_path=data_path, split='train') + val_dataset = ScanNetData(root_path=data_path, split='val') + test_dataset = ScanNetData(root_path=data_path, split='test') + test_filename = os.path.join(save_path, + f'{pkl_prefix}_infos_test.pkl') - infos_train = train_dataset.get_infos(num_workers=workers, has_label=True) - mmcv.dump(infos_train, train_filename, 'pkl') - print(f'{pkl_prefix} info train file is saved to {train_filename}') + infos_train = train_dataset.get_infos( + num_workers=workers, has_label=True) + mmcv.dump(infos_train, train_filename, 'pkl') + print(f'{pkl_prefix} info train file is saved to {train_filename}') - infos_val = val_dataset.get_infos(num_workers=workers, has_label=True) - mmcv.dump(infos_val, val_filename, 'pkl') - print(f'{pkl_prefix} info val file is saved to {val_filename}') + infos_val = val_dataset.get_infos(num_workers=workers, has_label=True) + mmcv.dump(infos_val, val_filename, 'pkl') + print(f'{pkl_prefix} info val file is saved to {val_filename}') if pkl_prefix == 'scannet': infos_test = test_dataset.get_infos( @@ -56,6 +65,8 @@ def create_indoor_info_file(data_path, # generate infos for the semantic segmentation task # e.g. 
@@ -56,6 +65,8 @@ def create_indoor_info_file(data_path,
     # generate infos for the semantic segmentation task
     # e.g. re-sampled scene indexes and label weights
+    # scene indexes re-sample rooms with different numbers of points
+    # label weights balance classes with different numbers of points
     if pkl_prefix == 'scannet':
         # label weight computation function is adopted from
         # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
@@ -73,6 +84,24 @@ def create_indoor_info_file(data_path,
             num_points=8192,
             label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
         # no need to generate for test set
         train_dataset.get_seg_infos()
         val_dataset.get_seg_infos()
+    else:
+        # S3DIS doesn't have a fixed train-val split
+        # it has 6 areas instead, so we generate an info file for each of them
+        # in training, we will use a dataset wrapper to combine different areas
+        splits = [f'Area_{i}' for i in [1, 2, 3, 4, 5, 6]]
+        for split in splits:
+            dataset = S3DISData(root_path=data_path, split=split)
+            info = dataset.get_infos(num_workers=workers, has_label=True)
+            filename = os.path.join(save_path,
+                                    f'{pkl_prefix}_infos_{split}.pkl')
+            mmcv.dump(info, filename, 'pkl')
+            print(f'{pkl_prefix} info {split} file is saved to {filename}')
+            seg_dataset = S3DISSegData(
+                data_root=data_path,
+                ann_file=filename,
+                split=split,
+                num_points=4096,
+                label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
+            seg_dataset.get_seg_infos()
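The loop above leaves three artifacts per area: a detection info file plus re-sampled scene indexes and label weights for segmentation. A quick sanity check might look like this (file names follow the code above; the area choice is arbitrary):

```python
import mmcv
import numpy as np

# Per-area detection infos: a list of dicts, one per room.
infos = mmcv.load('./data/s3dis/s3dis_infos_Area_5.pkl')
print(len(infos), infos[0]['pts_path'])

# Segmentation side: re-sampled scene indexes and per-class label weights.
scene_idxs = np.load('./data/s3dis/seg_info/Area_5_resampled_scene_idxs.npy')
label_weight = np.load('./data/s3dis/seg_info/Area_5_label_weight.npy')
print(scene_idxs.shape, label_weight.shape)  # (num_samples,), (13,)
```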
+ """ + + def process_single_scene(sample_idx): + print(f'{self.split} sample_idx: {sample_idx}') + info = dict() + pc_info = { + 'num_features': 6, + 'lidar_idx': f'{self.split}_{sample_idx}' + } + info['point_cloud'] = pc_info + pts_filename = osp.join(self.root_dir, 's3dis_data', + f'{self.split}_{sample_idx}_point.npy') + pts_instance_mask_path = osp.join( + self.root_dir, 's3dis_data', + f'{self.split}_{sample_idx}_ins_label.npy') + pts_semantic_mask_path = osp.join( + self.root_dir, 's3dis_data', + f'{self.split}_{sample_idx}_sem_label.npy') + + points = np.load(pts_filename).astype(np.float32) + pts_instance_mask = np.load(pts_instance_mask_path).astype(np.int) + pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.int) + + mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points')) + mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask')) + mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask')) + + points.tofile( + osp.join(self.root_dir, 'points', + f'{self.split}_{sample_idx}.bin')) + pts_instance_mask.tofile( + osp.join(self.root_dir, 'instance_mask', + f'{self.split}_{sample_idx}.bin')) + pts_semantic_mask.tofile( + osp.join(self.root_dir, 'semantic_mask', + f'{self.split}_{sample_idx}.bin')) + + info['pts_path'] = osp.join('points', + f'{self.split}_{sample_idx}.bin') + info['pts_instance_mask_path'] = osp.join( + 'instance_mask', f'{self.split}_{sample_idx}.bin') + info['pts_semantic_mask_path'] = osp.join( + 'semantic_mask', f'{self.split}_{sample_idx}.bin') + + return info + + sample_id_list = sample_id_list if sample_id_list is not None \ + else self.sample_id_list + with futures.ThreadPoolExecutor(num_workers) as executor: + infos = executor.map(process_single_scene, sample_id_list) + return list(infos) + + +class S3DISSegData(object): + """S3DIS dataset used to generate infos for semantic segmentation task. + + Args: + data_root (str): Root path of the raw data. + ann_file (str): The generated scannet infos. + split (str): Set split type of the data. Default: 'train'. + num_points (int): Number of points in each data input. Default: 8192. + label_weight_func (function): Function to compute the label weight. + Default: None. 
+ """ + + def __init__(self, + data_root, + ann_file, + split='Area_1', + num_points=4096, + label_weight_func=None): + self.data_root = data_root + self.data_infos = mmcv.load(ann_file) + self.split = split + self.num_points = num_points + + self.all_ids = np.arange(13) # all possible ids + self.cat_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12]) # used for seg task + self.ignore_index = len(self.cat_ids) + + self.cat_id2class = np.ones((self.all_ids.shape[0],), dtype=np.int) * \ + self.ignore_index + for i, cat_id in enumerate(self.cat_ids): + self.cat_id2class[cat_id] = i + + # label weighting function is taken from + # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24 + self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \ + label_weight_func is None else label_weight_func + + def get_seg_infos(self): + scene_idxs, label_weight = self.get_scene_idxs_and_label_weight() + save_folder = osp.join(self.data_root, 'seg_info') + mmcv.mkdir_or_exist(save_folder) + np.save( + osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'), + scene_idxs) + np.save( + osp.join(save_folder, f'{self.split}_label_weight.npy'), + label_weight) + print(f'{self.split} resampled scene index and label weight saved') + + def _convert_to_label(self, mask): + """Convert class_id in loaded segmentation mask to label.""" + if isinstance(mask, str): + if mask.endswith('npy'): + mask = np.load(mask) + else: + mask = np.fromfile(mask, dtype=np.long) + label = self.cat_id2class[mask] + return label + + def get_scene_idxs_and_label_weight(self): + """Compute scene_idxs for data sampling and label weight for loss \ + calculation. + + We sample more times for scenes with more points. Label_weight is + inversely proportional to number of class points. + """ + num_classes = len(self.cat_ids) + num_point_all = [] + label_weight = np.zeros((num_classes + 1, )) # ignore_index + for data_info in self.data_infos: + label = self._convert_to_label( + osp.join(self.data_root, data_info['pts_semantic_mask_path'])) + num_point_all.append(label.shape[0]) + class_count, _ = np.histogram(label, range(num_classes + 2)) + label_weight += class_count + + # repeat scene_idx for num_scene_point // num_sample_point times + sample_prob = np.array(num_point_all) / float(np.sum(num_point_all)) + num_iter = int(np.sum(num_point_all) / float(self.num_points)) + scene_idxs = [] + for idx in range(len(self.data_infos)): + scene_idxs.extend([idx] * round(sample_prob[idx] * num_iter)) + scene_idxs = np.array(scene_idxs).astype(np.int32) + + # calculate label weight, adopted from PointNet++ + label_weight = label_weight[:-1].astype(np.float32) + label_weight = label_weight / label_weight.sum() + label_weight = self.label_weight_func(label_weight).astype(np.float32) + + return scene_idxs, label_weight