diff --git a/configs/_base_/datasets/s3dis_seg-3d-13class.py b/configs/_base_/datasets/s3dis_seg-3d-13class.py index 62c2c2265a..e2dcab8099 100644 --- a/configs/_base_/datasets/s3dis_seg-3d-13class.py +++ b/configs/_base_/datasets/s3dis_seg-3d-13class.py @@ -115,10 +115,6 @@ scene_idxs=[ data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy' for i in train_area - ], - label_weights=[ - data_root + f'seg_info/Area_{i}_label_weight.npy' - for i in train_area ]), val=dict( type=dataset_type, diff --git a/configs/_base_/datasets/scannet_seg-3d-20class.py b/configs/_base_/datasets/scannet_seg-3d-20class.py index 2b3e786cb1..5d9b56f917 100644 --- a/configs/_base_/datasets/scannet_seg-3d-20class.py +++ b/configs/_base_/datasets/scannet_seg-3d-20class.py @@ -110,8 +110,7 @@ classes=class_names, test_mode=False, ignore_index=len(class_names), - scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy', - label_weight=data_root + 'seg_info/train_label_weight.npy'), + scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'), val=dict( type=dataset_type, data_root=data_root, diff --git a/docs/compatibility.md b/docs/compatibility.md index 172bb44c90..34c871fc25 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -2,6 +2,12 @@ This document provides detailed descriptions of the BC-breaking changes in MMDetection3D. +## MMDetection3D 0.15.0 + +### Dataset class for 3D segmentation task + +We removed the redundant `label_weight` parameter from the segmentation datasets, including `Custom3DSegDataset`, `ScanNetSegDataset` and `S3DISSegDataset`, since this weight is used in the loss function of the model class. Please update your code and config files accordingly if you use or inherit from these classes. 
+ ## MMDetection3D 0.14.0 ### ScanNet data pre-processing diff --git a/mmdet3d/datasets/custom_3d_seg.py b/mmdet3d/datasets/custom_3d_seg.py index cc47ffde2b..afd1b45051 100644 --- a/mmdet3d/datasets/custom_3d_seg.py +++ b/mmdet3d/datasets/custom_3d_seg.py @@ -38,9 +38,6 @@ class Custom3DSegDataset(Dataset): scene_idxs (np.ndarray | str, optional): Precomputed index to load data. For scenes with many points, we may sample it several times. Defaults to None. - label_weight (np.ndarray | str, optional): Precomputed weight to \ - balance loss calculation. If None is given, use equal weighting. - Defaults to None. """ # names of all classes data used for the task CLASSES = None @@ -63,8 +60,7 @@ def __init__(self, modality=None, test_mode=False, ignore_index=None, - scene_idxs=None, - label_weight=None): + scene_idxs=None): super().__init__() self.data_root = data_root self.ann_file = ann_file @@ -79,8 +75,7 @@ def __init__(self, self.ignore_index = len(self.CLASSES) if \ ignore_index is None else ignore_index - self.scene_idxs, self.label_weight = \ - self.get_scene_idxs_and_label_weight(scene_idxs, label_weight) + self.scene_idxs = self.get_scene_idxs(scene_idxs) self.CLASSES, self.PALETTE = \ self.get_classes_and_palette(classes, palette) @@ -250,26 +245,16 @@ def get_classes_and_palette(self, classes=None, palette=None): for cls_name in class_names ] - # also need to modify self.label_weight - self.label_weight = np.array([ - self.label_weight[self.CLASSES.index(cls_name)] - for cls_name in class_names - ]).astype(np.float32) - return class_names, palette - def get_scene_idxs_and_label_weight(self, scene_idxs, label_weight): - """Compute scene_idxs for data sampling and label weight for loss \ - calculation. + def get_scene_idxs(self, scene_idxs): + """Compute scene_idxs for data sampling. - We sample more times for scenes with more points. Label_weight is - inversely proportional to number of class points. + We sample more times for scenes with more points. 
""" if self.test_mode: # when testing, we load one whole scene every time - # and we don't need label weight for loss calculation - return np.arange(len(self.data_infos)).astype(np.int32), \ - np.ones(len(self.CLASSES)).astype(np.float32) + return np.arange(len(self.data_infos)).astype(np.int32) # we may need to re-sample different scenes according to scene_idxs # this is necessary for indoor scene segmentation such as ScanNet @@ -280,15 +265,7 @@ def get_scene_idxs_and_label_weight(self, scene_idxs, label_weight): else: scene_idxs = np.array(scene_idxs) - if label_weight is None: - # we don't used label weighting in training - label_weight = np.ones(len(self.CLASSES)) - elif isinstance(label_weight, str): - label_weight = np.load(label_weight) - else: - label_weight = np.array(label_weight) - - return scene_idxs.astype(np.int32), label_weight.astype(np.float32) + return scene_idxs.astype(np.int32) def format_results(self, outputs, diff --git a/mmdet3d/datasets/s3dis_dataset.py b/mmdet3d/datasets/s3dis_dataset.py index d3058eb353..185f35ad2d 100644 --- a/mmdet3d/datasets/s3dis_dataset.py +++ b/mmdet3d/datasets/s3dis_dataset.py @@ -41,9 +41,6 @@ class _S3DISSegDataset(Custom3DSegDataset): scene_idxs (np.ndarray | str, optional): Precomputed index to load data. For scenes with many points, we may sample it several times. Defaults to None. - label_weight (np.ndarray | str, optional): Precomputed weight to \ - balance loss calculation. If None is given, compute from data. - Defaults to None. 
""" CLASSES = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter') @@ -66,8 +63,7 @@ def __init__(self, modality=None, test_mode=False, ignore_index=None, - scene_idxs=None, - label_weight=None): + scene_idxs=None): super().__init__( data_root=data_root, @@ -78,8 +74,7 @@ def __init__(self, modality=modality, test_mode=test_mode, ignore_index=ignore_index, - scene_idxs=scene_idxs, - label_weight=label_weight) + scene_idxs=scene_idxs) def get_ann_info(self, index): """Get annotation info according to the given index. @@ -153,21 +148,17 @@ def show(self, results, out_dir, show=True, pipeline=None): pred_sem_mask, out_dir, file_name, np.array(self.PALETTE), self.ignore_index, show) - def get_scene_idxs_and_label_weight(self, scene_idxs, label_weight): - """Compute scene_idxs for data sampling and label weight for loss \ - calculation. + def get_scene_idxs(self, scene_idxs): + """Compute scene_idxs for data sampling. - We sample more times for scenes with more points. Label_weight is - inversely proportional to number of class points. + We sample more times for scenes with more points. """ # when testing, we load one whole scene every time - # and we don't need label weight for loss calculation if not self.test_mode and scene_idxs is None: raise NotImplementedError( 'please provide re-sampled scene indexes for training') - return super().get_scene_idxs_and_label_weight(scene_idxs, - label_weight) + return super().get_scene_idxs(scene_idxs) @DATASETS.register_module() @@ -178,7 +169,7 @@ class S3DISSegDataset(_S3DISSegDataset): This class serves as the API for experiments on the S3DIS Dataset. It wraps the provided datasets of different areas. We don't use `mmdet.datasets.dataset_wrappers.ConcatDataset` because we - need to concat the `scene_idxs` and `label_weights` of different areas. + need to concat the `scene_idxs` of different areas. 
Please refer to the `google form `_ for @@ -203,9 +194,6 @@ class S3DISSegDataset(_S3DISSegDataset): scene_idxs (list[np.ndarray] | list[str], optional): Precomputed index to load data. For scenes with many points, we may sample it several times. Defaults to None. - label_weights (list[np.ndarray] | list[str], optional): Precomputed - weight to balance loss calculation. If None is given, compute from - data. Defaults to None. """ def __init__(self, @@ -217,14 +205,11 @@ def __init__(self, modality=None, test_mode=False, ignore_index=None, - scene_idxs=None, - label_weights=None): + scene_idxs=None): - # make sure that ann_files, scene_idxs and label_weights have same len + # make sure that ann_files and scene_idxs have same length ann_files = self._check_ann_files(ann_files) scene_idxs = self._check_scene_idxs(scene_idxs, len(ann_files)) - label_weights = self._check_label_weights(label_weights, - len(ann_files)) # initialize some attributes as datasets[0] super().__init__( @@ -236,8 +221,7 @@ def __init__(self, modality=modality, test_mode=test_mode, ignore_index=ignore_index, - scene_idxs=scene_idxs[0], - label_weight=label_weights[0]) + scene_idxs=scene_idxs[0]) datasets = [ _S3DISSegDataset( @@ -249,14 +233,12 @@ def __init__(self, modality=modality, test_mode=test_mode, ignore_index=ignore_index, - scene_idxs=scene_idxs[i], - label_weight=label_weights[i]) for i in range(len(ann_files)) + scene_idxs=scene_idxs[i]) for i in range(len(ann_files)) ] - # data_infos, scene_idxs, label_weight need to be concat + # data_infos and scene_idxs need to be concat self.concat_data_infos([dst.data_infos for dst in datasets]) self.concat_scene_idxs([dst.scene_idxs for dst in datasets]) - self.concat_label_weight([dst.label_weight for dst in datasets]) # set group flag for the sampler if not self.test_mode: @@ -287,15 +269,6 @@ def concat_scene_idxs(self, scene_idxs): [self.scene_idxs, one_scene_idxs + offset]).astype(np.int32) offset = np.unique(self.scene_idxs).max() + 1 - 
def concat_label_weight(self, label_weights): - """Concat label_weight from several datasets to form self.label_weight. - - Args: - label_weights (list[np.ndarray]) - """ - # TODO: simply average them? - self.label_weight = np.array(label_weights).mean(0).astype(np.float32) - @staticmethod def _duplicate_to_list(x, num): """Repeat x `num` times to form a list.""" @@ -321,17 +294,3 @@ def _check_scene_idxs(self, scene_idx, num): return scene_idx # single idx return self._duplicate_to_list(scene_idx, num) - - def _check_label_weights(self, label_weight, num): - """Make label_weights as list/tuple.""" - if label_weight is None: - return self._duplicate_to_list(label_weight, num) - # label_weight could be str, np.ndarray, list or tuple - if isinstance(label_weight, str): # str - return self._duplicate_to_list(label_weight, num) - if isinstance(label_weight[0], str): # list of str - return label_weight - if isinstance(label_weight[0], (list, tuple, np.ndarray)): # list of w - return label_weight - # single weight - return self._duplicate_to_list(label_weight, num) diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py index 8b523e634d..7ffa397afa 100644 --- a/mmdet3d/datasets/scannet_dataset.py +++ b/mmdet3d/datasets/scannet_dataset.py @@ -225,9 +225,6 @@ class ScanNetSegDataset(Custom3DSegDataset): scene_idxs (np.ndarray | str, optional): Precomputed index to load data. For scenes with many points, we may sample it several times. Defaults to None. - label_weight (np.ndarray | str, optional): Precomputed weight to \ - balance loss calculation. If None is given, compute from data. - Defaults to None. 
""" CLASSES = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', @@ -271,8 +268,7 @@ def __init__(self, modality=None, test_mode=False, ignore_index=None, - scene_idxs=None, - label_weight=None): + scene_idxs=None): super().__init__( data_root=data_root, @@ -283,8 +279,7 @@ def __init__(self, modality=modality, test_mode=test_mode, ignore_index=ignore_index, - scene_idxs=scene_idxs, - label_weight=label_weight) + scene_idxs=scene_idxs) def get_ann_info(self, index): """Get annotation info according to the given index. @@ -358,21 +353,17 @@ def show(self, results, out_dir, show=True, pipeline=None): pred_sem_mask, out_dir, file_name, np.array(self.PALETTE), self.ignore_index, show) - def get_scene_idxs_and_label_weight(self, scene_idxs, label_weight): - """Compute scene_idxs for data sampling and label weight for loss \ - calculation. + def get_scene_idxs(self, scene_idxs): + """Compute scene_idxs for data sampling. - We sample more times for scenes with more points. Label_weight is - inversely proportional to number of class points. + We sample more times for scenes with more points. """ # when testing, we load one whole scene every time - # and we don't need label weight for loss calculation if not self.test_mode and scene_idxs is None: raise NotImplementedError( 'please provide re-sampled scene indexes for training') - return super().get_scene_idxs_and_label_weight(scene_idxs, - label_weight) + return super().get_scene_idxs(scene_idxs) def format_results(self, results, txtfile_prefix=None): r"""Format the results to txt file. 
Refer to `ScanNet documentation diff --git a/tests/test_data/test_datasets/test_s3dis_dataset.py b/tests/test_data/test_datasets/test_s3dis_dataset.py index 0067ba75f3..1c34eb7196 100644 --- a/tests/test_data/test_datasets/test_s3dis_dataset.py +++ b/tests/test_data/test_datasets/test_s3dis_dataset.py @@ -17,11 +17,6 @@ def test_seg_getitem(): [170, 120, 200], [255, 0, 0], [200, 100, 100], [10, 200, 100], [200, 200, 200], [50, 50, 50]] scene_idxs = [0 for _ in range(20)] - label_weight = [ - 3.0441623, 3.3606708, 2.6408234, 4.5086737, 4.8403897, 4.7637715, - 4.4685297, 4.7051463, 4.9190116, 5.3899403, 4.6436925, 5.0669650, - 3.6270046 - ] pipelines = [ dict( @@ -65,8 +60,7 @@ def test_seg_getitem(): modality=None, test_mode=False, ignore_index=None, - scene_idxs=scene_idxs, - label_weights=label_weight) + scene_idxs=scene_idxs) data = s3dis_dataset[0] points = data['points']._data @@ -102,8 +96,6 @@ def test_seg_getitem(): assert original_palette == palette assert s3dis_dataset.scene_idxs.dtype == np.int32 assert np.all(s3dis_dataset.scene_idxs == np.array(scene_idxs)) - assert np.allclose(s3dis_dataset.label_weight, np.array(label_weight), - 1e-5) # test dataset with selected classes s3dis_dataset = S3DISSegDataset( @@ -122,7 +114,6 @@ def test_seg_getitem(): assert s3dis_dataset.VALID_CLASS_IDS == [3, 5] assert s3dis_dataset.label_map == label_map assert s3dis_dataset.label2cat == {0: 'beam', 1: 'window'} - assert np.all(s3dis_dataset.label_weight == np.ones(2)) # test load classes from file import tempfile @@ -160,7 +151,6 @@ def test_seg_getitem(): test_mode=True, scene_idxs=scene_idxs) assert np.all(s3dis_dataset.scene_idxs == np.array([0])) - assert np.all(s3dis_dataset.label_weight == np.ones(len(class_names))) def test_seg_evaluate(): @@ -232,33 +222,23 @@ def test_multi_areas(): [170, 120, 200], [255, 0, 0], [200, 100, 100], [10, 200, 100], [200, 200, 200], [50, 50, 50]] scene_idxs = [0 for _ in range(20)] - label_weight = [ - 3.0441623, 3.3606708, 
2.6408234, 4.5086737, 4.8403897, 4.7637715, - 4.4685297, 4.7051463, 4.9190116, 5.3899403, 4.6436925, 5.0669650, - 3.6270046 - ] # repeat repeat_num = 3 s3dis_dataset = S3DISSegDataset( data_root=root_path, ann_files=[ann_file for _ in range(repeat_num)], - scene_idxs=scene_idxs, - label_weights=label_weight) + scene_idxs=scene_idxs) assert s3dis_dataset.CLASSES == class_names assert s3dis_dataset.PALETTE == palette assert len(s3dis_dataset.data_infos) == repeat_num assert np.all(s3dis_dataset.scene_idxs == np.concatenate( [np.array(scene_idxs) + i for i in range(repeat_num)])) - assert np.allclose(s3dis_dataset.label_weight, np.array(label_weight)) - # different scene_idxs and label_weight input - label_weights = np.random.rand(repeat_num, len(class_names)) + # different scene_idxs input s3dis_dataset = S3DISSegDataset( data_root=root_path, ann_files=[ann_file for _ in range(repeat_num)], - scene_idxs=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 3, 4], [0, 1, 1, 2, 2, 2]], - label_weights=label_weights) + scene_idxs=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 3, 4], [0, 1, 1, 2, 2, 2]]) assert np.all(s3dis_dataset.scene_idxs == np.array( [0, 0, 1, 2, 2, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 10, 10])) - assert np.allclose(s3dis_dataset.label_weight, label_weights.mean(0)) diff --git a/tests/test_data/test_datasets/test_scannet_dataset.py b/tests/test_data/test_datasets/test_scannet_dataset.py index b907fe92cb..bf438d3816 100644 --- a/tests/test_data/test_datasets/test_scannet_dataset.py +++ b/tests/test_data/test_datasets/test_scannet_dataset.py @@ -310,12 +310,6 @@ def test_seg_getitem(): [82, 84, 163], ] scene_idxs = [0 for _ in range(20)] - label_weight = [ - 2.389689, 2.7215734, 4.5944676, 4.8543367, 4.096086, 4.907941, - 4.690836, 4.512031, 4.623311, 4.9242644, 5.358117, 5.360071, 5.019636, - 4.967126, 5.3502126, 5.4023647, 5.4027233, 5.4169416, 5.3954206, - 4.6971426 - ] # test network inputs are (xyz, rgb, normalized_xyz) pipelines = [ @@ -361,8 +355,7 @@ def test_seg_getitem(): 
modality=None, test_mode=False, ignore_index=None, - scene_idxs=scene_idxs, - label_weight=label_weight) + scene_idxs=scene_idxs) data = scannet_dataset[0] points = data['points']._data @@ -407,8 +400,6 @@ def test_seg_getitem(): assert original_palette == palette assert scannet_dataset.scene_idxs.dtype == np.int32 assert np.all(scannet_dataset.scene_idxs == np.array(scene_idxs)) - assert np.allclose(scannet_dataset.label_weight, np.array(label_weight), - 1e-5) # test network inputs are (xyz, rgb) np.random.seed(0) @@ -496,7 +487,6 @@ def test_seg_getitem(): assert scannet_dataset.VALID_CLASS_IDS == [3, 5] assert scannet_dataset.label_map == label_map assert scannet_dataset.label2cat == {0: 'cabinet', 1: 'chair'} - assert np.all(scannet_dataset.label_weight == np.ones(2)) # test load classes from file import tempfile @@ -534,7 +524,6 @@ def test_seg_getitem(): test_mode=True, scene_idxs=scene_idxs) assert np.all(scannet_dataset.scene_idxs == np.array([0])) - assert np.all(scannet_dataset.label_weight == np.ones(len(class_names))) def test_seg_evaluate():