[Feature] Support PointNet++ Segmentor (#528)

* build BaseSegmentor for point sem seg * add encoder-decoder segmentor * update mmseg dependency * fix linting errors * wrap predicted seg_mask in dict * add unit test * use build_model to wrap detector and segmentor * fix compatibility with mmseg * faster sliding inference * merge master * configs for training on ScanNet * fix CI errors * add comments & fix typos * hard-code class_weight into configs * fix logger bugs * update segmentor unit test * logger use mmdet3d * use eps to replace hard-coded 1e-3 * add comments * replace np operation with torch code * add comments for class_weight * add comment for BaseSegmentor.simple_test * rewrite EncoderDecoder3D to avoid inheriting from mmseg
open-mmlab · May 12, 2021 · 9d852f1 · 9d852f1
1 parent 43d7953
commit 9d852f1
Show file tree

Hide file tree

Showing 27 changed files with 1,034 additions and 44 deletions.
diff --git a/configs/_base_/datasets/scannet_seg-3d-20class.py b/configs/_base_/datasets/scannet_seg-3d-20class.py
@@ -31,7 +31,7 @@
  block_size=1.5,
  sample_rate=1.0,
  ignore_index=len(class_names),
- use_normalized_coord=True),
+ use_normalized_coord=False),
  dict(type='NormalizePointsColor', color_mean=None),
  dict(type='DefaultFormatBundle3D', class_names=class_names),
  dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])

diff --git a/configs/_base_/models/pointnet2_msg.py b/configs/_base_/models/pointnet2_msg.py
@@ -0,0 +1,28 @@
+_base_ = './pointnet2_ssg.py'
+
+# model settings
+model = dict(
+ backbone=dict(
+ _delete_=True,
+ type='PointNet2SAMSG',
+ in_channels=6, # [xyz, rgb], should be modified with dataset
+ num_points=(1024, 256, 64, 16),
+ radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)),
+ num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),
+ sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96,
+ 128)),
+ ((128, 196, 256), (128, 196, 256)), ((256, 256, 512),
+ (256, 384, 512))),
+ aggregation_channels=(None, None, None, None),
+ fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),
+ fps_sample_range_lists=((-1), (-1), (-1), (-1)),
+ dilated_group=(False, False, False, False),
+ out_indices=(0, 1, 2, 3),
+ sa_cfg=dict(
+ type='PointSAModuleMSG',
+ pool_mod='max',
+ use_xyz=True,
+ normalize_xyz=False)),
+ decode_head=dict(
+ fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128),
+ (128, 128, 128, 128))))
diff --git a/configs/_base_/models/pointnet2_ssg.py b/configs/_base_/models/pointnet2_ssg.py
@@ -0,0 +1,35 @@
+# model settings
+model = dict(
+ type='EncoderDecoder3D',
+ backbone=dict(
+ type='PointNet2SASSG',
+ in_channels=6, # [xyz, rgb], should be modified with dataset
+ num_points=(1024, 256, 64, 16),
+ radius=(0.1, 0.2, 0.4, 0.8),
+ num_samples=(32, 32, 32, 32),
+ sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
+ 512)),
+ fp_channels=(),
+ norm_cfg=dict(type='BN2d'),
+ sa_cfg=dict(
+ type='PointSAModule',
+ pool_mod='max',
+ use_xyz=True,
+ normalize_xyz=False)),
+ decode_head=dict(
+ type='PointNet2Head',
+ fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
+ (128, 128, 128, 128)),
+ channels=128,
+ dropout_ratio=0.5,
+ conv_cfg=dict(type='Conv1d'),
+ norm_cfg=dict(type='BN1d'),
+ act_cfg=dict(type='ReLU'),
+ loss_decode=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ class_weight=None, # should be modified with dataset
+ loss_weight=1.0)),
+ # model training and testing settings
+ train_cfg=dict(),
+ test_cfg=dict(mode='slide'))
diff --git a/configs/pointnet2/pointnet2_msg_16x2_scannet-3d-20class.py b/configs/pointnet2/pointnet2_msg_16x2_scannet-3d-20class.py
@@ -0,0 +1,40 @@
+_base_ = [
+ '../_base_/datasets/scannet_seg-3d-20class.py',
+ '../_base_/models/pointnet2_msg.py', '../_base_/default_runtime.py'
+]
+
+# data settings
+data = dict(samples_per_gpu=16)
+evaluation = dict(interval=5)
+
+# model settings
+model = dict(
+ decode_head=dict(
+ num_classes=20,
+ ignore_index=20,
+ # `class_weight` is generated in data pre-processing, saved in
+ # `data/scannet/seg_info/train_label_weight.npy`
+ # you can copy-paste the values here, or pass the file path as a string:
+ # `class_weight='data/scannet/seg_info/train_label_weight.npy'`
+ loss_decode=dict(class_weight=[
+ 2.389689, 2.7215734, 4.5944676, 4.8543367, 4.096086, 4.907941,
+ 4.690836, 4.512031, 4.623311, 4.9242644, 5.358117, 5.360071,
+ 5.019636, 4.967126, 5.3502126, 5.4023647, 5.4027233, 5.4169416,
+ 5.3954206, 4.6971426
+ ])),
+ test_cfg=dict(
+ num_points=8192,
+ block_size=1.5,
+ sample_rate=0.5,
+ use_normalized_coord=False,
+ batch_size=24))
+
+# optimizer
+lr = 0.001 # max learning rate
+optimizer = dict(type='Adam', lr=lr, weight_decay=1e-4)
+optimizer_config = dict(grad_clip=None)
+lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
+
+# runtime settings
+checkpoint_config = dict(interval=5)
+runner = dict(type='EpochBasedRunner', max_epochs=150)
diff --git a/configs/pointnet2/pointnet2_ssg_16x2_scannet-3d-20class.py b/configs/pointnet2/pointnet2_ssg_16x2_scannet-3d-20class.py
@@ -0,0 +1,40 @@
+_base_ = [
+ '../_base_/datasets/scannet_seg-3d-20class.py',
+ '../_base_/models/pointnet2_ssg.py', '../_base_/default_runtime.py'
+]
+
+# data settings
+data = dict(samples_per_gpu=16)
+evaluation = dict(interval=5)
+
+# model settings
+model = dict(
+ decode_head=dict(
+ num_classes=20,
+ ignore_index=20,
+ # `class_weight` is generated in data pre-processing, saved in
+ # `data/scannet/seg_info/train_label_weight.npy`
+ # you can copy-paste the values here, or pass the file path as a string:
+ # `class_weight='data/scannet/seg_info/train_label_weight.npy'`
+ loss_decode=dict(class_weight=[
+ 2.389689, 2.7215734, 4.5944676, 4.8543367, 4.096086, 4.907941,
+ 4.690836, 4.512031, 4.623311, 4.9242644, 5.358117, 5.360071,
+ 5.019636, 4.967126, 5.3502126, 5.4023647, 5.4027233, 5.4169416,
+ 5.3954206, 4.6971426
+ ])),
+ test_cfg=dict(
+ num_points=8192,
+ block_size=1.5,
+ sample_rate=0.5,
+ use_normalized_coord=False,
+ batch_size=24))
+
+# optimizer
+lr = 0.001 # max learning rate
+optimizer = dict(type='Adam', lr=lr, weight_decay=1e-4)
+optimizer_config = dict(grad_clip=None)
+lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
+
+# runtime settings
+checkpoint_config = dict(interval=5)
+runner = dict(type='EpochBasedRunner', max_epochs=150)
diff --git a/mmdet3d/apis/__init__.py b/mmdet3d/apis/__init__.py
@@ -2,9 +2,10 @@
  inference_multi_modality_detector, init_detector,
  show_result_meshlab)
 from .test import single_gpu_test
+from .train import train_model
 
 __all__ = [
  'inference_detector', 'init_detector', 'single_gpu_test',
- 'show_result_meshlab', 'convert_SyncBN',
+ 'show_result_meshlab', 'convert_SyncBN', 'train_model',
  'inference_multi_modality_detector'
 ]
diff --git a/mmdet3d/apis/inference.py b/mmdet3d/apis/inference.py
@@ -12,7 +12,7 @@
  show_result)
 from mmdet3d.core.bbox import get_box_type
 from mmdet3d.datasets.pipelines import Compose
-from mmdet3d.models import build_detector
+from mmdet3d.models import build_model
 
 
 def convert_SyncBN(config):
@@ -52,7 +52,7 @@ def init_detector(config, checkpoint=None, device='cuda:0'):
  config.model.pretrained = None
  convert_SyncBN(config.model)
  config.model.train_cfg = None
- model = build_detector(config.model, test_cfg=config.get('test_cfg'))
+ model = build_model(config.model, test_cfg=config.get('test_cfg'))
  if checkpoint is not None:
  checkpoint = load_checkpoint(model, checkpoint)
  if 'CLASSES' in checkpoint['meta']:

diff --git a/mmdet3d/apis/test.py b/mmdet3d/apis/test.py
@@ -1,7 +1,9 @@
 import mmcv
-import os
 import torch
 from mmcv.image import tensor2imgs
+from os import path as osp
+
+from mmdet3d.models import Base3DDetector, Base3DSegmentor
 
 
 def single_gpu_test(model,
@@ -35,11 +37,11 @@ def single_gpu_test(model,
  result = model(return_loss=False, rescale=True, **data)
 
  if show:
- # Visualize the results of MMdetection3D model
+ # Visualize the results of MMDetection3D model
  # 'show_results' is MMdetection3D visualization API
- if hasattr(model.module, 'show_results'):
+ if isinstance(model.module, (Base3DDetector, Base3DSegmentor)):
  model.module.show_results(data, result, out_dir)
- # Visualize the results of MMdetection model
+ # Visualize the results of MMDetection model
  # 'show_result' is MMdetection visualization API
  else:
  batch_size = len(result)
@@ -60,8 +62,7 @@ def single_gpu_test(model,
  img_show = mmcv.imresize(img_show, (ori_w, ori_h))
 
  if out_dir:
- out_file = os.path.join(out_dir,
- img_meta['ori_filename'])
+ out_file = osp.join(out_dir, img_meta['ori_filename'])
  else:
  out_file = None
 

diff --git a/mmdet3d/apis/train.py b/mmdet3d/apis/train.py
@@ -0,0 +1,34 @@
+from mmdet.apis import train_detector
+from mmseg.apis import train_segmentor
+
+
+def train_model(model,
+ dataset,
+ cfg,
+ distributed=False,
+ validate=False,
+ timestamp=None,
+ meta=None):
+ """A function wrapper for launching model training according to cfg.
+
+ Needed because segmentors and detectors use different eval hooks in the
+ runner. Should be deprecated in the future.
+ """
+ if cfg.model.type in ['EncoderDecoder3D']:
+ train_segmentor(
+ model,
+ dataset,
+ cfg,
+ distributed=distributed,
+ validate=validate,
+ timestamp=timestamp,
+ meta=meta)
+ else:
+ train_detector(
+ model,
+ dataset,
+ cfg,
+ distributed=distributed,
+ validate=validate,
+ timestamp=timestamp,
+ meta=meta)
diff --git a/mmdet3d/datasets/custom_3d_seg.py b/mmdet3d/datasets/custom_3d_seg.py
@@ -6,11 +6,13 @@
 from torch.utils.data import Dataset
 
 from mmdet.datasets import DATASETS
+from mmseg.datasets import DATASETS as SEG_DATASETS
 from .pipelines import Compose
 from .utils import get_loading_pipeline
 
 
 @DATASETS.register_module()
+@SEG_DATASETS.register_module()
 class Custom3DSegDataset(Dataset):
  """Customized 3D dataset for semantic segmentation task.
 
@@ -143,7 +145,7 @@ def pre_pipeline(self, results):
  results['pts_seg_fields'] = []
  results['mask_fields'] = []
  results['seg_fields'] = []
- results['gt_bboxes_3d'] = []
+ results['bbox3d_fields'] = []
 
  def prepare_train_data(self, index):
  """Training data preparation.

diff --git a/mmdet3d/datasets/s3dis_dataset.py b/mmdet3d/datasets/s3dis_dataset.py
@@ -3,11 +3,13 @@
 
 from mmdet3d.core import show_seg_result
 from mmdet.datasets import DATASETS
+from mmseg.datasets import DATASETS as SEG_DATASETS
 from .custom_3d_seg import Custom3DSegDataset
 from .pipelines import Compose
 
 
 @DATASETS.register_module()
+@SEG_DATASETS.register_module()
 class _S3DISSegDataset(Custom3DSegDataset):
  r"""S3DIS Dataset for Semantic Segmentation Task.
 

diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py
@@ -6,6 +6,7 @@
 from mmdet3d.core import show_result, show_seg_result
 from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet.datasets import DATASETS
+from mmseg.datasets import DATASETS as SEG_DATASETS
 from .custom_3d import Custom3DDataset
 from .custom_3d_seg import Custom3DSegDataset
 from .pipelines import Compose
@@ -196,6 +197,7 @@ def show(self, results, out_dir, show=True, pipeline=None):
 
 
 @DATASETS.register_module()
+@SEG_DATASETS.register_module()
 class ScanNetSegDataset(Custom3DSegDataset):
  r"""ScanNet Dataset for Semantic Segmentation Task.
 

diff --git a/mmdet3d/models/__init__.py b/mmdet3d/models/__init__.py
@@ -1,9 +1,9 @@
 from .backbones import * # noqa: F401,F403
 from .builder import (FUSION_LAYERS, MIDDLE_ENCODERS, VOXEL_ENCODERS,
  build_backbone, build_detector, build_fusion_layer,
- build_head, build_loss, build_middle_encoder, build_neck,
- build_roi_extractor, build_shared_head,
- build_voxel_encoder)
+ build_head, build_loss, build_middle_encoder,
+ build_model, build_neck, build_roi_extractor,
+ build_shared_head, build_voxel_encoder)
 from .decode_heads import * # noqa: F401,F403
 from .dense_heads import * # noqa: F401,F403
 from .detectors import * # noqa: F401,F403
@@ -13,11 +13,12 @@
 from .model_utils import * # noqa: F401,F403
 from .necks import * # noqa: F401,F403
 from .roi_heads import * # noqa: F401,F403
+from .segmentors import * # noqa: F401,F403
 from .voxel_encoders import * # noqa: F401,F403
 
 __all__ = [
  'VOXEL_ENCODERS', 'MIDDLE_ENCODERS', 'FUSION_LAYERS', 'build_backbone',
  'build_neck', 'build_roi_extractor', 'build_shared_head', 'build_head',
- 'build_loss', 'build_detector', 'build_fusion_layer',
+ 'build_loss', 'build_detector', 'build_fusion_layer', 'build_model',
  'build_middle_encoder', 'build_voxel_encoder'
 ]
diff --git a/mmdet3d/models/builder.py b/mmdet3d/models/builder.py
@@ -3,6 +3,7 @@
 
 from mmdet.models.builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
  ROI_EXTRACTORS, SHARED_HEADS, build)
+from mmseg.models.builder import SEGMENTORS
 
 VOXEL_ENCODERS = Registry('voxel_encoder')
 MIDDLE_ENCODERS = Registry('middle_encoder')
@@ -52,6 +53,31 @@ def build_detector(cfg, train_cfg=None, test_cfg=None):
  return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
 
 
+def build_segmentor(cfg, train_cfg=None, test_cfg=None):
+ """Build segmentor."""
+ if train_cfg is not None or test_cfg is not None:
+ warnings.warn(
+ 'train_cfg and test_cfg is deprecated, '
+ 'please specify them in model', UserWarning)
+ assert cfg.get('train_cfg') is None or train_cfg is None, \
+ 'train_cfg specified in both outer field and model field '
+ assert cfg.get('test_cfg') is None or test_cfg is None, \
+ 'test_cfg specified in both outer field and model field '
+ return build(cfg, SEGMENTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
+
+
+def build_model(cfg, train_cfg=None, test_cfg=None):
+ """A function wrapper for building 3D detector or segmentor according to
+ cfg.
+
+ Should be deprecated in the future.
+ """
+ if cfg.type in ['EncoderDecoder3D']:
+ return build_segmentor(cfg, train_cfg=train_cfg, test_cfg=test_cfg)
+ else:
+ return build_detector(cfg, train_cfg=train_cfg, test_cfg=test_cfg)
+
+
 def build_voxel_encoder(cfg):
  """Build voxel encoder."""
  return build(cfg, VOXEL_ENCODERS)