Skip to content

Commit

Permalink
[Feature] Support PointNet++ Segmentor (#528)
Browse files Browse the repository at this point in the history
* build BaseSegmentor for point sem seg

* add encoder-decoder segmentor

* update mmseg dependency

* fix linting errors

* wrap predicted seg_mask in dict

* add unit test

* use build_model to wrap detector and segmentor

* fix compatibility with mmseg

* faster sliding inference

* merge master

* configs for training on ScanNet

* fix CI errors

* add comments & fix typos

* hard-code class_weight into configs

* fix logger bugs

* update segmentor unit test

* logger use mmdet3d

* use eps to replace hard-coded 1e-3

* add comments

* replace np operation with torch code

* add comments for class_weight

* add comment for BaseSegmentor.simple_test

* rewrite EncoderDecoder3D to avoid inheriting from mmseg
  • Loading branch information
Wuziyi616 authored May 12, 2021
1 parent 43d7953 commit 9d852f1
Show file tree
Hide file tree
Showing 27 changed files with 1,034 additions and 44 deletions.
2 changes: 1 addition & 1 deletion configs/_base_/datasets/scannet_seg-3d-20class.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
block_size=1.5,
sample_rate=1.0,
ignore_index=len(class_names),
use_normalized_coord=True),
use_normalized_coord=False),
dict(type='NormalizePointsColor', color_mean=None),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
Expand Down
28 changes: 28 additions & 0 deletions configs/_base_/models/pointnet2_msg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# PointNet++ multi-scale-grouping (MSG) variant: starts from the SSG base
# config and overrides the backbone and the decode-head channel widths.
_base_ = './pointnet2_ssg.py'

# model settings
model = dict(
    backbone=dict(
        # mmcv config flag: replace the inherited backbone dict instead of
        # merging these keys into it
        _delete_=True,
        type='PointNet2SAMSG',
        in_channels=6,  # [xyz, rgb], should be modified with dataset
        num_points=(1024, 256, 64, 16),
        # two grouping scales per set-abstraction (SA) level
        radii=(
            (0.05, 0.1),
            (0.1, 0.2),
            (0.2, 0.4),
            (0.4, 0.8),
        ),
        num_samples=(
            (16, 32),
            (16, 32),
            (16, 32),
            (16, 32),
        ),
        sa_channels=(
            ((16, 16, 32), (32, 32, 64)),
            ((64, 64, 128), (64, 96, 128)),
            ((128, 196, 256), (128, 196, 256)),
            ((256, 256, 512), (256, 384, 512)),
        ),
        aggregation_channels=(None, None, None, None),
        # one plain string / int per SA level (these are not 1-tuples)
        fps_mods=('D-FPS', 'D-FPS', 'D-FPS', 'D-FPS'),
        fps_sample_range_lists=(-1, -1, -1, -1),
        dilated_group=(False, False, False, False),
        out_indices=(0, 1, 2, 3),
        sa_cfg=dict(
            type='PointSAModuleMSG',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False,
        ),
    ),
    decode_head=dict(
        fp_channels=(
            (1536, 256, 256),
            (512, 256, 256),
            (352, 256, 128),
            (128, 128, 128, 128),
        ),
    ),
)
35 changes: 35 additions & 0 deletions configs/_base_/models/pointnet2_ssg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# model settings
# PointNet++ single-scale-grouping (SSG) encoder-decoder segmentor.
model = dict(
    type='EncoderDecoder3D',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=6,  # [xyz, rgb], should be modified with dataset
        num_points=(1024, 256, 64, 16),
        radius=(0.1, 0.2, 0.4, 0.8),
        num_samples=(32, 32, 32, 32),
        sa_channels=(
            (32, 32, 64),
            (64, 64, 128),
            (128, 128, 256),
            (256, 256, 512),
        ),
        # the backbone gets no feature-propagation channels here; the
        # `fp_channels` are configured on the decode head below
        fp_channels=(),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False,
        ),
    ),
    decode_head=dict(
        type='PointNet2Head',
        fp_channels=(
            (768, 256, 256),
            (384, 256, 256),
            (320, 256, 128),
            (128, 128, 128, 128),
        ),
        channels=128,
        dropout_ratio=0.5,
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        act_cfg=dict(type='ReLU'),
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,  # should be modified with dataset
            loss_weight=1.0,
        ),
    ),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide'),
)
40 changes: 40 additions & 0 deletions configs/pointnet2/pointnet2_msg_16x2_scannet-3d-20class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# PointNet++ (MSG) on ScanNet 20-class semantic segmentation.
_base_ = [
    '../_base_/datasets/scannet_seg-3d-20class.py',
    '../_base_/models/pointnet2_msg.py',
    '../_base_/default_runtime.py',
]

# data settings
data = dict(samples_per_gpu=16)
evaluation = dict(interval=5)

# model settings
model = dict(
    decode_head=dict(
        num_classes=20,
        ignore_index=20,
        # `class_weight` comes from data pre-processing and is stored in
        # `data/scannet/seg_info/train_label_weight.npy`.
        # Either paste the values as done here, or pass the file path:
        # `class_weight=data/scannet/seg_info/train_label_weight.npy`
        loss_decode=dict(
            class_weight=[
                2.389689,
                2.7215734,
                4.5944676,
                4.8543367,
                4.096086,
                4.907941,
                4.690836,
                4.512031,
                4.623311,
                4.9242644,
                5.358117,
                5.360071,
                5.019636,
                4.967126,
                5.3502126,
                5.4023647,
                5.4027233,
                5.4169416,
                5.3954206,
                4.6971426,
            ],
        ),
    ),
    test_cfg=dict(
        num_points=8192,
        block_size=1.5,
        sample_rate=0.5,
        use_normalized_coord=False,
        batch_size=24,
    ),
)

# optimizer
lr = 0.001  # max learning rate
optimizer = dict(type='Adam', lr=lr, weight_decay=1e-4)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)

# runtime settings
checkpoint_config = dict(interval=5)
runner = dict(type='EpochBasedRunner', max_epochs=150)
40 changes: 40 additions & 0 deletions configs/pointnet2/pointnet2_ssg_16x2_scannet-3d-20class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# PointNet++ (SSG) on ScanNet 20-class semantic segmentation.
_base_ = [
    '../_base_/datasets/scannet_seg-3d-20class.py',
    '../_base_/models/pointnet2_ssg.py',
    '../_base_/default_runtime.py',
]

# data settings
data = dict(samples_per_gpu=16)
evaluation = dict(interval=5)

# model settings
model = dict(
    decode_head=dict(
        num_classes=20,
        ignore_index=20,
        # `class_weight` comes from data pre-processing and is stored in
        # `data/scannet/seg_info/train_label_weight.npy`.
        # Either paste the values as done here, or pass the file path:
        # `class_weight=data/scannet/seg_info/train_label_weight.npy`
        loss_decode=dict(
            class_weight=[
                2.389689,
                2.7215734,
                4.5944676,
                4.8543367,
                4.096086,
                4.907941,
                4.690836,
                4.512031,
                4.623311,
                4.9242644,
                5.358117,
                5.360071,
                5.019636,
                4.967126,
                5.3502126,
                5.4023647,
                5.4027233,
                5.4169416,
                5.3954206,
                4.6971426,
            ],
        ),
    ),
    test_cfg=dict(
        num_points=8192,
        block_size=1.5,
        sample_rate=0.5,
        use_normalized_coord=False,
        batch_size=24,
    ),
)

# optimizer
lr = 0.001  # max learning rate
optimizer = dict(type='Adam', lr=lr, weight_decay=1e-4)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)

# runtime settings
checkpoint_config = dict(interval=5)
runner = dict(type='EpochBasedRunner', max_epochs=150)
3 changes: 2 additions & 1 deletion mmdet3d/apis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
inference_multi_modality_detector, init_detector,
show_result_meshlab)
from .test import single_gpu_test
from .train import train_model

__all__ = [
'inference_detector', 'init_detector', 'single_gpu_test',
'show_result_meshlab', 'convert_SyncBN',
'show_result_meshlab', 'convert_SyncBN', 'train_model',
'inference_multi_modality_detector'
]
4 changes: 2 additions & 2 deletions mmdet3d/apis/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
show_result)
from mmdet3d.core.bbox import get_box_type
from mmdet3d.datasets.pipelines import Compose
from mmdet3d.models import build_detector
from mmdet3d.models import build_model


def convert_SyncBN(config):
Expand Down Expand Up @@ -52,7 +52,7 @@ def init_detector(config, checkpoint=None, device='cuda:0'):
config.model.pretrained = None
convert_SyncBN(config.model)
config.model.train_cfg = None
model = build_detector(config.model, test_cfg=config.get('test_cfg'))
model = build_model(config.model, test_cfg=config.get('test_cfg'))
if checkpoint is not None:
checkpoint = load_checkpoint(model, checkpoint)
if 'CLASSES' in checkpoint['meta']:
Expand Down
13 changes: 7 additions & 6 deletions mmdet3d/apis/test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import mmcv
import os
import torch
from mmcv.image import tensor2imgs
from os import path as osp

from mmdet3d.models import Base3DDetector, Base3DSegmentor


def single_gpu_test(model,
Expand Down Expand Up @@ -35,11 +37,11 @@ def single_gpu_test(model,
result = model(return_loss=False, rescale=True, **data)

if show:
# Visualize the results of MMdetection3D model
# Visualize the results of MMDetection3D model
# 'show_results' is MMdetection3D visualization API
if hasattr(model.module, 'show_results'):
if isinstance(model.module, (Base3DDetector, Base3DSegmentor)):
model.module.show_results(data, result, out_dir)
# Visualize the results of MMdetection model
# Visualize the results of MMDetection model
# 'show_result' is MMdetection visualization API
else:
batch_size = len(result)
Expand All @@ -60,8 +62,7 @@ def single_gpu_test(model,
img_show = mmcv.imresize(img_show, (ori_w, ori_h))

if out_dir:
out_file = os.path.join(out_dir,
img_meta['ori_filename'])
out_file = osp.join(out_dir, img_meta['ori_filename'])
else:
out_file = None

Expand Down
34 changes: 34 additions & 0 deletions mmdet3d/apis/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from mmdet.apis import train_detector
from mmseg.apis import train_segmentor


def train_model(model,
                dataset,
                cfg,
                distributed=False,
                validate=False,
                timestamp=None,
                meta=None):
    """Launch training with the routine that matches ``cfg.model.type``.

    ``'EncoderDecoder3D'`` models are handed to ``train_segmentor``; every
    other model type is handed to ``train_detector``. All arguments are
    forwarded unchanged and nothing is returned.

    The wrapper exists because the two cases need a different eval_hook in
    the runner. Should be deprecated in the future.
    """
    # Model types that are trained with the segmentation routine.
    segmentor_types = ('EncoderDecoder3D', )

    if cfg.model.type in segmentor_types:
        launch = train_segmentor
    else:
        launch = train_detector

    launch(
        model,
        dataset,
        cfg,
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        meta=meta)
4 changes: 3 additions & 1 deletion mmdet3d/datasets/custom_3d_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
from torch.utils.data import Dataset

from mmdet.datasets import DATASETS
from mmseg.datasets import DATASETS as SEG_DATASETS
from .pipelines import Compose
from .utils import get_loading_pipeline


@DATASETS.register_module()
@SEG_DATASETS.register_module()
class Custom3DSegDataset(Dataset):
"""Customized 3D dataset for semantic segmentation task.
Expand Down Expand Up @@ -143,7 +145,7 @@ def pre_pipeline(self, results):
results['pts_seg_fields'] = []
results['mask_fields'] = []
results['seg_fields'] = []
results['gt_bboxes_3d'] = []
results['bbox3d_fields'] = []

def prepare_train_data(self, index):
"""Training data preparation.
Expand Down
2 changes: 2 additions & 0 deletions mmdet3d/datasets/s3dis_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@

from mmdet3d.core import show_seg_result
from mmdet.datasets import DATASETS
from mmseg.datasets import DATASETS as SEG_DATASETS
from .custom_3d_seg import Custom3DSegDataset
from .pipelines import Compose


@DATASETS.register_module()
@SEG_DATASETS.register_module()
class _S3DISSegDataset(Custom3DSegDataset):
r"""S3DIS Dataset for Semantic Segmentation Task.
Expand Down
2 changes: 2 additions & 0 deletions mmdet3d/datasets/scannet_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from mmdet3d.core import show_result, show_seg_result
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet.datasets import DATASETS
from mmseg.datasets import DATASETS as SEG_DATASETS
from .custom_3d import Custom3DDataset
from .custom_3d_seg import Custom3DSegDataset
from .pipelines import Compose
Expand Down Expand Up @@ -196,6 +197,7 @@ def show(self, results, out_dir, show=True, pipeline=None):


@DATASETS.register_module()
@SEG_DATASETS.register_module()
class ScanNetSegDataset(Custom3DSegDataset):
r"""ScanNet Dataset for Semantic Segmentation Task.
Expand Down
9 changes: 5 additions & 4 deletions mmdet3d/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from .backbones import * # noqa: F401,F403
from .builder import (FUSION_LAYERS, MIDDLE_ENCODERS, VOXEL_ENCODERS,
build_backbone, build_detector, build_fusion_layer,
build_head, build_loss, build_middle_encoder, build_neck,
build_roi_extractor, build_shared_head,
build_voxel_encoder)
build_head, build_loss, build_middle_encoder,
build_model, build_neck, build_roi_extractor,
build_shared_head, build_voxel_encoder)
from .decode_heads import * # noqa: F401,F403
from .dense_heads import * # noqa: F401,F403
from .detectors import * # noqa: F401,F403
Expand All @@ -13,11 +13,12 @@
from .model_utils import * # noqa: F401,F403
from .necks import * # noqa: F401,F403
from .roi_heads import * # noqa: F401,F403
from .segmentors import * # noqa: F401,F403
from .voxel_encoders import * # noqa: F401,F403

__all__ = [
'VOXEL_ENCODERS', 'MIDDLE_ENCODERS', 'FUSION_LAYERS', 'build_backbone',
'build_neck', 'build_roi_extractor', 'build_shared_head', 'build_head',
'build_loss', 'build_detector', 'build_fusion_layer',
'build_loss', 'build_detector', 'build_fusion_layer', 'build_model',
'build_middle_encoder', 'build_voxel_encoder'
]
26 changes: 26 additions & 0 deletions mmdet3d/models/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from mmdet.models.builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
ROI_EXTRACTORS, SHARED_HEADS, build)
from mmseg.models.builder import SEGMENTORS

VOXEL_ENCODERS = Registry('voxel_encoder')
MIDDLE_ENCODERS = Registry('middle_encoder')
Expand Down Expand Up @@ -52,6 +53,31 @@ def build_detector(cfg, train_cfg=None, test_cfg=None):
return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))


def build_segmentor(cfg, train_cfg=None, test_cfg=None):
    """Build segmentor.

    Passing ``train_cfg`` / ``test_cfg`` as outer arguments is deprecated
    and triggers a ``UserWarning``; they should be specified inside ``cfg``.
    Each of the two may be given in only one place: an ``AssertionError``
    is raised when it is set both as an argument and in ``cfg``.
    """
    outer_cfg_given = not (train_cfg is None and test_cfg is None)
    if outer_cfg_given:
        warnings.warn(
            'train_cfg and test_cfg is deprecated, '
            'please specify them in model', UserWarning)

    # Reject a setting that is supplied both in the model cfg and as an
    # outer argument.
    inner_train_cfg = cfg.get('train_cfg')
    assert inner_train_cfg is None or train_cfg is None, \
        'train_cfg specified in both outer field and model field '
    inner_test_cfg = cfg.get('test_cfg')
    assert inner_test_cfg is None or test_cfg is None, \
        'test_cfg specified in both outer field and model field '

    default_args = dict(train_cfg=train_cfg, test_cfg=test_cfg)
    return build(cfg, SEGMENTORS, default_args)


def build_model(cfg, train_cfg=None, test_cfg=None):
    """A function wrapper for building 3D detector or segmentor according to
    cfg.

    ``cfg.type`` equal to ``'EncoderDecoder3D'`` is built with
    ``build_segmentor``; any other type is built with ``build_detector``.
    ``train_cfg`` and ``test_cfg`` are forwarded unchanged.

    Should be deprecated in the future.
    """
    # Model types that are built through the segmentor builder.
    segmentor_types = ('EncoderDecoder3D', )

    if cfg.type in segmentor_types:
        builder = build_segmentor
    else:
        builder = build_detector
    return builder(cfg, train_cfg=train_cfg, test_cfg=test_cfg)


def build_voxel_encoder(cfg):
"""Build voxel encoder."""
return build(cfg, VOXEL_ENCODERS)
Expand Down
Loading

0 comments on commit 9d852f1

Please sign in to comment.