diff --git a/README.md b/README.md index 7389577fe1..78f1a2d8bc 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,7 @@ Supported methods: - [x] [DMNet (ICCV'2019)](configs/dmnet) - [x] [ANN (ICCV'2019)](configs/ann) - [x] [GCNet (ICCVW'2019/TPAMI'2020)](configs/gcnet) +- [x] [FastFCN (ArXiv'2019)](configs/fastfcn) - [x] [Fast-SCNN (ArXiv'2019)](configs/fastscnn) - [x] [ISANet (ArXiv'2019/IJCV'2021)](configs/isanet) - [x] [OCRNet (ECCV'2020)](configs/ocrnet) diff --git a/README_zh-CN.md b/README_zh-CN.md index 2622ed0f79..5ebef6f40e 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -89,6 +89,7 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O - [x] [DMNet (ICCV'2019)](configs/dmnet) - [x] [ANN (ICCV'2019)](configs/ann) - [x] [GCNet (ICCVW'2019/TPAMI'2020)](configs/gcnet) +- [x] [FastFCN (ArXiv'2019)](configs/fastfcn) - [x] [Fast-SCNN (ArXiv'2019)](configs/fastscnn) - [x] [ISANet (ArXiv'2019/IJCV'2021)](configs/isanet) - [x] [OCRNet (ECCV'2020)](configs/ocrnet) diff --git a/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py b/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py new file mode 100644 index 0000000000..9dc8609aeb --- /dev/null +++ b/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py @@ -0,0 +1,53 @@ +# model settings: ResNetV1c-50 backbone, JPU neck, PSP decode head +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + dilations=(1, 1, 2, 4), + strides=(1, 2, 2, 2), + out_indices=(1, 2, 3), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='JPU', + in_channels=(512, 1024, 2048), + mid_channels=512, + start_level=0, + end_level=-1, + dilations=(1, 2, 4, 8), + align_corners=False, + norm_cfg=norm_cfg), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=2, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, +
loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=1, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/fastfcn/README.md b/configs/fastfcn/README.md new file mode 100644 index 0000000000..768502b05f --- /dev/null +++ b/configs/fastfcn/README.md @@ -0,0 +1,41 @@ +# FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation + +## Introduction + + + +Official Repo + +Code Snippet + +
+FastFCN (ArXiv'2019) + +```latex +@article{wu2019fastfcn, +title={Fastfcn: Rethinking dilated convolution in the backbone for semantic segmentation}, +author={Wu, Huikai and Zhang, Junge and Huang, Kaiqi and Liang, Kongming and Yu, Yizhou}, +journal={arXiv preprint arXiv:1903.11816}, +year={2019} +} +``` + +
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | --------- | --------- | ------: | -------- | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 + JPU | R-50-D32 | 512x1024 | 80000 | 5.67 | 2.64 | 79.12 | 80.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722-5d1a2648.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722.log.json) | +| DeepLabV3 + JPU (4x4) | R-50-D32 | 512x1024 | 80000 | 9.79 | - | 79.52 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357-72220849.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357.log.json) | +| PSPNet + JPU | R-50-D32 | 512x1024 | 80000 | 5.67 | 4.40 | 79.26 | 80.86 |
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722-57749bed.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722.log.json) | +| PSPNet + JPU (4x4) | R-50-D32 | 512x1024 | 80000 | 9.94 | - | 78.76 | 80.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841-77e87b0a.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841.log.json) | +| EncNet + JPU | R-50-D32 | 512x1024 | 80000 | 8.15 | 4.77 | 77.97 | 79.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036-78da5046.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036.log.json) | +| EncNet + JPU (4x4) | R-50-D32 | 512x1024 | 80000 | 15.45 | - | 78.6 | 80.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217-e1eb6dbb.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217.log.json) | + +Note: + +- `4x4` means 4 GPUs with 4 samples per GPU in training; the default setting is 4 GPUs with 2 samples per GPU. +- Results of [DeepLabV3 (mIoU: 79.32)](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3), [PSPNet (mIoU: 78.55)](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet) and [EncNet (mIoU: 77.94)](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) can be found in their respective config directories. diff --git a/configs/fastfcn/fastfcn.yml b/configs/fastfcn/fastfcn.yml new file mode 100644 index 0000000000..5af2b64a97 --- /dev/null +++ b/configs/fastfcn/fastfcn.yml @@ -0,0 +1,126 @@ +Collections: +- Name: fastfcn + Metadata: + Training Data: + - Cityscapes + Paper: + URL: https://arxiv.org/abs/1903.11816 + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + README: configs/fastfcn/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Version: v0.18.0 + Converted From: + Code: https://github.com/wuhuikai/FastFCN +Models: +- Name: fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes + In Collection: fastfcn + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 378.79 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + memory (GB): 5.67 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.12 + mIoU(ms+flip): 80.58 + Config:
configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722-5d1a2648.pth +- Name: fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes + In Collection: fastfcn + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + memory (GB): 9.79 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.52 + mIoU(ms+flip): 80.91 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357-72220849.pth +- Name: fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes + In Collection: fastfcn + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 227.27 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + memory (GB): 5.67 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.26 + mIoU(ms+flip): 80.86 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722-57749bed.pth +- Name: fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes + In Collection: fastfcn + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + memory (GB): 9.94 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.76 + mIoU(ms+flip): 80.03 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py + Weights:
https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841-77e87b0a.pth +- Name: fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes + In Collection: fastfcn + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 209.64 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + memory (GB): 8.15 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.97 + mIoU(ms+flip): 79.92 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036-78da5046.pth +- Name: fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes + In Collection: fastfcn + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + memory (GB): 15.45 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.6 + mIoU(ms+flip): 80.25 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217-e1eb6dbb.pth diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000..87fc274dc5 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py @@ -0,0 +1,6 @@ +# 4x4 setting: 4 samples and 4 workers per GPU on top of the base config +_base_ = './fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py' +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git
a/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000..dc86da3b6f --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py @@ -0,0 +1,20 @@ +# model settings: swap the inherited PSP decode head for ASPPHead +_base_ = './fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='ASPPHead', + in_channels=2048, + in_index=2, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000..59d294b5f4 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py @@ -0,0 +1,6 @@ +# 4x4 setting: 4 samples and 4 workers per GPU on top of the base config +_base_ = './fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py' +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000..cc68edfe5b --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py @@ -0,0 +1,24 @@ +# model settings: swap the inherited PSP decode head for EncHead +_base_ = './fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(0, 1, 2), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, +
align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000..5fe5ca16b1 --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fastfcn_r50-d32_jpu_psp.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py new file mode 100644 index 0000000000..e7637fabed --- /dev/null +++ b/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fastfcn_r50-d32_jpu_psp.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] diff --git a/mmseg/models/necks/__init__.py b/mmseg/models/necks/__init__.py index 15edad493c..aba73f165b 100644 --- a/mmseg/models/necks/__init__.py +++ b/mmseg/models/necks/__init__.py @@ -1,7 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved.
from .fpn import FPN from .ic_neck import ICNeck +from .jpu import JPU from .mla_neck import MLANeck from .multilevel_neck import MultiLevelNeck -__all__ = ['FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck'] +__all__ = ['FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck', 'JPU'] diff --git a/mmseg/models/necks/jpu.py b/mmseg/models/necks/jpu.py new file mode 100644 index 0000000000..3cc6b9f428 --- /dev/null +++ b/mmseg/models/necks/jpu.py @@ -0,0 +1,131 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule +from mmcv.runner import BaseModule + +from mmseg.ops import resize +from ..builder import NECKS + + +@NECKS.register_module() +class JPU(BaseModule): + """FastFCN: Rethinking Dilated Convolution in the Backbone + for Semantic Segmentation. + + This Joint Pyramid Upsampling (JPU) neck is the implementation of + `FastFCN <https://arxiv.org/abs/1903.11816>`_. + + Args: + in_channels (Tuple[int], optional): The number of input channels + for each convolution operation before upsampling. + Default: (512, 1024, 2048). + mid_channels (int): The number of channels of each JPU branch. + JPU outputs len(dilations) * mid_channels channels. Default: 512. + start_level (int): Index of the start input backbone level used to + build the feature pyramid. Default: 0. + end_level (int): Index of the end input backbone level (exclusive) to + build the feature pyramid. Default: -1, which means the last level. + dilations (tuple[int]): Dilation rate of each Depthwise + Separable ConvModule. Default: (1, 2, 4, 8). + align_corners (bool, optional): The align_corners argument of + resize operation. Default: False. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None.
+ """ + + def __init__(self, + in_channels=(512, 1024, 2048), + mid_channels=512, + start_level=0, + end_level=-1, + dilations=(1, 2, 4, 8), + align_corners=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(JPU, self).__init__(init_cfg=init_cfg) + assert isinstance(in_channels, tuple) + assert isinstance(dilations, tuple) + self.in_channels = in_channels + self.mid_channels = mid_channels + self.start_level = start_level + self.num_ins = len(in_channels) + if end_level == -1: + self.backbone_end_level = self.num_ins + else: + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + + self.dilations = dilations + self.align_corners = align_corners + + self.conv_layers = nn.ModuleList() + self.dilation_layers = nn.ModuleList() + for i in range(self.start_level, self.backbone_end_level): + conv_layer = nn.Sequential( + ConvModule( + self.in_channels[i], + self.mid_channels, + kernel_size=3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.conv_layers.append(conv_layer) + for dilation in dilations: + dilation_layer = nn.Sequential( + DepthwiseSeparableConvModule( + in_channels=(self.backbone_end_level - self.start_level) * + self.mid_channels, + out_channels=self.mid_channels, + kernel_size=3, + stride=1, + padding=dilation, + dilation=dilation, + dw_norm_cfg=norm_cfg, + dw_act_cfg=None, + pw_norm_cfg=norm_cfg, + pw_act_cfg=act_cfg)) + self.dilation_layers.append(dilation_layer) + + def forward(self, inputs): + """Return pass-through inputs plus the fused JPU feature map.""" + assert len(inputs) == len(self.in_channels), ( + 'Length of inputs must be the same with self.in_channels!')
+ + feats = [ + self.conv_layers[i - self.start_level](inputs[i]) + for i in range(self.start_level, self.backbone_end_level) + ] + + h, w = feats[0].shape[2:] + for i in range(1, len(feats)): + feats[i] = resize( + feats[i], + size=(h, w), + mode='bilinear', + align_corners=self.align_corners) + + feat = torch.cat(feats, dim=1) + concat_feat = torch.cat([ + layer(feat) for layer in self.dilation_layers + ], + dim=1) + + outs = [] + + # Default: outs[2] is the output of JPU for decoder head, outs[1] is + # the feature map from backbone for auxiliary head. Additionally, + # outs[0] can also be used for auxiliary head. + for i in range(self.start_level, self.backbone_end_level - 1): + outs.append(inputs[i]) + outs.append(concat_feat) + return tuple(outs) diff --git a/model-index.yml b/model-index.yml index f0f9bb80e9..00da8d6a2a 100644 --- a/model-index.yml +++ b/model-index.yml @@ -13,6 +13,7 @@ Import: - configs/dpt/dpt.yml - configs/emanet/emanet.yml - configs/encnet/encnet.yml +- configs/fastfcn/fastfcn.yml - configs/fastscnn/fastscnn.yml - configs/fcn/fcn.yml - configs/fp16/fp16.yml diff --git a/tests/test_models/test_necks/test_jpu.py b/tests/test_models/test_necks/test_jpu.py new file mode 100644 index 0000000000..88637044c6 --- /dev/null +++ b/tests/test_models/test_necks/test_jpu.py @@ -0,0 +1,40 @@ +# Copyright (c) OpenMMLab. All rights reserved.
+import pytest +import torch + +from mmseg.models.necks import JPU + + +def test_fastfcn_neck(): + # Test FastFCN Standard Forward + model = JPU() + model.init_weights() + model.train() + batch_size = 1 + inputs = [ + torch.randn(batch_size, 512, 64, 128), + torch.randn(batch_size, 1024, 32, 64), + torch.randn(batch_size, 2048, 16, 32) + ] + feat = model(inputs) + + assert len(feat) == 3 + assert feat[0].shape == torch.Size([batch_size, 512, 64, 128]) + assert feat[1].shape == torch.Size([batch_size, 1024, 32, 64]) + assert feat[2].shape == torch.Size([batch_size, 2048, 64, 128]) + + with pytest.raises(AssertionError): + # end_level must not exceed len(in_channels). + JPU(in_channels=(256, 512, 1024), start_level=0, end_level=5) + + # Test non-default start_level + model = JPU(in_channels=(512, 1024, 2048), start_level=1, end_level=-1) + inputs = [ + torch.randn(batch_size, 512, 64, 128), + torch.randn(batch_size, 1024, 32, 64), + torch.randn(batch_size, 2048, 16, 32) + ] + feat = model(inputs) + assert len(feat) == 2 + assert feat[0].shape == torch.Size([batch_size, 1024, 32, 64]) + assert feat[1].shape == torch.Size([batch_size, 2048, 32, 64])