diff --git a/README.md b/README.md
index 2443171c86..7389577fe1 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,7 @@ Supported methods:
- [x] [PSANet (ECCV'2018)](configs/psanet)
- [x] [DeepLabV3+ (CVPR'2018)](configs/deeplabv3plus)
- [x] [UPerNet (ECCV'2018)](configs/upernet)
+- [x] [ICNet (ECCV'2018)](configs/icnet)
- [x] [NonLocal Net (CVPR'2018)](configs/nonlocal_net)
- [x] [EncNet (CVPR'2018)](configs/encnet)
- [x] [Semantic FPN (CVPR'2019)](configs/sem_fpn)
diff --git a/README_zh-CN.md b/README_zh-CN.md
index ac90eefeef..2622ed0f79 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -78,6 +78,7 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O
- [x] [PSANet (ECCV'2018)](configs/psanet)
- [x] [DeepLabV3+ (CVPR'2018)](configs/deeplabv3plus)
- [x] [UPerNet (ECCV'2018)](configs/upernet)
+- [x] [ICNet (ECCV'2018)](configs/icnet)
- [x] [NonLocal Net (CVPR'2018)](configs/nonlocal_net)
- [x] [EncNet (CVPR'2018)](configs/encnet)
- [x] [Semantic FPN (CVPR'2019)](configs/sem_fpn)
diff --git a/configs/_base_/datasets/cityscapes_832x832.py b/configs/_base_/datasets/cityscapes_832x832.py
new file mode 100644
index 0000000000..b9325cc008
--- /dev/null
+++ b/configs/_base_/datasets/cityscapes_832x832.py
@@ -0,0 +1,35 @@
+_base_ = './cityscapes.py'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (832, 832)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations'),
+ dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(2048, 1024),
+ # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ train=dict(pipeline=train_pipeline),
+ val=dict(pipeline=test_pipeline),
+ test=dict(pipeline=test_pipeline))
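For reference (not part of the patch): a minimal sketch of how this dataset config resolves against its `_base_`, assuming an mmseg v0.x checkout run from the repo root, where `mmcv.Config` is the loader these files are written for.

```python
from mmcv import Config

# Resolve the 832x832 dataset config against its _base_ (cityscapes.py).
cfg = Config.fromfile('configs/_base_/datasets/cityscapes_832x832.py')

# Top-level variables such as crop_size become attributes of the config.
assert tuple(cfg.crop_size) == (832, 832)

# Overridden train pipeline: 0.5x-2.0x rescale of 2048x1024, 832x832 random
# crop, flip, photometric distortion, normalize, pad to the crop size.
for step in cfg.data.train.pipeline:
    print(step['type'])
```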
diff --git a/configs/_base_/models/icnet_r50-d8.py b/configs/_base_/models/icnet_r50-d8.py
new file mode 100644
index 0000000000..d7273cd28e
--- /dev/null
+++ b/configs/_base_/models/icnet_r50-d8.py
@@ -0,0 +1,74 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ backbone=dict(
+ type='ICNet',
+ backbone_cfg=dict(
+ type='ResNetV1c',
+ in_channels=3,
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ in_channels=3,
+ layer_channels=(512, 2048),
+ light_branch_middle_channels=32,
+ psp_out_channels=512,
+ out_channels=(64, 256, 256),
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ ),
+ neck=dict(
+ type='ICNeck',
+ in_channels=(64, 256, 256),
+ out_channels=128,
+ norm_cfg=norm_cfg,
+ align_corners=False),
+ decode_head=dict(
+ type='FCNHead',
+ in_channels=128,
+ channels=128,
+ num_convs=1,
+ in_index=2,
+ dropout_ratio=0,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ concat_input=False,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=[
+ dict(
+ type='FCNHead',
+ in_channels=128,
+ channels=128,
+ num_convs=1,
+ num_classes=19,
+ in_index=0,
+ norm_cfg=norm_cfg,
+ concat_input=False,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ dict(
+ type='FCNHead',
+ in_channels=128,
+ channels=128,
+ num_convs=1,
+ num_classes=19,
+ in_index=1,
+ norm_cfg=norm_cfg,
+ concat_input=False,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+ ],
+ # model training and testing settings
+ train_cfg=dict(),
+ test_cfg=dict(mode='whole'))
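A hedged sketch (not part of the patch) of building this model config locally. Since it uses SyncBN, which needs a distributed process group, the sketch swaps in plain BN for a single-process sanity check; `forward_dummy` is the raw-tensor forward mmseg uses for FLOPs counting.

```python
import torch
from mmcv import Config
from mmseg.models import build_segmentor

cfg = Config.fromfile('configs/_base_/models/icnet_r50-d8.py')

# SyncBN needs a distributed process group; use plain BN for a local run.
bn = dict(type='BN', requires_grad=True)
cfg.model.backbone.norm_cfg = bn
cfg.model.backbone.backbone_cfg.norm_cfg = bn
cfg.model.neck.norm_cfg = bn
cfg.model.decode_head.norm_cfg = bn
for aux in cfg.model.auxiliary_head:
    aux.norm_cfg = bn

model = build_segmentor(cfg.model)
model.eval()
with torch.no_grad():
    # forward_dummy runs backbone + neck + decode head on a bare tensor.
    out = model.forward_dummy(torch.randn(1, 3, 832, 832))
print(out.shape)  # expected: torch.Size([1, 19, 832, 832])
```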
diff --git a/configs/bisenetv1/README.md b/configs/bisenetv1/README.md
index 344781068a..dd5bd503b2 100644
--- a/configs/bisenetv1/README.md
+++ b/configs/bisenetv1/README.md
@@ -32,7 +32,7 @@
| BiSeNetV1 (No Pretrain) | R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | 74.44 | 77.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239-c55e78e2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239.log.json) |
| BiSeNetV1| R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | 74.37 | 76.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251-8ba80eff.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251.log.json) |
| BiSeNetV1 (4x8) | R-18-D32 | 1024x1024 | 160000 | 11.17 | 31.77 | 75.16 | 77.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322-bb8db75f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322.log.json) |
-| BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 3.3 | 7.71 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) |
+| BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) |
| BiSeNetV1 | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 77.68 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628.log.json) |
Note:
diff --git a/configs/bisenetv1/bisenetv1.yml b/configs/bisenetv1/bisenetv1.yml
index 6de872b863..8ea94df4bd 100644
--- a/configs/bisenetv1/bisenetv1.yml
+++ b/configs/bisenetv1/bisenetv1.yml
@@ -92,7 +92,7 @@ Models:
batch size: 1
mode: FP32
resolution: (1024,1024)
- memory (GB): 3.3
+ memory (GB): 15.39
Results:
- Task: Semantic Segmentation
Dataset: Cityscapes
diff --git a/configs/icnet/README.md b/configs/icnet/README.md
new file mode 100644
index 0000000000..62d2040aa5
--- /dev/null
+++ b/configs/icnet/README.md
@@ -0,0 +1,45 @@
+# ICNet for Real-time Semantic Segmentation on High-resolution Images
+
+## Introduction
+
+[Official Repo](https://github.com/hszhao/ICNet)
+
+[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77)
+
+[ICNet (ECCV'2018)](https://arxiv.org/abs/1704.08545)
+
+```latex
+@inproceedings{zhao2018icnet,
+ title={ICNet for real-time semantic segmentation on high-resolution images},
+ author={Zhao, Hengshuang and Qi, Xiaojuan and Shen, Xiaoyong and Shi, Jianping and Jia, Jiaya},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={405--420},
+ year={2018}
+}
+```
+
+## Results and models
+
+### Cityscapes
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
+| ------ | ---------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| ICNet | R-18-D8 | 832x832 | 80000 | 1.70 | 27.12 | 68.14 | 70.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521.log.json) |
+| ICNet | R-18-D8 | 832x832 | 160000 | - | - | 71.64 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153.log.json) |
+| ICNet (in1k-pre) | R-18-D8 | 832x832 | 80000 | - | - | 72.51 | 74.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354.log.json) |
+| ICNet (in1k-pre) | R-18-D8 | 832x832 | 160000 | - | - | 74.43 | 76.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702.log.json) |
+| ICNet | R-50-D8 | 832x832 | 80000 | 2.53 | 20.08 | 68.91 | 69.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625.log.json) |
+| ICNet | R-50-D8 | 832x832 | 160000 | - | - | 73.82 | 75.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612.log.json) |
+| ICNet (in1k-pre) | R-50-D8 | 832x832 | 80000 | - | - | 74.58 | 76.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943.log.json) |
+| ICNet (in1k-pre) | R-50-D8 | 832x832 | 160000 | - | - | 76.29 | 78.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715.log.json) |
+| ICNet | R-101-D8 | 832x832 | 80000 | 3.08 | 16.95 | 70.28 | 71.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447.log.json) |
+| ICNet | R-101-D8 | 832x832 | 160000 | - | - | 73.80 | 76.10 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350.log.json) |
+| ICNet (in1k-pre) | R-101-D8 | 832x832 | 80000 | - | - | 75.57 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414.log.json) |
+| ICNet (in1k-pre) | R-101-D8 | 832x832 | 160000 | - | - | 76.15 | 77.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612.log.json) |
+
+Note: `in1k-pre` means the backbone is initialized with ImageNet-1K pretrained weights.
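
For reference, a hedged single-image inference sketch using the checkpoints above. It assumes an mmseg v0.x install run from the repo root; the checkpoint filename is the one linked in the table, downloaded locally, and `demo.png` is a placeholder for any street-scene image.

```python
from mmseg.apis import inference_segmentor, init_segmentor

config_file = 'configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py'
# Downloaded from the [model] link in the table above.
checkpoint_file = 'icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth'

model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
result = inference_segmentor(model, 'demo.png')  # list with one H x W label map
model.show_result('demo.png', result, out_file='result.png')
```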
diff --git a/configs/icnet/icnet.yml b/configs/icnet/icnet.yml
new file mode 100644
index 0000000000..9d50e91194
--- /dev/null
+++ b/configs/icnet/icnet.yml
@@ -0,0 +1,207 @@
+Collections:
+- Name: icnet
+ Metadata:
+ Training Data:
+ - Cityscapes
+ Paper:
+ URL: https://arxiv.org/abs/1704.08545
+ Title: ICNet for Real-time Semantic Segmentation on High-resolution Images
+ README: configs/icnet/README.md
+ Code:
+ URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77
+ Version: v0.18.0
+ Converted From:
+ Code: https://github.com/hszhao/ICNet
+Models:
+- Name: icnet_r18-d8_832x832_80k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-18-D8
+ crop size: (832,832)
+ lr schd: 80000
+ inference time (ms/im):
+ - value: 36.87
+ hardware: V100
+ backend: PyTorch
+ batch size: 1
+ mode: FP32
+ resolution: (832,832)
+ memory (GB): 1.7
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 68.14
+ mIoU(ms+flip): 70.16
+ Config: configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth
+- Name: icnet_r18-d8_832x832_160k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-18-D8
+ crop size: (832,832)
+ lr schd: 160000
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 71.64
+ mIoU(ms+flip): 74.18
+ Config: configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth
+- Name: icnet_r18-d8_in1k-pre_832x832_80k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-18-D8
+ crop size: (832,832)
+ lr schd: 80000
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 72.51
+ mIoU(ms+flip): 74.78
+ Config: configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth
+- Name: icnet_r18-d8_in1k-pre_832x832_160k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-18-D8
+ crop size: (832,832)
+ lr schd: 160000
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 74.43
+ mIoU(ms+flip): 76.72
+ Config: configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth
+- Name: icnet_r50-d8_832x832_80k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-50-D8
+ crop size: (832,832)
+ lr schd: 80000
+ inference time (ms/im):
+ - value: 49.8
+ hardware: V100
+ backend: PyTorch
+ batch size: 1
+ mode: FP32
+ resolution: (832,832)
+ memory (GB): 2.53
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 68.91
+ mIoU(ms+flip): 69.72
+ Config: configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth
+- Name: icnet_r50-d8_832x832_160k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-50-D8
+ crop size: (832,832)
+ lr schd: 160000
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 73.82
+ mIoU(ms+flip): 75.67
+ Config: configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth
+- Name: icnet_r50-d8_in1k-pre_832x832_80k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-50-D8
+ crop size: (832,832)
+ lr schd: 80000
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 74.58
+ mIoU(ms+flip): 76.41
+ Config: configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth
+- Name: icnet_r50-d8_in1k-pre_832x832_160k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-50-D8
+ crop size: (832,832)
+ lr schd: 160000
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 76.29
+ mIoU(ms+flip): 78.09
+ Config: configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth
+- Name: icnet_r101-d8_832x832_80k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-101-D8
+ crop size: (832,832)
+ lr schd: 80000
+ inference time (ms/im):
+ - value: 59.0
+ hardware: V100
+ backend: PyTorch
+ batch size: 1
+ mode: FP32
+ resolution: (832,832)
+ memory (GB): 3.08
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 70.28
+ mIoU(ms+flip): 71.95
+ Config: configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth
+- Name: icnet_r101-d8_832x832_160k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-101-D8
+ crop size: (832,832)
+ lr schd: 160000
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 73.8
+ mIoU(ms+flip): 76.1
+ Config: configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth
+- Name: icnet_r101-d8_in1k-pre_832x832_80k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-101-D8
+ crop size: (832,832)
+ lr schd: 80000
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 75.57
+ mIoU(ms+flip): 77.86
+ Config: configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth
+- Name: icnet_r101-d8_in1k-pre_832x832_160k_cityscapes
+ In Collection: icnet
+ Metadata:
+ backbone: R-101-D8
+ crop size: (832,832)
+ lr schd: 160000
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: Cityscapes
+ Metrics:
+ mIoU: 76.15
+ mIoU(ms+flip): 77.98
+ Config: configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth
diff --git a/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py b/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py
new file mode 100644
index 0000000000..24cbf537d4
--- /dev/null
+++ b/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
+model = dict(backbone=dict(backbone_cfg=dict(depth=101)))
diff --git a/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py b/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py
new file mode 100644
index 0000000000..f3338b5944
--- /dev/null
+++ b/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
+model = dict(backbone=dict(backbone_cfg=dict(depth=101)))
diff --git a/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py b/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py
new file mode 100644
index 0000000000..74ac355088
--- /dev/null
+++ b/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py
@@ -0,0 +1,7 @@
+_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
+model = dict(
+ backbone=dict(
+ backbone_cfg=dict(
+ depth=101,
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnet101_v1c'))))
diff --git a/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py b/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py
new file mode 100644
index 0000000000..b4ba6d640d
--- /dev/null
+++ b/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py
@@ -0,0 +1,7 @@
+_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
+model = dict(
+ backbone=dict(
+ backbone_cfg=dict(
+ depth=101,
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnet101_v1c'))))
diff --git a/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py b/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py
new file mode 100644
index 0000000000..877b775afc
--- /dev/null
+++ b/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py
@@ -0,0 +1,3 @@
+_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
+model = dict(
+ backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18)))
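Why `layer_channels=(128, 512)` here: ResNet-18 is built from `BasicBlock` (expansion 1), so its stage-2 and stage-4 outputs carry 128 and 512 channels, versus the 512/2048 produced by `Bottleneck`-based R-50/R-101 (the defaults in `configs/_base_/models/icnet_r50-d8.py`). A quick check against mmseg's ResNet implementation:

```python
from mmseg.models.backbones.resnet import BasicBlock, Bottleneck

assert BasicBlock.expansion == 1   # R-18: stage widths 64/128/256/512
assert Bottleneck.expansion == 4   # R-50/R-101: 256/512/1024/2048
```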
diff --git a/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py b/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py
new file mode 100644
index 0000000000..786c7cc92a
--- /dev/null
+++ b/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py
@@ -0,0 +1,3 @@
+_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
+model = dict(
+ backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18)))
diff --git a/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py b/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py
new file mode 100644
index 0000000000..cc47951f3d
--- /dev/null
+++ b/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py
@@ -0,0 +1,8 @@
+_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
+model = dict(
+ backbone=dict(
+ layer_channels=(128, 512),
+ backbone_cfg=dict(
+ depth=18,
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))))
diff --git a/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py b/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py
new file mode 100644
index 0000000000..00b0fe0522
--- /dev/null
+++ b/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py
@@ -0,0 +1,8 @@
+_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
+model = dict(
+ backbone=dict(
+ layer_channels=(128, 512),
+ backbone_cfg=dict(
+ depth=18,
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))))
diff --git a/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py b/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py
new file mode 100644
index 0000000000..5b9fd9b09e
--- /dev/null
+++ b/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py
@@ -0,0 +1,5 @@
+_base_ = [
+ '../_base_/models/icnet_r50-d8.py',
+ '../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_160k.py'
+]
diff --git a/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py b/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py
new file mode 100644
index 0000000000..e0336c99db
--- /dev/null
+++ b/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py
@@ -0,0 +1,5 @@
+_base_ = [
+ '../_base_/models/icnet_r50-d8.py',
+ '../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_80k.py'
+]
diff --git a/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py b/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py
new file mode 100644
index 0000000000..6f7a0a1a36
--- /dev/null
+++ b/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py
@@ -0,0 +1,6 @@
+_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py'
+model = dict(
+ backbone=dict(
+ backbone_cfg=dict(
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnet50_v1c'))))
diff --git a/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py b/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py
new file mode 100644
index 0000000000..57546cd291
--- /dev/null
+++ b/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py
@@ -0,0 +1,6 @@
+_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py'
+model = dict(
+ backbone=dict(
+ backbone_cfg=dict(
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnet50_v1c'))))
diff --git a/mmseg/models/backbones/__init__.py b/mmseg/models/backbones/__init__.py
index 1f88bdda6c..6d320323b8 100644
--- a/mmseg/models/backbones/__init__.py
+++ b/mmseg/models/backbones/__init__.py
@@ -4,6 +4,7 @@
from .cgnet import CGNet
from .fast_scnn import FastSCNN
from .hrnet import HRNet
+from .icnet import ICNet
from .mit import MixVisionTransformer
from .mobilenet_v2 import MobileNetV2
from .mobilenet_v3 import MobileNetV3
@@ -18,5 +19,5 @@
'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN',
'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3',
'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer',
- 'BiSeNetV1', 'BiSeNetV2'
+ 'BiSeNetV1', 'BiSeNetV2', 'ICNet'
]
diff --git a/mmseg/models/backbones/icnet.py b/mmseg/models/backbones/icnet.py
new file mode 100644
index 0000000000..10e5427858
--- /dev/null
+++ b/mmseg/models/backbones/icnet.py
@@ -0,0 +1,165 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+from mmcv.runner import BaseModule
+
+from mmseg.ops import resize
+from ..builder import BACKBONES, build_backbone
+from ..decode_heads.psp_head import PPM
+
+
+@BACKBONES.register_module()
+class ICNet(BaseModule):
+ """ICNet for Real-Time Semantic Segmentation on High-Resolution Images.
+
+ This backbone is the implementation of
+ `ICNet <https://arxiv.org/abs/1704.08545>`_.
+
+ Args:
+ backbone_cfg (dict): Config dict to build backbone. Usually it is
+ ResNet but it can also be other backbones.
+ in_channels (int): The number of input image channels. Default: 3.
+ layer_channels (Sequence[int]): The numbers of feature channels at
+ layer 2 and layer 4 of the backbone, which is ResNet by default but
+ can be another backbone. Default: (512, 2048).
+ light_branch_middle_channels (int): The number of channels of the
+ middle layer in light branch. Default: 32.
+ psp_out_channels (int): The number of channels of the output of PSP
+ module. Default: 512.
+ out_channels (Sequence[int]): The numbers of output feature channels
+ of each branch. Default: (64, 256, 256).
+ pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
+ Module. Default: (1, 2, 3, 6).
+ conv_cfg (dict): Dictionary to construct and config conv layer.
+ Default: None.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Dictionary to construct and config act layer.
+ Default: dict(type='ReLU').
+ align_corners (bool): align_corners argument of F.interpolate.
+ Default: False.
+ init_cfg (dict or list[dict], optional): Initialization config dict.
+ Default: None.
+ """
+
+ def __init__(self,
+ backbone_cfg,
+ in_channels=3,
+ layer_channels=(512, 2048),
+ light_branch_middle_channels=32,
+ psp_out_channels=512,
+ out_channels=(64, 256, 256),
+ pool_scales=(1, 2, 3, 6),
+ conv_cfg=None,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ act_cfg=dict(type='ReLU'),
+ align_corners=False,
+ init_cfg=None):
+ if backbone_cfg is None:
+ raise TypeError('backbone_cfg must be passed from config file!')
+ if init_cfg is None:
+ init_cfg = [
+ dict(type='Kaiming', mode='fan_out', layer='Conv2d'),
+ dict(type='Constant', val=1, layer='_BatchNorm'),
+ dict(type='Normal', mean=0.01, layer='Linear')
+ ]
+ super(ICNet, self).__init__(init_cfg=init_cfg)
+ self.align_corners = align_corners
+ self.backbone = build_backbone(backbone_cfg)
+
+ # Note: Default `ceil_mode` is False in nn.MaxPool2d; set
+ # `ceil_mode=True` to keep information at the border of the feature map.
+ self.backbone.maxpool = nn.MaxPool2d(
+ kernel_size=3, stride=2, padding=1, ceil_mode=True)
+
+ self.psp_modules = PPM(
+ pool_scales=pool_scales,
+ in_channels=layer_channels[1],
+ channels=psp_out_channels,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg,
+ align_corners=align_corners)
+
+ self.psp_bottleneck = ConvModule(
+ layer_channels[1] + len(pool_scales) * psp_out_channels,
+ psp_out_channels,
+ 3,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+
+ self.conv_sub1 = nn.Sequential(
+ ConvModule(
+ in_channels=in_channels,
+ out_channels=light_branch_middle_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg),
+ ConvModule(
+ in_channels=light_branch_middle_channels,
+ out_channels=light_branch_middle_channels,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg),
+ ConvModule(
+ in_channels=light_branch_middle_channels,
+ out_channels=out_channels[0],
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg))
+
+ self.conv_sub2 = ConvModule(
+ layer_channels[0],
+ out_channels[1],
+ 1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg)
+
+ self.conv_sub4 = ConvModule(
+ psp_out_channels,
+ out_channels[2],
+ 1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg)
+
+ def forward(self, x):
+ output = []
+
+ # sub 1
+ output.append(self.conv_sub1(x))
+
+ # sub 2
+ x = resize(
+ x,
+ scale_factor=0.5,
+ mode='bilinear',
+ align_corners=self.align_corners)
+ x = self.backbone.stem(x)
+ x = self.backbone.maxpool(x)
+ x = self.backbone.layer1(x)
+ x = self.backbone.layer2(x)
+ output.append(self.conv_sub2(x))
+
+ # sub 4
+ x = resize(
+ x,
+ scale_factor=0.5,
+ mode='bilinear',
+ align_corners=self.align_corners)
+ x = self.backbone.layer3(x)
+ x = self.backbone.layer4(x)
+ psp_outs = self.psp_modules(x) + [x]
+ psp_outs = torch.cat(psp_outs, dim=1)
+ x = self.psp_bottleneck(psp_outs)
+
+ output.append(self.conv_sub4(x))
+
+ return output
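
A small usage sketch of the backbone alone (shapes mirror the unit test added below): for a 512x1024 input, the three branch outputs come back at 1/8, 1/16 and 1/32 resolution with the default `out_channels=(64, 256, 256)`.

```python
import torch
from mmseg.models.backbones import ICNet

backbone = ICNet(
    backbone_cfg=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='BN', requires_grad=True),
        contract_dilation=True))
backbone.eval()
with torch.no_grad():
    outs = backbone(torch.randn(1, 3, 512, 1024))
for out in outs:
    print(tuple(out.shape))
# (1, 64, 64, 128)   sub1 branch: 1/8 resolution
# (1, 256, 32, 64)   sub2 branch: 1/16
# (1, 256, 16, 32)   sub4 branch (after PSP): 1/32
```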
diff --git a/mmseg/models/necks/__init__.py b/mmseg/models/necks/__init__.py
index c496853c83..15edad493c 100644
--- a/mmseg/models/necks/__init__.py
+++ b/mmseg/models/necks/__init__.py
@@ -1,6 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .fpn import FPN
+from .ic_neck import ICNeck
from .mla_neck import MLANeck
from .multilevel_neck import MultiLevelNeck
-__all__ = ['FPN', 'MultiLevelNeck', 'MLANeck']
+__all__ = ['FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck']
diff --git a/mmseg/models/necks/ic_neck.py b/mmseg/models/necks/ic_neck.py
new file mode 100644
index 0000000000..d836a6b9ce
--- /dev/null
+++ b/mmseg/models/necks/ic_neck.py
@@ -0,0 +1,147 @@
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+from mmcv.runner import BaseModule
+
+from mmseg.ops import resize
+from ..builder import NECKS
+
+
+class CascadeFeatureFusion(BaseModule):
+ """Cascade Feature Fusion Unit in ICNet.
+
+ Args:
+ low_channels (int): The number of input channels for
+ low resolution feature map.
+ high_channels (int): The number of input channels for
+ high resolution feature map.
+ out_channels (int): The number of output channels.
+ conv_cfg (dict): Dictionary to construct and config conv layer.
+ Default: None.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Dictionary to construct and config act layer.
+ Default: dict(type='ReLU').
+ align_corners (bool): align_corners argument of F.interpolate.
+ Default: False.
+ init_cfg (dict or list[dict], optional): Initialization config dict.
+ Default: None.
+
+ Returns:
+ x (Tensor): The output tensor of shape (N, out_channels, H, W).
+ x_low (Tensor): The output tensor of shape (N, out_channels, H, W)
+ for Cascade Label Guidance in auxiliary heads.
+ """
+
+ def __init__(self,
+ low_channels,
+ high_channels,
+ out_channels,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ align_corners=False,
+ init_cfg=None):
+ super(CascadeFeatureFusion, self).__init__(init_cfg=init_cfg)
+ self.align_corners = align_corners
+ self.conv_low = ConvModule(
+ low_channels,
+ out_channels,
+ 3,
+ padding=2,
+ dilation=2,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+ self.conv_high = ConvModule(
+ high_channels,
+ out_channels,
+ 1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=act_cfg)
+
+ def forward(self, x_low, x_high):
+ x_low = resize(
+ x_low,
+ size=x_high.size()[2:],
+ mode='bilinear',
+ align_corners=self.align_corners)
+ # Note: Different from the original paper, `x_low` goes through
+ # `self.conv_low` rather than a separate 1x1 conv classifier
+ # before being used for the auxiliary head.
+ x_low = self.conv_low(x_low)
+ x_high = self.conv_high(x_high)
+ x = x_low + x_high
+ x = F.relu(x, inplace=True)
+ return x, x_low
+
+
+@NECKS.register_module()
+class ICNeck(BaseModule):
+ """ICNet for Real-Time Semantic Segmentation on High-Resolution Images.
+
+ This head is the implementation of `ICHead
+ <https://arxiv.org/abs/1704.08545>`_.
+
+ Args:
+ in_channels (Sequence[int]): The numbers of input feature channels
+ of the three branches. Default: (64, 256, 256).
+ out_channels (int): The number of output feature channels.
+ Default: 128.
+ conv_cfg (dict): Dictionary to construct and config conv layer.
+ Default: None.
+ norm_cfg (dict): Dictionary to construct and config norm layer.
+ Default: dict(type='BN').
+ act_cfg (dict): Dictionary to construct and config act layer.
+ Default: dict(type='ReLU').
+ align_corners (bool): align_corners argument of F.interpolate.
+ Default: False.
+ init_cfg (dict or list[dict], optional): Initialization config dict.
+ Default: None.
+ """
+
+ def __init__(self,
+ in_channels=(64, 256, 256),
+ out_channels=128,
+ conv_cfg=None,
+ norm_cfg=dict(type='BN'),
+ act_cfg=dict(type='ReLU'),
+ align_corners=False,
+ init_cfg=None):
+ super(ICNeck, self).__init__(init_cfg=init_cfg)
+ assert len(in_channels) == 3, 'Length of input channels must be 3!'
+
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.act_cfg = act_cfg
+ self.align_corners = align_corners
+ self.cff_24 = CascadeFeatureFusion(
+ self.in_channels[2],
+ self.in_channels[1],
+ self.out_channels,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg,
+ align_corners=self.align_corners)
+
+ self.cff_12 = CascadeFeatureFusion(
+ self.out_channels,
+ self.in_channels[0],
+ self.out_channels,
+ conv_cfg=self.conv_cfg,
+ norm_cfg=self.norm_cfg,
+ act_cfg=self.act_cfg,
+ align_corners=self.align_corners)
+
+ def forward(self, inputs):
+ assert len(inputs) == 3, 'Length of input feature maps must be 3!'
+
+ x_sub1, x_sub2, x_sub4 = inputs
+ x_cff_24, x_24 = self.cff_24(x_sub4, x_sub2)
+ x_cff_12, x_12 = self.cff_12(x_cff_24, x_sub1)
+ # Note: `x_cff_12` is used for decode_head,
+ # `x_24` and `x_12` are used for auxiliary head.
+ return x_24, x_12, x_cff_12
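
A minimal sketch of the neck on dummy ICNet-style features, assuming the default `in_channels=(64, 256, 256)` and an 832x832 crop (so feature sizes of 104, 52 and 26 at 1/8, 1/16 and 1/32).

```python
import torch
from mmseg.models.necks import ICNeck

neck = ICNeck(in_channels=(64, 256, 256), out_channels=128)
neck.eval()
x_sub1 = torch.randn(1, 64, 104, 104)   # 1/8 of an 832x832 crop
x_sub2 = torch.randn(1, 256, 52, 52)    # 1/16
x_sub4 = torch.randn(1, 256, 26, 26)    # 1/32
with torch.no_grad():
    x_24, x_12, x_cff_12 = neck((x_sub1, x_sub2, x_sub4))
print(x_24.shape)      # (1, 128, 52, 52)   -> auxiliary head (in_index=0)
print(x_12.shape)      # (1, 128, 104, 104) -> auxiliary head (in_index=1)
print(x_cff_12.shape)  # (1, 128, 104, 104) -> decode head (in_index=2)
```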
diff --git a/model-index.yml b/model-index.yml
index 7d18380c76..f0f9bb80e9 100644
--- a/model-index.yml
+++ b/model-index.yml
@@ -18,6 +18,7 @@ Import:
- configs/fp16/fp16.yml
- configs/gcnet/gcnet.yml
- configs/hrnet/hrnet.yml
+- configs/icnet/icnet.yml
- configs/isanet/isanet.yml
- configs/mobilenet_v2/mobilenet_v2.yml
- configs/mobilenet_v3/mobilenet_v3.yml
diff --git a/tests/test_models/test_backbones/test_icnet.py b/tests/test_models/test_backbones/test_icnet.py
new file mode 100644
index 0000000000..a5861d8344
--- /dev/null
+++ b/tests/test_models/test_backbones/test_icnet.py
@@ -0,0 +1,48 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmseg.models.backbones import ICNet
+
+
+def test_icnet_backbone():
+ with pytest.raises(TypeError):
+ # Must give backbone dict in config file.
+ ICNet(
+ in_channels=3,
+ layer_channels=(512, 2048),
+ light_branch_middle_channels=32,
+ psp_out_channels=512,
+ out_channels=(64, 256, 256),
+ backbone_cfg=None)
+
+ # Test ICNet Standard Forward
+ model = ICNet(
+ backbone_cfg=dict(
+ type='ResNetV1c',
+ in_channels=3,
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True))
+ assert hasattr(model.backbone,
+ 'maxpool') and model.backbone.maxpool.ceil_mode is True
+ model.init_weights()
+ model.train()
+ batch_size = 2
+ imgs = torch.randn(batch_size, 3, 512, 1024)
+ feat = model(imgs)
+
+ assert model.psp_modules[0][0].output_size == 1
+ assert model.psp_modules[1][0].output_size == 2
+ assert model.psp_modules[2][0].output_size == 3
+ assert model.psp_bottleneck.padding == 1
+ assert model.conv_sub1[0].padding == 1
+
+ assert len(feat) == 3
+ assert feat[0].shape == torch.Size([batch_size, 64, 64, 128])
diff --git a/tests/test_models/test_backbones/test_swin.py b/tests/test_models/test_backbones/test_swin.py
index 0529d1e321..83e0379637 100644
--- a/tests/test_models/test_backbones/test_swin.py
+++ b/tests/test_models/test_backbones/test_swin.py
@@ -50,22 +50,22 @@ def test_swin_transformer():
model(temp)
# Test normal inference
- temp = torch.randn((1, 3, 512, 512))
+ temp = torch.randn((1, 3, 256, 256))
model = SwinTransformer()
outs = model(temp)
- assert outs[0].shape == (1, 96, 128, 128)
- assert outs[1].shape == (1, 192, 64, 64)
- assert outs[2].shape == (1, 384, 32, 32)
- assert outs[3].shape == (1, 768, 16, 16)
+ assert outs[0].shape == (1, 96, 64, 64)
+ assert outs[1].shape == (1, 192, 32, 32)
+ assert outs[2].shape == (1, 384, 16, 16)
+ assert outs[3].shape == (1, 768, 8, 8)
# Test abnormal inference size
- temp = torch.randn((1, 3, 511, 511))
+ temp = torch.randn((1, 3, 255, 255))
model = SwinTransformer()
outs = model(temp)
- assert outs[0].shape == (1, 96, 128, 128)
- assert outs[1].shape == (1, 192, 64, 64)
- assert outs[2].shape == (1, 384, 32, 32)
- assert outs[3].shape == (1, 768, 16, 16)
+ assert outs[0].shape == (1, 96, 64, 64)
+ assert outs[1].shape == (1, 192, 32, 32)
+ assert outs[2].shape == (1, 384, 16, 16)
+ assert outs[3].shape == (1, 768, 8, 8)
# Test abnormal inference size
temp = torch.randn((1, 3, 112, 137))
@@ -89,7 +89,7 @@ def test_swin_transformer():
assert not p.requires_grad
# Test Swin with checkpoint forward
- temp = torch.randn((1, 3, 224, 224))
+ temp = torch.randn((1, 3, 112, 112))
model = SwinTransformer(with_cp=True)
for m in model.modules():
if isinstance(m, SwinBlock):
diff --git a/tests/test_models/test_backbones/test_unet.py b/tests/test_models/test_backbones/test_unet.py
index 3a035c8f0b..c4f2faca3f 100644
--- a/tests/test_models/test_backbones/test_unet.py
+++ b/tests/test_models/test_backbones/test_unet.py
@@ -345,7 +345,7 @@ def test_unet():
# case is 8.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=4,
strides=(1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2),
@@ -362,7 +362,7 @@ def test_unet():
# case is 16.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -379,7 +379,7 @@ def test_unet():
# case is 8.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -396,7 +396,7 @@ def test_unet():
# case is 8.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 2, 2, 2, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -413,7 +413,7 @@ def test_unet():
# case is 32.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=6,
strides=(1, 1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2, 2),
@@ -428,7 +428,7 @@ def test_unet():
# Check if num_stages matchs strides, len(strides)=num_stages
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -443,7 +443,7 @@ def test_unet():
# Check if num_stages matchs strides, len(enc_num_convs)=num_stages
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2),
@@ -458,7 +458,7 @@ def test_unet():
# Check if num_stages matchs strides, len(dec_num_convs)=num_stages-1
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -473,7 +473,7 @@ def test_unet():
# Check if num_stages matchs strides, len(downsamples)=num_stages-1
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -488,7 +488,7 @@ def test_unet():
# Check if num_stages matchs strides, len(enc_dilations)=num_stages
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -503,7 +503,7 @@ def test_unet():
# Check if num_stages matchs strides, len(dec_dilations)=num_stages-1
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -517,7 +517,7 @@ def test_unet():
# test UNet norm_eval=True
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -532,7 +532,7 @@ def test_unet():
# test UNet norm_eval=False
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -547,7 +547,7 @@ def test_unet():
# test UNet forward and outputs. The whole downsample rate is 16.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -558,16 +558,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 8, 8])
- assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 8, 8])
+ assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 8.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -578,16 +578,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
- assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
+ assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 8.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 2, 2, 2, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -598,16 +598,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
- assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
+ assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 4.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -618,16 +618,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
- assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
+ assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 4.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 2, 2, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -638,16 +638,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
- assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
+ assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 8.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -658,16 +658,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
- assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
+ assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 4.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -678,16 +678,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
- assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
+ assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 2.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -698,16 +698,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 64, 64])
- assert x_outs[1].shape == torch.Size([2, 512, 64, 64])
- assert x_outs[2].shape == torch.Size([2, 256, 64, 64])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 64, 64])
+ assert x_outs[1].shape == torch.Size([2, 32, 64, 64])
+ assert x_outs[2].shape == torch.Size([2, 16, 64, 64])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 1.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -718,16 +718,16 @@ def test_unet():
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 128, 128])
- assert x_outs[1].shape == torch.Size([2, 512, 128, 128])
- assert x_outs[2].shape == torch.Size([2, 256, 128, 128])
- assert x_outs[3].shape == torch.Size([2, 128, 128, 128])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[1].shape == torch.Size([2, 32, 128, 128])
+ assert x_outs[2].shape == torch.Size([2, 16, 128, 128])
+ assert x_outs[3].shape == torch.Size([2, 8, 128, 128])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 16.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 2, 2, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -737,16 +737,16 @@ def test_unet():
dec_dilations=(1, 1, 1, 1))
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 8, 8])
- assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 8, 8])
+ assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 8.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 2, 2, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -756,16 +756,16 @@ def test_unet():
dec_dilations=(1, 1, 1, 1))
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
- assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
+ assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 8.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 2, 2, 2, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -775,16 +775,16 @@ def test_unet():
dec_dilations=(1, 1, 1, 1))
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 16, 16])
- assert x_outs[1].shape == torch.Size([2, 512, 16, 16])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 16, 16])
+ assert x_outs[1].shape == torch.Size([2, 32, 16, 16])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet forward and outputs. The whole downsample rate is 4.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 2, 2, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -794,16 +794,16 @@ def test_unet():
dec_dilations=(1, 1, 1, 1))
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
- assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
+ assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
# test UNet init_weights method.
unet = UNet(
in_channels=3,
- base_channels=64,
+ base_channels=4,
num_stages=5,
strides=(1, 2, 2, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
@@ -815,8 +815,8 @@ def test_unet():
unet.init_weights()
x = torch.randn(2, 3, 128, 128)
x_outs = unet(x)
- assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
- assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
- assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
- assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
- assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+ assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
+ assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
+ assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+ assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+ assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
diff --git a/tests/test_models/test_necks/test_ic_neck.py b/tests/test_models/test_necks/test_ic_neck.py
new file mode 100644
index 0000000000..10b10609f9
--- /dev/null
+++ b/tests/test_models/test_necks/test_ic_neck.py
@@ -0,0 +1,53 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmseg.models.necks import ICNeck
+from mmseg.models.necks.ic_neck import CascadeFeatureFusion
+from ..test_heads.utils import _conv_has_norm, to_cuda
+
+
+def test_ic_neck():
+ # test with norm_cfg
+ neck = ICNeck(
+ in_channels=(64, 256, 256),
+ out_channels=128,
+ norm_cfg=dict(type='SyncBN'),
+ align_corners=False)
+ assert _conv_has_norm(neck, sync_bn=True)
+
+ inputs = [
+ torch.randn(1, 64, 128, 256),
+ torch.randn(1, 256, 65, 129),
+ torch.randn(1, 256, 32, 64)
+ ]
+ neck = ICNeck(
+ in_channels=(64, 256, 256),
+ out_channels=128,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ align_corners=False)
+ if torch.cuda.is_available():
+ neck, inputs = to_cuda(neck, inputs)
+
+ outputs = neck(inputs)
+ assert outputs[0].shape == (1, 128, 65, 129)
+ assert outputs[1].shape == (1, 128, 128, 256)
+ assert outputs[2].shape == (1, 128, 128, 256)
+
+
+def test_ic_neck_cascade_feature_fusion():
+ cff = CascadeFeatureFusion(256, 256, 128)
+ assert cff.conv_low.in_channels == 256
+ assert cff.conv_low.out_channels == 128
+ assert cff.conv_high.in_channels == 256
+ assert cff.conv_high.out_channels == 128
+
+
+def test_ic_neck_input_channels():
+ with pytest.raises(AssertionError):
+ # ICNet Neck input channel constraints.
+ ICNeck(
+ in_channels=(64, 256, 256, 256),
+ out_channels=128,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ align_corners=False)