diff --git a/README.md b/README.md index 2443171c86..7389577fe1 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ Supported methods: - [x] [PSANet (ECCV'2018)](configs/psanet) - [x] [DeepLabV3+ (CVPR'2018)](configs/deeplabv3plus) - [x] [UPerNet (ECCV'2018)](configs/upernet) +- [x] [ICNet (ECCV'2018)](configs/icnet) - [x] [NonLocal Net (CVPR'2018)](configs/nonlocal_net) - [x] [EncNet (CVPR'2018)](configs/encnet) - [x] [Semantic FPN (CVPR'2019)](configs/sem_fpn) diff --git a/README_zh-CN.md b/README_zh-CN.md index ac90eefeef..2622ed0f79 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -78,6 +78,7 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O - [x] [PSANet (ECCV'2018)](configs/psanet) - [x] [DeepLabV3+ (CVPR'2018)](configs/deeplabv3plus) - [x] [UPerNet (ECCV'2018)](configs/upernet) +- [x] [ICNet (ECCV'2018)](configs/icnet) - [x] [NonLocal Net (CVPR'2018)](configs/nonlocal_net) - [x] [EncNet (CVPR'2018)](configs/encnet) - [x] [Semantic FPN (CVPR'2019)](configs/sem_fpn) diff --git a/configs/_base_/datasets/cityscapes_832x832.py b/configs/_base_/datasets/cityscapes_832x832.py new file mode 100644 index 0000000000..b9325cc008 --- /dev/null +++ b/configs/_base_/datasets/cityscapes_832x832.py @@ -0,0 +1,35 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (832, 832) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/_base_/models/icnet_r50-d8.py b/configs/_base_/models/icnet_r50-d8.py new file mode 100644 index 0000000000..d7273cd28e --- /dev/null +++ b/configs/_base_/models/icnet_r50-d8.py @@ -0,0 +1,74 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='ICNet', + backbone_cfg=dict( + type='ResNetV1c', + in_channels=3, + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + in_channels=3, + layer_channels=(512, 2048), + light_branch_middle_channels=32, + psp_out_channels=512, + out_channels=(64, 256, 256), + norm_cfg=norm_cfg, + align_corners=False, + ), + neck=dict( + type='ICNeck', + in_channels=(64, 256, 256), + out_channels=128, + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='FCNHead', + in_channels=128, + channels=128, + num_convs=1, + in_index=2, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=128, + num_convs=1, + num_classes=19, + in_index=0, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=128, + channels=128, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/bisenetv1/README.md b/configs/bisenetv1/README.md index 344781068a..dd5bd503b2 100644 --- a/configs/bisenetv1/README.md +++ b/configs/bisenetv1/README.md @@ -32,7 +32,7 @@ | BiSeNetV1 (No Pretrain) | R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | 74.44 | 77.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239-c55e78e2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239.log.json) | | BiSeNetV1| R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | 74.37 | 76.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251-8ba80eff.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251.log.json) | | BiSeNetV1 (4x8) | R-18-D32 | 1024x1024 | 160000 | 11.17 | 31.77 | 75.16 | 77.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322-bb8db75f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322.log.json) | -| BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 3.3 | 7.71 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) | +| BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 76.92 | 78.87 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) | | BiSeNetV1 | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 77.68 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628.log.json) | Note: diff --git a/configs/bisenetv1/bisenetv1.yml b/configs/bisenetv1/bisenetv1.yml index 6de872b863..8ea94df4bd 100644 --- a/configs/bisenetv1/bisenetv1.yml +++ b/configs/bisenetv1/bisenetv1.yml @@ -92,7 +92,7 @@ Models: batch size: 1 mode: FP32 resolution: (1024,1024) - memory (GB): 3.3 + memory (GB): 15.39 Results: - Task: Semantic Segmentation Dataset: Cityscapes diff --git a/configs/icnet/README.md b/configs/icnet/README.md new file mode 100644 index 0000000000..62d2040aa5 --- /dev/null +++ b/configs/icnet/README.md @@ -0,0 +1,45 @@ +# ICNet for Real-time Semantic Segmentation on High-resolution Images + +## Introduction + + + +Official Repo + +Code Snippet + +
+ICNet (ECCV'2018)
+
+```latex
+@inproceedings{zhao2018icnet,
+  title={Icnet for real-time semantic segmentation on high-resolution images},
+  author={Zhao, Hengshuang and Qi, Xiaojuan and Shen, Xiaoyong and Shi, Jianping and Jia, Jiaya},
+  booktitle={Proceedings of the European conference on computer vision (ECCV)},
+  pages={405--420},
+  year={2018}
+}
+```
+
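For readers who want to try the checkpoints listed below, here is a minimal inference sketch (not part of the PR) using MMSegmentation's standard v0.x high-level API, `init_segmentor` and `inference_segmentor` from `mmseg.apis`. The config file ships with this PR and the checkpoint URL is taken from the results table; the demo image path is only an assumed example.

```python
from mmseg.apis import inference_segmentor, init_segmentor

# Config added by this PR; checkpoint URL taken from the results table below.
config_file = 'configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py'
checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth'

# Build the model and load weights (mmcv resolves URL checkpoints).
model = init_segmentor(config_file, checkpoint_file, device='cuda:0')

# 'demo/demo.png' is an assumed input path; the result is a per-pixel
# class-id map over the 19 Cityscapes classes.
result = inference_segmentor(model, 'demo/demo.png')
```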
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ---------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ICNet | R-18-D8 | 832x832 | 80000 | 1.70 | 27.12 | 68.14 | 70.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521.log.json) | +| ICNet | R-18-D8 | 832x832 | 160000 | - | - | 71.64 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153.log.json) | +| ICNet (in1k-pre) | R-18-D8 | 832x832 | 80000 | - | - | 72.51 | 74.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354.log.json) | +| ICNet (in1k-pre) | R-18-D8 | 832x832 | 160000 | - | - | 74.43 | 76.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702.log.json) | +| ICNet | R-50-D8 | 832x832 | 80000 | 2.53 | 20.08 | 68.91 | 69.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625.log.json) | +| ICNet | R-50-D8 | 832x832 | 160000 | - | - | 73.82 | 75.67 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612.log.json) | +| ICNet (in1k-pre) | R-50-D8 | 832x832 | 80000 | - | - | 74.58 | 76.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943.log.json) | +| ICNet (in1k-pre) | R-50-D8 | 832x832 | 160000 | - | - | 76.29 | 78.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715.log.json) | +| ICNet | R-101-D8 | 832x832 | 80000 | 3.08 | 16.95 | 70.28 | 71.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447.log.json) | +| ICNet | R-101-D8 | 832x832 | 160000 | - | - | 73.80 | 76.10 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350.log.json) | +| ICNet (in1k-pre) | R-101-D8 | 832x832 | 80000 | - | - | 75.57 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414.log.json) | +| ICNet (in1k-pre) | R-101-D8 | 832x832 | 160000 | - | - | 76.15 | 77.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612.log.json) | + +Note: `in1k-pre` means pretrained model is used. diff --git a/configs/icnet/icnet.yml b/configs/icnet/icnet.yml new file mode 100644 index 0000000000..9d50e91194 --- /dev/null +++ b/configs/icnet/icnet.yml @@ -0,0 +1,207 @@ +Collections: +- Name: icnet + Metadata: + Training Data: + - Cityscapes + Paper: + URL: https://arxiv.org/abs/1704.08545 + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + README: configs/icnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Version: v0.18.0 + Converted From: + Code: https://github.com/hszhao/ICNet +Models: +- Name: icnet_r18-d8_832x832_80k_cityscapes + In Collection: icnet + Metadata: + backbone: R-18-D8 + crop size: (832,832) + lr schd: 80000 + inference time (ms/im): + - value: 36.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (832,832) + memory (GB): 1.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 68.14 + mIoU(ms+flip): 70.16 + Config: configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth +- Name: icnet_r18-d8_832x832_160k_cityscapes + In Collection: icnet + Metadata: + backbone: R-18-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.64 + mIoU(ms+flip): 74.18 + Config: configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth +- Name: icnet_r18-d8_in1k-pre_832x832_80k_cityscapes + In Collection: icnet + Metadata: + backbone: R-18-D8 + crop size: (832,832) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.51 + mIoU(ms+flip): 74.78 + Config: configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth +- Name: icnet_r18-d8_in1k-pre_832x832_160k_cityscapes + In Collection: icnet + Metadata: + backbone: R-18-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.43 + mIoU(ms+flip): 76.72 + Config: configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth +- Name: icnet_r50-d8_832x832_80k_cityscapes + In Collection: icnet + Metadata: + backbone: R-50-D8 + crop size: (832,832) + lr schd: 80000 + inference time (ms/im): + - value: 49.8 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (832,832) + memory (GB): 2.53 + Results: + - Task: Semantic Segmentation + Dataset: 
Cityscapes + Metrics: + mIoU: 68.91 + mIoU(ms+flip): 69.72 + Config: configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth +- Name: icnet_r50-d8_832x832_160k_cityscapes + In Collection: icnet + Metadata: + backbone: R-50-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.82 + mIoU(ms+flip): 75.67 + Config: configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth +- Name: icnet_r50-d8_in1k-pre_832x832_80k_cityscapes + In Collection: icnet + Metadata: + backbone: R-50-D8 + crop size: (832,832) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.58 + mIoU(ms+flip): 76.41 + Config: configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth +- Name: icnet_r50-d8_in1k-pre_832x832_160k_cityscapes + In Collection: icnet + Metadata: + backbone: R-50-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.29 + mIoU(ms+flip): 78.09 + Config: configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth +- Name: icnet_r101-d8_832x832_80k_cityscapes + In Collection: icnet + Metadata: + backbone: R-101-D8 + crop size: (832,832) + lr schd: 80000 + inference time (ms/im): + - value: 59.0 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (832,832) + memory (GB): 3.08 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.28 + mIoU(ms+flip): 71.95 + Config: configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth +- Name: icnet_r101-d8_832x832_160k_cityscapes + In Collection: icnet + Metadata: + backbone: R-101-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.8 + mIoU(ms+flip): 76.1 + Config: configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth +- Name: icnet_r101-d8_in1k-pre_832x832_80k_cityscapes + In Collection: icnet + Metadata: + backbone: R-101-D8 + crop size: (832,832) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.57 + mIoU(ms+flip): 77.86 + Config: configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth +- Name: icnet_r101-d8_in1k-pre_832x832_160k_cityscapes + In 
Collection: icnet + Metadata: + backbone: R-101-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.15 + mIoU(ms+flip): 77.98 + Config: configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth diff --git a/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py b/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py new file mode 100644 index 0000000000..24cbf537d4 --- /dev/null +++ b/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py' +model = dict(backbone=dict(backbone_cfg=dict(depth=101))) diff --git a/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py b/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py new file mode 100644 index 0000000000..f3338b5944 --- /dev/null +++ b/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py' +model = dict(backbone=dict(backbone_cfg=dict(depth=101))) diff --git a/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py b/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py new file mode 100644 index 0000000000..74ac355088 --- /dev/null +++ b/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c')))) diff --git a/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py b/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py new file mode 100644 index 0000000000..b4ba6d640d --- /dev/null +++ b/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c')))) diff --git a/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py b/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py new file mode 100644 index 0000000000..877b775afc --- /dev/null +++ b/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py' +model = dict( + backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18))) diff --git a/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py b/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py new file mode 100644 index 0000000000..786c7cc92a --- /dev/null +++ b/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py' +model = dict( + backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18))) diff --git a/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py b/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py new file mode 100644 index 0000000000..cc47951f3d --- /dev/null +++ b/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py' +model = dict( + backbone=dict( + layer_channels=(128, 512), + backbone_cfg=dict( + depth=18, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')))) diff --git 
a/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py b/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py new file mode 100644 index 0000000000..00b0fe0522 --- /dev/null +++ b/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py' +model = dict( + backbone=dict( + layer_channels=(128, 512), + backbone_cfg=dict( + depth=18, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')))) diff --git a/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py b/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py new file mode 100644 index 0000000000..5b9fd9b09e --- /dev/null +++ b/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/icnet_r50-d8.py', + '../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] diff --git a/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py b/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py new file mode 100644 index 0000000000..e0336c99db --- /dev/null +++ b/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/icnet_r50-d8.py', + '../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] diff --git a/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py b/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py new file mode 100644 index 0000000000..6f7a0a1a36 --- /dev/null +++ b/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py @@ -0,0 +1,6 @@ +_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py b/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py new file mode 100644 index 0000000000..57546cd291 --- /dev/null +++ b/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py @@ -0,0 +1,6 @@ +_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/mmseg/models/backbones/__init__.py b/mmseg/models/backbones/__init__.py index 1f88bdda6c..6d320323b8 100644 --- a/mmseg/models/backbones/__init__.py +++ b/mmseg/models/backbones/__init__.py @@ -4,6 +4,7 @@ from .cgnet import CGNet from .fast_scnn import FastSCNN from .hrnet import HRNet +from .icnet import ICNet from .mit import MixVisionTransformer from .mobilenet_v2 import MobileNetV2 from .mobilenet_v3 import MobileNetV3 @@ -18,5 +19,5 @@ 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN', 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', 'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer', - 'BiSeNetV1', 'BiSeNetV2' + 'BiSeNetV1', 'BiSeNetV2', 'ICNet' ] diff --git a/mmseg/models/backbones/icnet.py b/mmseg/models/backbones/icnet.py new file mode 100644 index 0000000000..10e5427858 --- /dev/null +++ b/mmseg/models/backbones/icnet.py @@ -0,0 +1,165 @@ +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmcv.runner import BaseModule + +from mmseg.ops import resize +from ..builder import BACKBONES, build_backbone +from ..decode_heads.psp_head import PPM + + +@BACKBONES.register_module() +class ICNet(BaseModule): + """ICNet for Real-Time 
Semantic Segmentation on High-Resolution Images.
+
+    This backbone is the implementation of
+    `ICNet <https://arxiv.org/abs/1704.08545>`_.
+
+    Args:
+        backbone_cfg (dict): Config dict to build backbone. Usually it is
+            ResNet but it can also be other backbones.
+        in_channels (int): The number of input image channels. Default: 3.
+        layer_channels (Sequence[int]): The numbers of feature channels at
+            layer 2 and layer 4 in ResNet. Other backbones may use
+            different values. Default: (512, 2048).
+        light_branch_middle_channels (int): The number of channels of the
+            middle layer in the light branch. Default: 32.
+        psp_out_channels (int): The number of channels of the output of the
+            PSP module. Default: 512.
+        out_channels (Sequence[int]): The numbers of output feature channels
+            of each branch. Default: (64, 256, 256).
+        pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
+            Module. Default: (1, 2, 3, 6).
+        conv_cfg (dict): Dictionary to construct and config conv layer.
+            Default: None.
+        norm_cfg (dict): Dictionary to construct and config norm layer.
+            Default: dict(type='BN').
+        act_cfg (dict): Dictionary to construct and config act layer.
+            Default: dict(type='ReLU').
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        init_cfg (dict or list[dict], optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 backbone_cfg,
+                 in_channels=3,
+                 layer_channels=(512, 2048),
+                 light_branch_middle_channels=32,
+                 psp_out_channels=512,
+                 out_channels=(64, 256, 256),
+                 pool_scales=(1, 2, 3, 6),
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN', requires_grad=True),
+                 act_cfg=dict(type='ReLU'),
+                 align_corners=False,
+                 init_cfg=None):
+        if backbone_cfg is None:
+            raise TypeError('backbone_cfg must be passed from config file!')
+        if init_cfg is None:
+            init_cfg = [
+                dict(type='Kaiming', mode='fan_out', layer='Conv2d'),
+                dict(type='Constant', val=1, layer='_BatchNorm'),
+                dict(type='Normal', mean=0.01, layer='Linear')
+            ]
+        super(ICNet, self).__init__(init_cfg=init_cfg)
+        self.align_corners = align_corners
+        self.backbone = build_backbone(backbone_cfg)
+
+        # Note: `ceil_mode` defaults to False in nn.MaxPool2d; set
+        # `ceil_mode=True` to keep information at the corners of the
+        # feature map.
+ self.backbone.maxpool = nn.MaxPool2d( + kernel_size=3, stride=2, padding=1, ceil_mode=True) + + self.psp_modules = PPM( + pool_scales=pool_scales, + in_channels=layer_channels[1], + channels=psp_out_channels, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + align_corners=align_corners) + + self.psp_bottleneck = ConvModule( + layer_channels[1] + len(pool_scales) * psp_out_channels, + psp_out_channels, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + self.conv_sub1 = nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=light_branch_middle_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg), + ConvModule( + in_channels=light_branch_middle_channels, + out_channels=light_branch_middle_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg), + ConvModule( + in_channels=light_branch_middle_channels, + out_channels=out_channels[0], + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg)) + + self.conv_sub2 = ConvModule( + layer_channels[0], + out_channels[1], + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg) + + self.conv_sub4 = ConvModule( + psp_out_channels, + out_channels[2], + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg) + + def forward(self, x): + output = [] + + # sub 1 + output.append(self.conv_sub1(x)) + + # sub 2 + x = resize( + x, + scale_factor=0.5, + mode='bilinear', + align_corners=self.align_corners) + x = self.backbone.stem(x) + x = self.backbone.maxpool(x) + x = self.backbone.layer1(x) + x = self.backbone.layer2(x) + output.append(self.conv_sub2(x)) + + # sub 4 + x = resize( + x, + scale_factor=0.5, + mode='bilinear', + align_corners=self.align_corners) + x = self.backbone.layer3(x) + x = self.backbone.layer4(x) + psp_outs = self.psp_modules(x) + [x] + psp_outs = torch.cat(psp_outs, dim=1) + x = self.psp_bottleneck(psp_outs) + + output.append(self.conv_sub4(x)) + + return output diff --git a/mmseg/models/necks/__init__.py b/mmseg/models/necks/__init__.py index c496853c83..15edad493c 100644 --- a/mmseg/models/necks/__init__.py +++ b/mmseg/models/necks/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from .fpn import FPN +from .ic_neck import ICNeck from .mla_neck import MLANeck from .multilevel_neck import MultiLevelNeck -__all__ = ['FPN', 'MultiLevelNeck', 'MLANeck'] +__all__ = ['FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck'] diff --git a/mmseg/models/necks/ic_neck.py b/mmseg/models/necks/ic_neck.py new file mode 100644 index 0000000000..d836a6b9ce --- /dev/null +++ b/mmseg/models/necks/ic_neck.py @@ -0,0 +1,147 @@ +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmcv.runner import BaseModule + +from mmseg.ops import resize +from ..builder import NECKS + + +class CascadeFeatureFusion(BaseModule): + """Cascade Feature Fusion Unit in ICNet. + + Args: + low_channels (int): The number of input channels for + low resolution feature map. + high_channels (int): The number of input channels for + high resolution feature map. + out_channels (int): The number of output channels. + conv_cfg (dict): Dictionary to construct and config conv layer. + Default: None. + norm_cfg (dict): Dictionary to construct and config norm layer. + Default: dict(type='BN'). + act_cfg (dict): Dictionary to construct and config act layer. + Default: dict(type='ReLU'). + align_corners (bool): align_corners argument of F.interpolate. + Default: False. 
+        init_cfg (dict or list[dict], optional): Initialization config dict.
+            Default: None.
+
+    Returns:
+        x (Tensor): The output tensor of shape (N, out_channels, H, W).
+        x_low (Tensor): The output tensor of shape (N, out_channels, H, W)
+            for Cascade Label Guidance in auxiliary heads.
+    """
+
+    def __init__(self,
+                 low_channels,
+                 high_channels,
+                 out_channels,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 align_corners=False,
+                 init_cfg=None):
+        super(CascadeFeatureFusion, self).__init__(init_cfg=init_cfg)
+        self.align_corners = align_corners
+        self.conv_low = ConvModule(
+            low_channels,
+            out_channels,
+            3,
+            padding=2,
+            dilation=2,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        self.conv_high = ConvModule(
+            high_channels,
+            out_channels,
+            1,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+
+    def forward(self, x_low, x_high):
+        x_low = resize(
+            x_low,
+            size=x_high.size()[2:],
+            mode='bilinear',
+            align_corners=self.align_corners)
+        # Note: Different from the original paper, `x_low` is passed
+        # through `self.conv_low` rather than another 1x1 conv classifier
+        # before being used for the auxiliary head.
+        x_low = self.conv_low(x_low)
+        x_high = self.conv_high(x_high)
+        x = x_low + x_high
+        x = F.relu(x, inplace=True)
+        return x, x_low
+
+
+@NECKS.register_module()
+class ICNeck(BaseModule):
+    """ICNet for Real-Time Semantic Segmentation on High-Resolution Images.
+
+    This neck is the implementation of `ICHead
+    <https://arxiv.org/abs/1704.08545>`_.
+
+    Args:
+        in_channels (Sequence[int]): The numbers of input feature channels
+            of the three branches. Default: (64, 256, 256).
+        out_channels (int): The number of output feature channels.
+            Default: 128.
+        conv_cfg (dict): Dictionary to construct and config conv layer.
+            Default: None.
+        norm_cfg (dict): Dictionary to construct and config norm layer.
+            Default: dict(type='BN').
+        act_cfg (dict): Dictionary to construct and config act layer.
+            Default: dict(type='ReLU').
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        init_cfg (dict or list[dict], optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels=(64, 256, 256),
+                 out_channels=128,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 align_corners=False,
+                 init_cfg=None):
+        super(ICNeck, self).__init__(init_cfg=init_cfg)
+        assert len(in_channels) == 3, 'Length of input channels \
+            must be 3!'
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+        self.cff_24 = CascadeFeatureFusion(
+            self.in_channels[2],
+            self.in_channels[1],
+            self.out_channels,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            align_corners=self.align_corners)
+
+        self.cff_12 = CascadeFeatureFusion(
+            self.out_channels,
+            self.in_channels[0],
+            self.out_channels,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            align_corners=self.align_corners)
+
+    def forward(self, inputs):
+        assert len(inputs) == 3, 'Length of input feature \
+            maps must be 3!'
+
+        x_sub1, x_sub2, x_sub4 = inputs
+        x_cff_24, x_24 = self.cff_24(x_sub4, x_sub2)
+        x_cff_12, x_12 = self.cff_12(x_cff_24, x_sub1)
+        # Note: `x_cff_12` is used for the decode head,
+        # `x_24` and `x_12` are used for the auxiliary heads.
+ return x_24, x_12, x_cff_12 diff --git a/model-index.yml b/model-index.yml index 7d18380c76..f0f9bb80e9 100644 --- a/model-index.yml +++ b/model-index.yml @@ -18,6 +18,7 @@ Import: - configs/fp16/fp16.yml - configs/gcnet/gcnet.yml - configs/hrnet/hrnet.yml +- configs/icnet/icnet.yml - configs/isanet/isanet.yml - configs/mobilenet_v2/mobilenet_v2.yml - configs/mobilenet_v3/mobilenet_v3.yml diff --git a/tests/test_models/test_backbones/test_icnet.py b/tests/test_models/test_backbones/test_icnet.py new file mode 100644 index 0000000000..a5861d8344 --- /dev/null +++ b/tests/test_models/test_backbones/test_icnet.py @@ -0,0 +1,48 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.backbones import ICNet + + +def test_icnet_backbone(): + with pytest.raises(TypeError): + # Must give backbone dict in config file. + ICNet( + in_channels=3, + layer_channels=(512, 2048), + light_branch_middle_channels=32, + psp_out_channels=512, + out_channels=(64, 256, 256), + backbone_cfg=None) + + # Test ICNet Standard Forward + model = ICNet( + backbone_cfg=dict( + type='ResNetV1c', + in_channels=3, + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), ) + assert hasattr(model.backbone, + 'maxpool') and model.backbone.maxpool.ceil_mode is True + model.init_weights() + model.train() + batch_size = 2 + imgs = torch.randn(batch_size, 3, 512, 1024) + feat = model(imgs) + + assert model.psp_modules[0][0].output_size == 1 + assert model.psp_modules[1][0].output_size == 2 + assert model.psp_modules[2][0].output_size == 3 + assert model.psp_bottleneck.padding == 1 + assert model.conv_sub1[0].padding == 1 + + assert len(feat) == 3 + assert feat[0].shape == torch.Size([batch_size, 64, 64, 128]) diff --git a/tests/test_models/test_backbones/test_swin.py b/tests/test_models/test_backbones/test_swin.py index 0529d1e321..83e0379637 100644 --- a/tests/test_models/test_backbones/test_swin.py +++ b/tests/test_models/test_backbones/test_swin.py @@ -50,22 +50,22 @@ def test_swin_transformer(): model(temp) # Test normal inference - temp = torch.randn((1, 3, 512, 512)) + temp = torch.randn((1, 3, 256, 256)) model = SwinTransformer() outs = model(temp) - assert outs[0].shape == (1, 96, 128, 128) - assert outs[1].shape == (1, 192, 64, 64) - assert outs[2].shape == (1, 384, 32, 32) - assert outs[3].shape == (1, 768, 16, 16) + assert outs[0].shape == (1, 96, 64, 64) + assert outs[1].shape == (1, 192, 32, 32) + assert outs[2].shape == (1, 384, 16, 16) + assert outs[3].shape == (1, 768, 8, 8) # Test abnormal inference size - temp = torch.randn((1, 3, 511, 511)) + temp = torch.randn((1, 3, 255, 255)) model = SwinTransformer() outs = model(temp) - assert outs[0].shape == (1, 96, 128, 128) - assert outs[1].shape == (1, 192, 64, 64) - assert outs[2].shape == (1, 384, 32, 32) - assert outs[3].shape == (1, 768, 16, 16) + assert outs[0].shape == (1, 96, 64, 64) + assert outs[1].shape == (1, 192, 32, 32) + assert outs[2].shape == (1, 384, 16, 16) + assert outs[3].shape == (1, 768, 8, 8) # Test abnormal inference size temp = torch.randn((1, 3, 112, 137)) @@ -89,7 +89,7 @@ def test_swin_transformer(): assert not p.requires_grad # Test Swin with checkpoint forward - temp = torch.randn((1, 3, 224, 224)) + temp = torch.randn((1, 3, 112, 112)) model = SwinTransformer(with_cp=True) for m in model.modules(): if isinstance(m, SwinBlock): diff 
--git a/tests/test_models/test_backbones/test_unet.py b/tests/test_models/test_backbones/test_unet.py index 3a035c8f0b..c4f2faca3f 100644 --- a/tests/test_models/test_backbones/test_unet.py +++ b/tests/test_models/test_backbones/test_unet.py @@ -345,7 +345,7 @@ def test_unet(): # case is 8. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=4, strides=(1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2), @@ -362,7 +362,7 @@ def test_unet(): # case is 16. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -379,7 +379,7 @@ def test_unet(): # case is 8. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -396,7 +396,7 @@ def test_unet(): # case is 8. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 2, 2, 2, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -413,7 +413,7 @@ def test_unet(): # case is 32. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=6, strides=(1, 1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2, 2), @@ -428,7 +428,7 @@ def test_unet(): # Check if num_stages matchs strides, len(strides)=num_stages unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -443,7 +443,7 @@ def test_unet(): # Check if num_stages matchs strides, len(enc_num_convs)=num_stages unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2), @@ -458,7 +458,7 @@ def test_unet(): # Check if num_stages matchs strides, len(dec_num_convs)=num_stages-1 unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -473,7 +473,7 @@ def test_unet(): # Check if num_stages matchs strides, len(downsamples)=num_stages-1 unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -488,7 +488,7 @@ def test_unet(): # Check if num_stages matchs strides, len(enc_dilations)=num_stages unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -503,7 +503,7 @@ def test_unet(): # Check if num_stages matchs strides, len(dec_dilations)=num_stages-1 unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -517,7 +517,7 @@ def test_unet(): # test UNet norm_eval=True unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -532,7 +532,7 @@ def test_unet(): # test UNet norm_eval=False unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -547,7 +547,7 @@ def test_unet(): # test UNet forward and outputs. The whole downsample rate is 16. 
unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -558,16 +558,16 @@ def test_unet(): x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 8, 8]) - assert x_outs[1].shape == torch.Size([2, 512, 16, 16]) - assert x_outs[2].shape == torch.Size([2, 256, 32, 32]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 8, 8]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 8. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -578,16 +578,16 @@ def test_unet(): x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 16, 16]) - assert x_outs[1].shape == torch.Size([2, 512, 16, 16]) - assert x_outs[2].shape == torch.Size([2, 256, 32, 32]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 16, 16]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 8. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 2, 2, 2, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -598,16 +598,16 @@ def test_unet(): x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 16, 16]) - assert x_outs[1].shape == torch.Size([2, 512, 16, 16]) - assert x_outs[2].shape == torch.Size([2, 256, 32, 32]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 16, 16]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 4. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -618,16 +618,16 @@ def test_unet(): x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 32, 32]) - assert x_outs[1].shape == torch.Size([2, 512, 32, 32]) - assert x_outs[2].shape == torch.Size([2, 256, 32, 32]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 32, 32]) + assert x_outs[1].shape == torch.Size([2, 32, 32, 32]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 4. 
unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 2, 2, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -638,16 +638,16 @@ def test_unet(): x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 32, 32]) - assert x_outs[1].shape == torch.Size([2, 512, 32, 32]) - assert x_outs[2].shape == torch.Size([2, 256, 32, 32]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 32, 32]) + assert x_outs[1].shape == torch.Size([2, 32, 32, 32]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 8. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -658,16 +658,16 @@ def test_unet(): x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 16, 16]) - assert x_outs[1].shape == torch.Size([2, 512, 16, 16]) - assert x_outs[2].shape == torch.Size([2, 256, 32, 32]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 16, 16]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 4. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -678,16 +678,16 @@ def test_unet(): x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 32, 32]) - assert x_outs[1].shape == torch.Size([2, 512, 32, 32]) - assert x_outs[2].shape == torch.Size([2, 256, 32, 32]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 32, 32]) + assert x_outs[1].shape == torch.Size([2, 32, 32, 32]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 2. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -698,16 +698,16 @@ def test_unet(): x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 64, 64]) - assert x_outs[1].shape == torch.Size([2, 512, 64, 64]) - assert x_outs[2].shape == torch.Size([2, 256, 64, 64]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 64, 64]) + assert x_outs[1].shape == torch.Size([2, 32, 64, 64]) + assert x_outs[2].shape == torch.Size([2, 16, 64, 64]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 1. 
unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 1, 1, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -718,16 +718,16 @@ def test_unet(): x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 128, 128]) - assert x_outs[1].shape == torch.Size([2, 512, 128, 128]) - assert x_outs[2].shape == torch.Size([2, 256, 128, 128]) - assert x_outs[3].shape == torch.Size([2, 128, 128, 128]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[1].shape == torch.Size([2, 32, 128, 128]) + assert x_outs[2].shape == torch.Size([2, 16, 128, 128]) + assert x_outs[3].shape == torch.Size([2, 8, 128, 128]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 16. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 2, 2, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -737,16 +737,16 @@ def test_unet(): dec_dilations=(1, 1, 1, 1)) x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 8, 8]) - assert x_outs[1].shape == torch.Size([2, 512, 16, 16]) - assert x_outs[2].shape == torch.Size([2, 256, 32, 32]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 8, 8]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 8. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 2, 2, 1, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -756,16 +756,16 @@ def test_unet(): dec_dilations=(1, 1, 1, 1)) x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 16, 16]) - assert x_outs[1].shape == torch.Size([2, 512, 16, 16]) - assert x_outs[2].shape == torch.Size([2, 256, 32, 32]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 16, 16]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 8. unet = UNet( in_channels=3, - base_channels=64, + base_channels=4, num_stages=5, strides=(1, 2, 2, 2, 1), enc_num_convs=(2, 2, 2, 2, 2), @@ -775,16 +775,16 @@ def test_unet(): dec_dilations=(1, 1, 1, 1)) x = torch.randn(2, 3, 128, 128) x_outs = unet(x) - assert x_outs[0].shape == torch.Size([2, 1024, 16, 16]) - assert x_outs[1].shape == torch.Size([2, 512, 16, 16]) - assert x_outs[2].shape == torch.Size([2, 256, 32, 32]) - assert x_outs[3].shape == torch.Size([2, 128, 64, 64]) - assert x_outs[4].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[0].shape == torch.Size([2, 64, 16, 16]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) # test UNet forward and outputs. The whole downsample rate is 4. 
unet = UNet(
        in_channels=3,
-        base_channels=64,
+        base_channels=4,
        num_stages=5,
        strides=(1, 2, 2, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
@@ -794,16 +794,16 @@
        dec_dilations=(1, 1, 1, 1))
    x = torch.randn(2, 3, 128, 128)
    x_outs = unet(x)
-    assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
-    assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
-    assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
-    assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
-    assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+    assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
+    assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
+    assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+    assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+    assert x_outs[4].shape == torch.Size([2, 4, 128, 128])

    # test UNet init_weights method.
    unet = UNet(
        in_channels=3,
-        base_channels=64,
+        base_channels=4,
        num_stages=5,
        strides=(1, 2, 2, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
@@ -815,8 +815,8 @@
    unet.init_weights()
    x = torch.randn(2, 3, 128, 128)
    x_outs = unet(x)
-    assert x_outs[0].shape == torch.Size([2, 1024, 32, 32])
-    assert x_outs[1].shape == torch.Size([2, 512, 32, 32])
-    assert x_outs[2].shape == torch.Size([2, 256, 32, 32])
-    assert x_outs[3].shape == torch.Size([2, 128, 64, 64])
-    assert x_outs[4].shape == torch.Size([2, 64, 128, 128])
+    assert x_outs[0].shape == torch.Size([2, 64, 32, 32])
+    assert x_outs[1].shape == torch.Size([2, 32, 32, 32])
+    assert x_outs[2].shape == torch.Size([2, 16, 32, 32])
+    assert x_outs[3].shape == torch.Size([2, 8, 64, 64])
+    assert x_outs[4].shape == torch.Size([2, 4, 128, 128])
diff --git a/tests/test_models/test_necks/test_ic_neck.py b/tests/test_models/test_necks/test_ic_neck.py
new file mode 100644
index 0000000000..10b10609f9
--- /dev/null
+++ b/tests/test_models/test_necks/test_ic_neck.py
@@ -0,0 +1,53 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmseg.models.necks import ICNeck
+from mmseg.models.necks.ic_neck import CascadeFeatureFusion
+from ..test_heads.utils import _conv_has_norm, to_cuda
+
+
+def test_ic_neck():
+    # test with norm_cfg
+    neck = ICNeck(
+        in_channels=(64, 256, 256),
+        out_channels=128,
+        norm_cfg=dict(type='SyncBN'),
+        align_corners=False)
+    assert _conv_has_norm(neck, sync_bn=True)
+
+    inputs = [
+        torch.randn(1, 64, 128, 256),
+        torch.randn(1, 256, 65, 129),
+        torch.randn(1, 256, 32, 64)
+    ]
+    neck = ICNeck(
+        in_channels=(64, 256, 256),
+        out_channels=128,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        align_corners=False)
+    if torch.cuda.is_available():
+        neck, inputs = to_cuda(neck, inputs)
+
+    outputs = neck(inputs)
+    assert outputs[0].shape == (1, 128, 65, 129)
+    assert outputs[1].shape == (1, 128, 128, 256)
+    assert outputs[2].shape == (1, 128, 128, 256)
+
+
+def test_ic_neck_cascade_feature_fusion():
+    cff = CascadeFeatureFusion(256, 256, 128)
+    assert cff.conv_low.in_channels == 256
+    assert cff.conv_low.out_channels == 128
+    assert cff.conv_high.in_channels == 256
+    assert cff.conv_high.out_channels == 128
+
+
+def test_ic_neck_input_channels():
+    with pytest.raises(AssertionError):
+        # ICNet Neck input channel constraints.
+        ICNeck(
+            in_channels=(64, 256, 256, 256),
+            out_channels=128,
+            norm_cfg=dict(type='BN', requires_grad=True),
+            align_corners=False)
diff --git a/tools/benchmark_new.py b/tools/benchmark_new.py
new file mode 100644
index 0000000000..e69de29bb2
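To sanity-check how the pieces added by this PR fit together, the following is a small smoke-test sketch (not part of the patch) that wires the new `ICNet` backbone into the new `ICNeck`, mirroring the unit tests above. The backbone config matches `configs/_base_/models/icnet_r50-d8.py`, except plain BN replaces SyncBN so it runs on a single device; the 512x1024 input follows `test_icnet.py`.

```python
import torch

from mmseg.models.backbones import ICNet
from mmseg.models.necks import ICNeck

# Same ResNet-50 backbone_cfg as configs/_base_/models/icnet_r50-d8.py,
# with BN instead of SyncBN for single-device execution.
backbone = ICNet(
    backbone_cfg=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='BN', requires_grad=True),
        contract_dilation=True))
neck = ICNeck(in_channels=(64, 256, 256), out_channels=128)

imgs = torch.randn(2, 3, 512, 1024)
# The backbone returns three branches: sub1 at 1/8, sub2 at 1/16 and
# sub4 at 1/32 of the input resolution.
feats = backbone(imgs)
# The neck returns (x_24, x_12, x_cff_12): the first two feed the
# auxiliary FCN heads, the last feeds the decode head.
x_24, x_12, x_cff_12 = neck(feats)
assert x_cff_12.shape == (2, 128, 64, 128)  # 1/8 of 512x1024
```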