diff --git a/configs/segformer/README.md b/configs/segformer/README.md index 2abb82b550..5606960722 100644 --- a/configs/segformer/README.md +++ b/configs/segformer/README.md @@ -93,3 +93,16 @@ test_pipeline = [ ]) ] ``` + +### Cityscapes + +The lower fps result is caused by the sliding window inference scheme (window size:1024x1024). + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ---: | ------------- | ------ | -------- | +|Segformer | MIT-B0 | 1024x1024 | 160000 | 3.64 | 4.74 | 76.54 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857.log.json) | +|Segformer | MIT-B1 | 1024x1024 | 160000 | 4.49 | 4.3 | 78.56 | 79.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213.log.json) | +|Segformer | MIT-B2 | 1024x1024 | 160000 | 7.42 | 3.36 | 81.08 | 82.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205.log.json) | +|Segformer | MIT-B3 | 1024x1024 | 160000 | 10.86 | 2.53 | 81.94 | 83.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823.log.json) | +|Segformer | MIT-B4 | 1024x1024 | 160000 | 15.07 | 1.88 | 81.89 | 83.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709.log.json) | +|Segformer | MIT-B5 | 1024x1024 | 160000 | 18.00 | 1.39 | 82.25 | 83.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth) &#124; 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934.log.json) | diff --git a/configs/segformer/segformer.yml b/configs/segformer/segformer.yml index df46d5260c..4d94532571 100644 --- a/configs/segformer/segformer.yml +++ b/configs/segformer/segformer.yml @@ -3,6 +3,7 @@ Collections: Metadata: Training Data: - ADE20K + - Cityscapes Paper: URL: https://arxiv.org/abs/2105.15203 Title: resize image to multiple of 32, improve SegFormer by 0.5-1.0 mIoU. @@ -167,3 +168,135 @@ Models: mIoU(ms+flip): 50.36 Config: configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243-41d2845b.pth +- Name: segformer_mit-b0_8x1_1024x1024_160k_cityscapes + In Collection: segformer + Metadata: + backbone: MIT-B0 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 210.97 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 3.64 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.54 + mIoU(ms+flip): 78.22 + Config: configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth +- Name: segformer_mit-b1_8x1_1024x1024_160k_cityscapes + In Collection: segformer + Metadata: + backbone: MIT-B1 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 232.56 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 4.49 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.56 + 
mIoU(ms+flip): 79.73 + Config: configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth +- Name: segformer_mit-b2_8x1_1024x1024_160k_cityscapes + In Collection: segformer + Metadata: + backbone: MIT-B2 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 297.62 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 7.42 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.08 + mIoU(ms+flip): 82.18 + Config: configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth +- Name: segformer_mit-b3_8x1_1024x1024_160k_cityscapes + In Collection: segformer + Metadata: + backbone: MIT-B3 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 395.26 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 10.86 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.94 + mIoU(ms+flip): 83.14 + Config: configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth +- Name: segformer_mit-b4_8x1_1024x1024_160k_cityscapes + In Collection: segformer + Metadata: + backbone: MIT-B4 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 531.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: 
(1024,1024) + Training Memory (GB): 15.07 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.89 + mIoU(ms+flip): 83.38 + Config: configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth +- Name: segformer_mit-b5_8x1_1024x1024_160k_cityscapes + In Collection: segformer + Metadata: + backbone: MIT-B5 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 719.42 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 18.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 82.25 + mIoU(ms+flip): 83.48 + Config: configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth diff --git a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000000..6444500537 --- /dev/null +++ b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,36 @@ +_base_ = [ + '../_base_/models/segformer_mit-b0.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b0.pth')), + test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) + +# optimizer +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_block': dict(decay_mult=0.), + 
'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +data = dict(samples_per_gpu=1, workers_per_gpu=1) diff --git a/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000000..a93e33bd88 --- /dev/null +++ b/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b1.pth'), + embed_dims=64), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000000..fab6be2945 --- /dev/null +++ b/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b2.pth'), + embed_dims=64, + num_layers=[3, 4, 6, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000000..479ce04ea1 --- /dev/null +++ b/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b3.pth'), + embed_dims=64, + num_layers=[3, 4, 18, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git 
a/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000000..808a1eb41b --- /dev/null +++ b/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b4.pth'), + embed_dims=64, + num_layers=[3, 8, 27, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000000..1c9422d37c --- /dev/null +++ b/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint='pretrain/mit_b5.pth'), + embed_dims=64, + num_layers=[3, 6, 40, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512]))