From 1c6660980206754146d5cc16c29297648b158598 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 26 Apr 2021 12:05:06 +0800 Subject: [PATCH 01/32] add config --- configs/_base_/models/deeplabv3_vit-d16.py | 53 +++++++++++++++++++ configs/_base_/models/fcn_vit-d16.py | 53 +++++++++++++++++++ configs/_base_/models/pspnet_vit-d16.py | 52 ++++++++++++++++++ configs/_base_/models/upernet_vit-d16.py | 52 ++++++++++++++++++ .../vit/deeplabv3_vit_512x512_80k_ade20k.py | 24 +++++++++ configs/vit/fcn_vit_512x512_80k_ade20k.py | 25 +++++++++ configs/vit/pspnet_vit_512x512_80k_ade20k.py | 25 +++++++++ configs/vit/upernet_vit_512x512_80k_ade20k.py | 25 +++++++++ mmseg/models/necks/multilevel_neck.py | 9 +++- 9 files changed, 316 insertions(+), 2 deletions(-) create mode 100644 configs/_base_/models/deeplabv3_vit-d16.py create mode 100644 configs/_base_/models/fcn_vit-d16.py create mode 100644 configs/_base_/models/pspnet_vit-d16.py create mode 100644 configs/_base_/models/upernet_vit-d16.py create mode 100644 configs/vit/deeplabv3_vit_512x512_80k_ade20k.py create mode 100644 configs/vit/fcn_vit_512x512_80k_ade20k.py create mode 100644 configs/vit/pspnet_vit_512x512_80k_ade20k.py create mode 100644 configs/vit/upernet_vit_512x512_80k_ade20k.py diff --git a/configs/_base_/models/deeplabv3_vit-d16.py b/configs/_base_/models/deeplabv3_vit-d16.py new file mode 100644 index 0000000000..e2703f04cd --- /dev/null +++ b/configs/_base_/models/deeplabv3_vit-d16.py @@ -0,0 +1,53 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', + backbone=dict( + type='VisionTransformer', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dim=768, + depth=12, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + norm_cfg=dict(type='LN'), + act_cfg=dict(type='GELU'), + 
norm_eval=False), + neck=dict( + type='MultiLevelNeck', + in_channels=[768], + out_channels=768), + decode_head=dict( + type='ASPPHead', + in_channels=768, + in_index=1, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=768, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) +find_unused_parameters=True diff --git a/configs/_base_/models/fcn_vit-d16.py b/configs/_base_/models/fcn_vit-d16.py new file mode 100644 index 0000000000..91d22fd09e --- /dev/null +++ b/configs/_base_/models/fcn_vit-d16.py @@ -0,0 +1,53 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', + backbone=dict( + type='VisionTransformer', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dim=768, + depth=12, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + norm_cfg=dict(type='LN'), + act_cfg=dict(type='GELU'), + norm_eval=False), + neck=dict( + type='MultiLevelNeck', + in_channels=[768], + out_channels=768), + decode_head=dict( + type='FCNHead', + in_channels=768, + in_index=1, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=768, + in_index=0, + 
channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/pspnet_vit-d16.py b/configs/_base_/models/pspnet_vit-d16.py new file mode 100644 index 0000000000..2a31775530 --- /dev/null +++ b/configs/_base_/models/pspnet_vit-d16.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', + backbone=dict( + type='VisionTransformer', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dim=768, + depth=12, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + norm_cfg=dict(type='LN'), + act_cfg=dict(type='GELU'), + norm_eval=False), + neck=dict( + type='MultiLevelNeck', + in_channels=[768], + out_channels=768), + decode_head=dict( + type='PSPHead', + in_channels=768, + in_index=1, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=768, + in_index=0, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/upernet_vit-d16.py b/configs/_base_/models/upernet_vit-d16.py new file mode 100644 index 0000000000..f375c0e5d5 --- /dev/null +++ 
b/configs/_base_/models/upernet_vit-d16.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', + backbone=dict( + type='VisionTransformer', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dim=768, + depth=12, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + norm_cfg=dict(type='LN'), + act_cfg=dict(type='GELU'), + norm_eval=False), + neck=dict( + type='MultiLevelNeck', + in_channels=[768], + out_channels=768), + decode_head=dict( + type='UPerHead', + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=768, + in_index=0, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/vit/deeplabv3_vit_512x512_80k_ade20k.py b/configs/vit/deeplabv3_vit_512x512_80k_ade20k.py new file mode 100644 index 0000000000..7e4d96cddd --- /dev/null +++ b/configs/vit/deeplabv3_vit_512x512_80k_ade20k.py @@ -0,0 +1,24 @@ +_base_ = [ + '../_base_/models/deeplabv3_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, 
type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data=dict(samples_per_gpu=2) + +find_unused_parameters=True \ No newline at end of file diff --git a/configs/vit/fcn_vit_512x512_80k_ade20k.py b/configs/vit/fcn_vit_512x512_80k_ade20k.py new file mode 100644 index 0000000000..93317d58e5 --- /dev/null +++ b/configs/vit/fcn_vit_512x512_80k_ade20k.py @@ -0,0 +1,25 @@ +_base_ = [ + '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data=dict(samples_per_gpu=2) + +find_unused_parameters=True diff --git a/configs/vit/pspnet_vit_512x512_80k_ade20k.py b/configs/vit/pspnet_vit_512x512_80k_ade20k.py new file mode 100644 index 0000000000..44d89f5061 --- /dev/null +++ b/configs/vit/pspnet_vit_512x512_80k_ade20k.py @@ -0,0 +1,25 @@ +_base_ = [ + '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/ade20k.py', + 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) +evaluation=dict(max_iteration=100, metrics='mIoU') +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data=dict(samples_per_gpu=2) + +find_unused_parameters=True + diff --git a/configs/vit/upernet_vit_512x512_80k_ade20k.py b/configs/vit/upernet_vit_512x512_80k_ade20k.py new file mode 100644 index 0000000000..6fb97edd37 --- /dev/null +++ b/configs/vit/upernet_vit_512x512_80k_ade20k.py @@ -0,0 +1,25 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data=dict(samples_per_gpu=2) + +find_unused_parameters=True \ No newline at end of file 
diff --git a/mmseg/models/necks/multilevel_neck.py b/mmseg/models/necks/multilevel_neck.py index 7e13813b16..2cfc565671 100644 --- a/mmseg/models/necks/multilevel_neck.py +++ b/mmseg/models/necks/multilevel_neck.py @@ -1,6 +1,6 @@ import torch.nn as nn import torch.nn.functional as F -from mmcv.cnn import ConvModule +from mmcv.cnn import ConvModule, xavier_init from ..builder import NECKS @@ -51,10 +51,15 @@ def __init__(self, stride=1, norm_cfg=norm_cfg, act_cfg=act_cfg)) + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') def forward(self, inputs): assert len(inputs) == len(self.in_channels) - print(inputs[0].shape) + inputs = [ lateral_conv(inputs[i]) for i, lateral_conv in enumerate(self.lateral_convs) From a439a402e1e4852a84356d4f3e441b15fa8a9c66 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 26 Apr 2021 14:06:14 +0800 Subject: [PATCH 02/32] add cityscapes config --- ...eeplabv3_vit-d16_512x512_40k_cityscapes.py | 21 ++++++++++++++++++ ...> deeplabv3_vit-d16_512x512_80k_ade20k.py} | 0 .../fcn_vit-d16_512x1024_40k_cityscapes.py | 21 ++++++++++++++++++ ...k.py => fcn_vit-d16_512x512_80k_ade20k.py} | 0 .../pspnet_vit-d16_512x1024_40k_cityscapes.py | 22 +++++++++++++++++++ ...y => pspnet_vit-d16_512x512_80k_ade20k.py} | 2 +- .../upernet_vit-d16_512x512_40k_cityscapes.py | 21 ++++++++++++++++++ ... 
=> upernet_vit-d16_512x512_80k_ade20k.py} | 0 8 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py rename configs/vit/{deeplabv3_vit_512x512_80k_ade20k.py => deeplabv3_vit-d16_512x512_80k_ade20k.py} (100%) create mode 100644 configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py rename configs/vit/{fcn_vit_512x512_80k_ade20k.py => fcn_vit-d16_512x512_80k_ade20k.py} (100%) create mode 100644 configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py rename configs/vit/{pspnet_vit_512x512_80k_ade20k.py => pspnet_vit-d16_512x512_80k_ade20k.py} (95%) create mode 100644 configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py rename configs/vit/{upernet_vit_512x512_80k_ade20k.py => upernet_vit-d16_512x512_80k_ade20k.py} (100%) diff --git a/configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py b/configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py new file mode 100644 index 0000000000..f93141ea1e --- /dev/null +++ b/configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py @@ -0,0 +1,21 @@ +_base_ = [ + '../_base_/models/deeplabv3_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data=dict(samples_per_gpu=2) + +find_unused_parameters=True \ No newline at end of file diff --git a/configs/vit/deeplabv3_vit_512x512_80k_ade20k.py b/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py 
similarity index 100% rename from configs/vit/deeplabv3_vit_512x512_80k_ade20k.py rename to configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py diff --git a/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..e69271be4c --- /dev/null +++ b/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,21 @@ +_base_ = [ + '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data=dict(samples_per_gpu=2) + +find_unused_parameters=True diff --git a/configs/vit/fcn_vit_512x512_80k_ade20k.py b/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py similarity index 100% rename from configs/vit/fcn_vit_512x512_80k_ade20k.py rename to configs/vit/fcn_vit-d16_512x512_80k_ade20k.py diff --git a/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..1ab36b3c53 --- /dev/null +++ b/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,22 @@ +_base_ = [ + '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, 
betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data=dict(samples_per_gpu=2) + +find_unused_parameters=True + diff --git a/configs/vit/pspnet_vit_512x512_80k_ade20k.py b/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py similarity index 95% rename from configs/vit/pspnet_vit_512x512_80k_ade20k.py rename to configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py index 44d89f5061..382e5b774f 100644 --- a/configs/vit/pspnet_vit_512x512_80k_ade20k.py +++ b/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py @@ -5,7 +5,7 @@ model = dict( decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) -evaluation=dict(max_iteration=100, metrics='mIoU') + # AdamW optimizer, no weight decay for position embedding & layer norm in backbone optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), diff --git a/configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py b/configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py new file mode 100644 index 0000000000..1ea292cf24 --- /dev/null +++ b/configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py @@ -0,0 +1,21 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 
'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data=dict(samples_per_gpu=2) + +find_unused_parameters=True \ No newline at end of file diff --git a/configs/vit/upernet_vit_512x512_80k_ade20k.py b/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py similarity index 100% rename from configs/vit/upernet_vit_512x512_80k_ade20k.py rename to configs/vit/upernet_vit-d16_512x512_80k_ade20k.py From b7aa7eef9fdb848f6bf1b146758d1f6a6ac81b90 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 26 Apr 2021 14:14:36 +0800 Subject: [PATCH 03/32] add default value to docstring --- mmseg/models/necks/multilevel_neck.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mmseg/models/necks/multilevel_neck.py b/mmseg/models/necks/multilevel_neck.py index 2cfc565671..65a4d4e611 100644 --- a/mmseg/models/necks/multilevel_neck.py +++ b/mmseg/models/necks/multilevel_neck.py @@ -13,7 +13,8 @@ class MultiLevelNeck(nn.Module): Args: in_channels (List[int]): Number of input channels per scale. out_channels (int): Number of output channels (used at each scale). - scales (List[int]): Scale factors for each input feature map. + scales (List[float]): Scale factors for each input feature map. + Default: [0.5, 1, 2, 4] norm_cfg (dict): Config dict for normalization layer. Default: None. act_cfg (dict): Config dict for activation layer in ConvModule. Default: None. 
@@ -51,6 +52,7 @@ def __init__(self, stride=1, norm_cfg=norm_cfg, act_cfg=act_cfg)) + # default init_weights for conv(msra) and norm in ConvModule def init_weights(self): for m in self.modules(): From e7d62431c2d3de1c11d43ba9857ac864b668c6da Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 26 Apr 2021 16:46:18 +0800 Subject: [PATCH 04/32] fix lint --- configs/_base_/models/deeplabv3_vit-d16.py | 10 ++--- configs/_base_/models/fcn_vit-d16.py | 8 ++-- configs/_base_/models/pspnet_vit-d16.py | 8 ++-- configs/_base_/models/upernet_vit-d16.py | 8 ++-- ...eeplabv3_vit-d16_512x512_40k_cityscapes.py | 42 ++++++++++++------- .../deeplabv3_vit-d16_512x512_80k_ade20k.py | 37 ++++++++++------ .../fcn_vit-d16_512x1024_40k_cityscapes.py | 37 ++++++++++------ configs/vit/fcn_vit-d16_512x512_80k_ade20k.py | 38 +++++++++++------ .../pspnet_vit-d16_512x1024_40k_cityscapes.py | 38 +++++++++++------ .../vit/pspnet_vit-d16_512x512_80k_ade20k.py | 38 +++++++++++------ .../upernet_vit-d16_512x512_40k_cityscapes.py | 37 ++++++++++------ .../vit/upernet_vit-d16_512x512_80k_ade20k.py | 38 +++++++++++------ 12 files changed, 215 insertions(+), 124 deletions(-) diff --git a/configs/_base_/models/deeplabv3_vit-d16.py b/configs/_base_/models/deeplabv3_vit-d16.py index e2703f04cd..234318f74a 100644 --- a/configs/_base_/models/deeplabv3_vit-d16.py +++ b/configs/_base_/models/deeplabv3_vit-d16.py @@ -2,7 +2,8 @@ norm_cfg = dict(type='SyncBN', requires_grad=True) model = dict( type='EncoderDecoder', - pretrained='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', + pretrained='https://github.com/rwightman/pytorch-image-models/releases/\ +download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', backbone=dict( type='VisionTransformer', img_size=(512, 512), @@ -18,10 +19,7 @@ norm_cfg=dict(type='LN'), act_cfg=dict(type='GELU'), norm_eval=False), - neck=dict( - type='MultiLevelNeck', - in_channels=[768], - out_channels=768), + 
neck=dict(type='MultiLevelNeck', in_channels=[768], out_channels=768), decode_head=dict( type='ASPPHead', in_channels=768, @@ -50,4 +48,4 @@ # model training and testing settings train_cfg=dict(), test_cfg=dict(mode='whole')) -find_unused_parameters=True +find_unused_parameters = True diff --git a/configs/_base_/models/fcn_vit-d16.py b/configs/_base_/models/fcn_vit-d16.py index 91d22fd09e..6f3009c0dc 100644 --- a/configs/_base_/models/fcn_vit-d16.py +++ b/configs/_base_/models/fcn_vit-d16.py @@ -2,7 +2,8 @@ norm_cfg = dict(type='SyncBN', requires_grad=True) model = dict( type='EncoderDecoder', - pretrained='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', + pretrained='https://github.com/rwightman/pytorch-image-models/releases/\ +download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', backbone=dict( type='VisionTransformer', img_size=(512, 512), @@ -18,10 +19,7 @@ norm_cfg=dict(type='LN'), act_cfg=dict(type='GELU'), norm_eval=False), - neck=dict( - type='MultiLevelNeck', - in_channels=[768], - out_channels=768), + neck=dict(type='MultiLevelNeck', in_channels=[768], out_channels=768), decode_head=dict( type='FCNHead', in_channels=768, diff --git a/configs/_base_/models/pspnet_vit-d16.py b/configs/_base_/models/pspnet_vit-d16.py index 2a31775530..1a985f27d2 100644 --- a/configs/_base_/models/pspnet_vit-d16.py +++ b/configs/_base_/models/pspnet_vit-d16.py @@ -2,7 +2,8 @@ norm_cfg = dict(type='SyncBN', requires_grad=True) model = dict( type='EncoderDecoder', - pretrained='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', + pretrained='https://github.com/rwightman/pytorch-image-models/releases/\ +download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', backbone=dict( type='VisionTransformer', img_size=(512, 512), @@ -18,10 +19,7 @@ norm_cfg=dict(type='LN'), act_cfg=dict(type='GELU'), norm_eval=False), - neck=dict( - type='MultiLevelNeck', - 
in_channels=[768], - out_channels=768), + neck=dict(type='MultiLevelNeck', in_channels=[768], out_channels=768), decode_head=dict( type='PSPHead', in_channels=768, diff --git a/configs/_base_/models/upernet_vit-d16.py b/configs/_base_/models/upernet_vit-d16.py index f375c0e5d5..3a716f09f0 100644 --- a/configs/_base_/models/upernet_vit-d16.py +++ b/configs/_base_/models/upernet_vit-d16.py @@ -2,7 +2,8 @@ norm_cfg = dict(type='SyncBN', requires_grad=True) model = dict( type='EncoderDecoder', - pretrained='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', + pretrained='https://github.com/rwightman/pytorch-image-models/releases/\ +download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', backbone=dict( type='VisionTransformer', img_size=(512, 512), @@ -18,10 +19,7 @@ norm_cfg=dict(type='LN'), act_cfg=dict(type='GELU'), norm_eval=False), - neck=dict( - type='MultiLevelNeck', - in_channels=[768], - out_channels=768), + neck=dict(type='MultiLevelNeck', in_channels=[768], out_channels=768), decode_head=dict( type='UPerHead', in_channels=[768, 768, 768, 768], diff --git a/configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py b/configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py index f93141ea1e..84aade606b 100644 --- a/configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py +++ b/configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py @@ -1,21 +1,35 @@ _base_ = [ - '../_base_/models/deeplabv3_vit-d16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' + '../_base_/models/deeplabv3_vit-d16.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' ] -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': 
dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) # By default, models are trained on 8 GPUs with 2 images per GPU -data=dict(samples_per_gpu=2) +data = dict(samples_per_gpu=2) -find_unused_parameters=True \ No newline at end of file +find_unused_parameters = True diff --git a/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py b/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py index 7e4d96cddd..2f4dd2cfed 100644 --- a/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py +++ b/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py @@ -6,19 +6,32 @@ model = dict( decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) +# AdamW optimizer, no weight decay for position embedding& layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': 
dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) # By default, models are trained on 8 GPUs with 2 images per GPU -data=dict(samples_per_gpu=2) +data = dict(samples_per_gpu=2) -find_unused_parameters=True \ No newline at end of file +find_unused_parameters = True diff --git a/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py index e69271be4c..876c66492e 100644 --- a/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py @@ -3,19 +3,32 @@ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) # By default, models are trained on 8 GPUs with 2 images per GPU -data=dict(samples_per_gpu=2) +data = dict(samples_per_gpu=2) -find_unused_parameters=True +find_unused_parameters = True diff --git a/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py b/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py index 93317d58e5..bd68a15f68 100644 --- a/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py +++ b/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py @@ -6,20 +6,32 @@ model = dict( decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) - -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) # By default, models are trained on 8 GPUs with 2 images per GPU -data=dict(samples_per_gpu=2) +data = dict(samples_per_gpu=2) -find_unused_parameters=True +find_unused_parameters = True diff --git a/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py index 1ab36b3c53..675addabdc 100644 --- a/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py @@ -3,20 +3,32 @@ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) # By default, models are trained on 8 GPUs with 2 images per GPU -data=dict(samples_per_gpu=2) - -find_unused_parameters=True +data = dict(samples_per_gpu=2) +find_unused_parameters = True diff --git a/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py b/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py index 382e5b774f..8c6a886769 100644 --- a/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py +++ b/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py @@ -6,20 +6,32 @@ model = dict( decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) # By default, models are trained on 8 GPUs with 2 images per GPU -data=dict(samples_per_gpu=2) - -find_unused_parameters=True +data = dict(samples_per_gpu=2) +find_unused_parameters = True diff --git a/configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py b/configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py index 1ea292cf24..19509e4a31 100644 --- a/configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py +++ b/configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py @@ -3,19 +3,30 @@ '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) # By default, models are trained on 8 GPUs with 2 images per GPU -data=dict(samples_per_gpu=2) - -find_unused_parameters=True \ No newline at end of file +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py b/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py index 6fb97edd37..28767616e4 100644 --- a/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py @@ -6,20 +6,32 @@ model = dict( decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) -# AdamW optimizer, no weight decay for position embedding & layer norm in backbone -optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, - paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.)})) - -lr_config = dict(_delete_=True, policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, min_lr=0.0, by_epoch=False) +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) # By default, models are trained on 8 GPUs with 2 images per GPU -data=dict(samples_per_gpu=2) +data = dict(samples_per_gpu=2) -find_unused_parameters=True \ No newline at end of file +find_unused_parameters = True From 0ce89588c73d28975459c6ba6730b6514740fec0 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Tue, 27 Apr 2021 12:07:25 +0800 Subject: [PATCH 05/32] add deit-s and deit-b --- configs/_base_/models/deeplabv3_vit-d16.py | 6 ++- configs/_base_/models/fcn_vit-d16.py | 6 ++- configs/_base_/models/pspnet_vit-d16.py | 6 ++- configs/_base_/models/upernet_vit-d16.py | 6 ++- ...labv3_deitB-d16-512x1024_40k_cityscapes.py | 38 +++++++++++++++++ .../deeplabv3_deitB-d16-512x512_80k_ade20k.py | 40 ++++++++++++++++++ ...labv3_deitS-d16-512x1024_40k_cityscapes.py | 40 ++++++++++++++++++ .../deeplabv3_deitS-d16-512x512_80k_ade20k.py | 41 +++++++++++++++++++ ...eplabv3_vit-d16_512x1024_40k_cityscapes.py | 35 ++++++++++++++++ .../fcn_deitB-d16_512x1024_40k_cityscapes.py | 37 +++++++++++++++++ .../vit/fcn_deitB-d16_512x512_40k_ade20k.py | 40 ++++++++++++++++++ .../fcn_deitS-d16_512x1024_40k_cityscapes.py | 39 ++++++++++++++++++ .../vit/fcn_deitS-d16_512x512_80k_ade20k.py | 41 +++++++++++++++++++ .../pspnet_deitB-d16-512x512_80k_ade20k.py | 40 ++++++++++++++++++ ...spnet_deitB-d16_512x1024_40k_cityscapes.py | 37 
+++++++++++++++++ ...spnet_deitS-d16_512x1024_40k_cityscapes.py | 39 ++++++++++++++++++ .../pspnet_deitS-d16_512x512_80k_ade20k.py | 41 +++++++++++++++++++ .../pspnet_vit-d16_512x1024_40k_cityscapes.py | 2 +- ...ernet_deitB-d16-512x1024_40k_cityscapes.py | 37 +++++++++++++++++ .../upernet_deitB-d16-512x512_80k_ade20k.py | 40 ++++++++++++++++++ ...ernet_deitS-d16-512x1024_40k_cityscapes.py | 39 ++++++++++++++++++ .../upernet_deitS-d16-512x512_80k_ade20k.py | 41 +++++++++++++++++++ ...upernet_vit-d16_512x1024_40k_cityscapes.py | 32 +++++++++++++++ 23 files changed, 718 insertions(+), 5 deletions(-) create mode 100644 configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py create mode 100644 configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py create mode 100644 configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py create mode 100644 configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py create mode 100644 configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py create mode 100644 configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py create mode 100644 configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py create mode 100644 configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py create mode 100644 configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py create mode 100644 configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py create mode 100644 configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py create mode 100644 configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py create mode 100644 configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py create mode 100644 configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py create mode 100644 configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py create mode 100644 configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py create mode 100644 configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py create mode 100644 configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py diff --git a/configs/_base_/models/deeplabv3_vit-d16.py 
b/configs/_base_/models/deeplabv3_vit-d16.py index 234318f74a..0a795d89b7 100644 --- a/configs/_base_/models/deeplabv3_vit-d16.py +++ b/configs/_base_/models/deeplabv3_vit-d16.py @@ -19,7 +19,11 @@ norm_cfg=dict(type='LN'), act_cfg=dict(type='GELU'), norm_eval=False), - neck=dict(type='MultiLevelNeck', in_channels=[768], out_channels=768), + neck=dict( + type='MultiLevelNeck', + in_channels=[768], + out_channels=768, + scales=[0.5, 1, 2, 4]), decode_head=dict( type='ASPPHead', in_channels=768, diff --git a/configs/_base_/models/fcn_vit-d16.py b/configs/_base_/models/fcn_vit-d16.py index 6f3009c0dc..28bfc3c32d 100644 --- a/configs/_base_/models/fcn_vit-d16.py +++ b/configs/_base_/models/fcn_vit-d16.py @@ -19,7 +19,11 @@ norm_cfg=dict(type='LN'), act_cfg=dict(type='GELU'), norm_eval=False), - neck=dict(type='MultiLevelNeck', in_channels=[768], out_channels=768), + neck=dict( + type='MultiLevelNeck', + in_channels=[768], + out_channels=768, + scales=[0.5, 1, 2, 4]), decode_head=dict( type='FCNHead', in_channels=768, diff --git a/configs/_base_/models/pspnet_vit-d16.py b/configs/_base_/models/pspnet_vit-d16.py index 1a985f27d2..e3b9bc8b6f 100644 --- a/configs/_base_/models/pspnet_vit-d16.py +++ b/configs/_base_/models/pspnet_vit-d16.py @@ -19,7 +19,11 @@ norm_cfg=dict(type='LN'), act_cfg=dict(type='GELU'), norm_eval=False), - neck=dict(type='MultiLevelNeck', in_channels=[768], out_channels=768), + neck=dict( + type='MultiLevelNeck', + in_channels=[768], + out_channels=768, + scales=[0.5, 1, 2, 4]), decode_head=dict( type='PSPHead', in_channels=768, diff --git a/configs/_base_/models/upernet_vit-d16.py b/configs/_base_/models/upernet_vit-d16.py index 3a716f09f0..9649a500a4 100644 --- a/configs/_base_/models/upernet_vit-d16.py +++ b/configs/_base_/models/upernet_vit-d16.py @@ -19,7 +19,11 @@ norm_cfg=dict(type='LN'), act_cfg=dict(type='GELU'), norm_eval=False), - neck=dict(type='MultiLevelNeck', in_channels=[768], out_channels=768), + neck=dict( + type='MultiLevelNeck', 
+ in_channels=[768], + out_channels=768, + scales=[0.5, 1, 2, 4]), decode_head=dict( type='UPerHead', in_channels=[768, 768, 768, 768], diff --git a/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..22ee353d58 --- /dev/null +++ b/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py @@ -0,0 +1,38 @@ +_base_ = [ + '../_base_/models/deeplabv3_vit-d16.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] + +model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_base_patch16_224-b5f2ef4d.pth') + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py b/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py new file mode 100644 index 0000000000..a380f94e6a --- /dev/null +++ b/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/deeplabv3_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_base_patch16_224-b5f2ef4d.pth', + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..b0bb90ea48 --- /dev/null +++ b/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/deeplabv3_vit-d16.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py b/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py new file mode 100644 index 0000000000..b1d2803c13 --- /dev/null +++ b/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py @@ -0,0 +1,41 @@ +_base_ = [ + '../_base_/models/deeplabv3_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..84aade606b --- /dev/null +++ b/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,35 @@ +_base_ = [ + '../_base_/models/deeplabv3_vit-d16.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..43f734530f --- /dev/null +++ b/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] + +model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_base_patch16_224-b5f2ef4d.pth') + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py b/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py new file mode 100644 index 0000000000..42754d39e6 --- /dev/null +++ b/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_base_patch16_224-b5f2ef4d.pth', + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..0fe0f7b664 --- /dev/null +++ b/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py b/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py new file mode 100644 index 0000000000..3ca16e4398 --- /dev/null +++ b/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py @@ -0,0 +1,41 @@ +_base_ = [ + '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py b/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py new file mode 100644 index 0000000000..bc921f94ab --- /dev/null +++ b/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_base_patch16_224-b5f2ef4d.pth', + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..d14c13baa7 --- /dev/null +++ b/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] + +model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_base_patch16_224-b5f2ef4d.pth') + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..0bb3c81cbc --- /dev/null +++ b/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py b/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py new file mode 100644 index 0000000000..325e84b058 --- /dev/null +++ b/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py @@ -0,0 +1,41 @@ +_base_ = [ + '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py index 675addabdc..e0ea7817f1 100644 --- a/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py @@ -1,6 +1,6 @@ _base_ = [ '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] # AdamW optimizer, no weight decay for position embedding & layer norm diff --git a/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py b/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..2c14c77e66 --- /dev/null +++ b/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] + +model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_base_patch16_224-b5f2ef4d.pth') + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py b/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py new file mode 100644 index 0000000000..3fa5764abf --- /dev/null +++ b/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_base_patch16_224-b5f2ef4d.pth', + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py b/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..0bfa7ba0cf --- /dev/null +++ b/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py new file mode 100644 index 0000000000..754f919194 --- /dev/null +++ b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py @@ -0,0 +1,41 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) + +find_unused_parameters = True diff --git a/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000000..19509e4a31 --- /dev/null +++ b/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,32 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) From 6b4046543e12964a0554b034f898c0b9fbfc64ae Mon Sep 17 00:00:00 2001 From: xiexinch Date: Wed, 28 Apr 2021 19:40:05 +0800 Subject: [PATCH 06/32] add readme --- configs/vit/README.md | 50 +++++++++++++++++++ ...eeplabv3_vit-d16_512x512_40k_cityscapes.py | 35 ------------- .../upernet_vit-d16_512x512_40k_cityscapes.py | 32 ------------ .../test_necks/test_multilevel_neck.py | 3 ++ 4 files changed, 53 insertions(+), 67 deletions(-) create mode 100644 configs/vit/README.md delete mode 100644 configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py delete mode 100644 configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py diff --git a/configs/vit/README.md b/configs/vit/README.md new file mode 100644 index 0000000000..ee68c684dd --- /dev/null +++ b/configs/vit/README.md @@ -0,0 +1,50 @@ +# Vision Transformer + +## Introduction + + + +```latex +@article{dosovitskiy2020, + title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, + author={Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, + journal={arXiv preprint arXiv:2010.11929}, + year={2020} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UPerNet | Vit | 512x1024 | 40000 | | | | | | +| UPerNet | Deit-S | 512x1024 | 40000 | | | | | | +| UPerNet | Deit-B | 512x1024 | 40000 | | | | | | +| DeepLabV3 | Vit | 512x1024 | 40000 | | | | | | +| DeepLabV3 | Deit-S | 512x1024 | 40000 | | | | | | +| DeepLabV3 | Deit-B | 512x1024 | 40000 | | | | | | +| PSPNet | Vit | 512x1024 | 40000 | | | | | | +| PSPNet | Deit-S | 512x1024 | 40000 | | | | | | +| PSPNet | Deit-B | 512x1024 | 40000 | | | | | | +| FCN | Vit | 512x1024 | 40000 | | | | | | +| FCN | Deit-S | 512x1024 | 40000 | | | | | | +| FCN | Deit-B | 512x1024 | 40000 | | | | | | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | Vit | 512x512 | 80000 | | | | | | +| UPerNet | Deit-S | 512x512 | 80000 | | | | | | +| UPerNet | Deit-B | 512x512 | 80000 | | | | | | +| DeepLabV3 | Vit | 512x512 | 80000 | | | | | | +| DeepLabV3 | Deit-S | 512x512 | 80000 | | | | | | +| DeepLabV3 | Deit-B | 512x512 | 80000 | | | | | | +| PSPNet | Vit | 512x512 | 80000 | | | | | | +| PSPNet | Deit-S | 512x512 | 80000 
| | | | | | +| PSPNet | Deit-B | 512x512 | 80000 | | | | | | +| FCN | Vit | 512x512 | 80000 | | | | | | +| FCN | Deit-S | 512x512 | 80000 | | | | | | +| FCN | Deit-B | 512x512 | 80000 | | | | | | diff --git a/configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py b/configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py deleted file mode 100644 index 84aade606b..0000000000 --- a/configs/vit/deeplabv3_vit-d16_512x512_40k_cityscapes.py +++ /dev/null @@ -1,35 +0,0 @@ -_base_ = [ - '../_base_/models/deeplabv3_vit-d16.py', - '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', - '../_base_/schedules/schedule_80k.py' -] - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py b/configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py deleted file mode 100644 index 19509e4a31..0000000000 --- a/configs/vit/upernet_vit-d16_512x512_40k_cityscapes.py +++ /dev/null @@ -1,32 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/tests/test_models/test_necks/test_multilevel_neck.py b/tests/test_models/test_necks/test_multilevel_neck.py index 8fb2fc9280..c5a567d988 100644 --- a/tests/test_models/test_necks/test_multilevel_neck.py +++ b/tests/test_models/test_necks/test_multilevel_neck.py @@ -5,6 +5,9 @@ def test_multilevel_neck(): + # Test init_weights + MultiLevelNeck([266], 256).init_weights() + # Test multi feature maps in_channels = [256, 512, 1024, 2048] inputs = [torch.randn(1, c, 14, 14) for i, c in enumerate(in_channels)] From 6d0ab21eb4f2146d754f18349fdfe091a06d5910 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Thu, 29 Apr 2021 19:57:52 +0800 Subject: [PATCH 07/32] add eps at norm_cfg --- configs/_base_/models/deeplabv3_vit-d16.py | 2 +- configs/_base_/models/fcn_vit-d16.py | 2 +- configs/_base_/models/pspnet_vit-d16.py | 2 +- configs/_base_/models/upernet_vit-d16.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/_base_/models/deeplabv3_vit-d16.py b/configs/_base_/models/deeplabv3_vit-d16.py index 0a795d89b7..7bec921e32 100644 --- a/configs/_base_/models/deeplabv3_vit-d16.py +++ b/configs/_base_/models/deeplabv3_vit-d16.py @@ -16,7 +16,7 @@ qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, - norm_cfg=dict(type='LN'), + norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), norm_eval=False), neck=dict( diff --git a/configs/_base_/models/fcn_vit-d16.py b/configs/_base_/models/fcn_vit-d16.py index 28bfc3c32d..8546569c5f 100644 --- a/configs/_base_/models/fcn_vit-d16.py +++ b/configs/_base_/models/fcn_vit-d16.py @@ -16,7 +16,7 @@ qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, - norm_cfg=dict(type='LN'), + norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), 
norm_eval=False), neck=dict( diff --git a/configs/_base_/models/pspnet_vit-d16.py b/configs/_base_/models/pspnet_vit-d16.py index e3b9bc8b6f..58c78e11d8 100644 --- a/configs/_base_/models/pspnet_vit-d16.py +++ b/configs/_base_/models/pspnet_vit-d16.py @@ -16,7 +16,7 @@ qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, - norm_cfg=dict(type='LN'), + norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), norm_eval=False), neck=dict( diff --git a/configs/_base_/models/upernet_vit-d16.py b/configs/_base_/models/upernet_vit-d16.py index 9649a500a4..f13c59f39e 100644 --- a/configs/_base_/models/upernet_vit-d16.py +++ b/configs/_base_/models/upernet_vit-d16.py @@ -16,7 +16,7 @@ qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, - norm_cfg=dict(type='LN'), + norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), norm_eval=False), neck=dict( From f7b8c1800f5de1788019af810f3a767119eb37d2 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Thu, 6 May 2021 11:40:22 +0800 Subject: [PATCH 08/32] add drop_path_rate experiment --- configs/_base_/models/deeplabv3_vit-d16.py | 1 + configs/_base_/models/fcn_vit-d16.py | 1 + configs/_base_/models/pspnet_vit-d16.py | 1 + configs/_base_/models/upernet_vit-d16.py | 1 + configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py | 6 ++---- configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py | 6 ++---- configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py | 6 ++---- configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py | 6 ++---- configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py | 6 ++---- configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py | 6 ++---- configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py | 6 ++---- configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py | 6 ++---- configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py | 6 ++---- configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py | 6 ++---- configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py | 6 ++---- configs/vit/fcn_vit-d16_512x512_80k_ade20k.py | 6 ++---- 
configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py | 6 ++---- configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py | 6 ++---- configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py | 6 ++---- configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py | 6 ++---- configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py | 6 ++---- configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py | 6 ++---- configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py | 4 ++-- configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py | 6 ++---- configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py | 4 ++-- configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py | 4 ++-- configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py | 4 ++-- configs/vit/upernet_vit-d16_512x512_80k_ade20k.py | 4 ++-- 28 files changed, 52 insertions(+), 86 deletions(-) diff --git a/configs/_base_/models/deeplabv3_vit-d16.py b/configs/_base_/models/deeplabv3_vit-d16.py index 7bec921e32..b3debcf32e 100644 --- a/configs/_base_/models/deeplabv3_vit-d16.py +++ b/configs/_base_/models/deeplabv3_vit-d16.py @@ -16,6 +16,7 @@ qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, + drop_path_rate=0.3, norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), norm_eval=False), diff --git a/configs/_base_/models/fcn_vit-d16.py b/configs/_base_/models/fcn_vit-d16.py index 8546569c5f..bcf877cb84 100644 --- a/configs/_base_/models/fcn_vit-d16.py +++ b/configs/_base_/models/fcn_vit-d16.py @@ -16,6 +16,7 @@ qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, + drop_path_rate=0.3, norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), norm_eval=False), diff --git a/configs/_base_/models/pspnet_vit-d16.py b/configs/_base_/models/pspnet_vit-d16.py index 58c78e11d8..e510e36b60 100644 --- a/configs/_base_/models/pspnet_vit-d16.py +++ b/configs/_base_/models/pspnet_vit-d16.py @@ -16,6 +16,7 @@ qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, + drop_path_rate=0.3, norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), norm_eval=False), 
diff --git a/configs/_base_/models/upernet_vit-d16.py b/configs/_base_/models/upernet_vit-d16.py index f13c59f39e..acc3030793 100644 --- a/configs/_base_/models/upernet_vit-d16.py +++ b/configs/_base_/models/upernet_vit-d16.py @@ -16,6 +16,7 @@ qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, + drop_path_rate=0.3, norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), norm_eval=False), diff --git a/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py index 22ee353d58..107033ea31 100644 --- a/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py +++ b/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py @@ -17,8 +17,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) @@ -34,5 +34,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py b/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py index a380f94e6a..259f83642b 100644 --- a/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py +++ b/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py @@ -19,8 +19,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) @@ -36,5 +36,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py index b0bb90ea48..d3c89394d7 100644 --- a/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py +++ b/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py @@ -19,8 +19,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) @@ -36,5 +36,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py b/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py index b1d2803c13..42511d1f30 100644 --- a/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py +++ b/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py @@ -20,8 +20,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) @@ -37,5 +37,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py index 84aade606b..dcfe65ab85 100644 --- a/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py @@ -14,8 +14,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) @@ -31,5 +31,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py b/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py index 2f4dd2cfed..ce1fbf9a9d 100644 --- a/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py +++ b/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py @@ -16,8 +16,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) @@ -33,5 +33,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py index 43f734530f..34c1c16b06 100644 --- a/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py @@ -16,8 +16,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) @@ -33,5 +33,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py b/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py index 42754d39e6..173e4b421f 100644 --- a/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py +++ b/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py @@ -19,8 +19,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) @@ -36,5 +36,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py index 0fe0f7b664..3aa65881c3 100644 --- a/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py @@ -18,8 +18,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) @@ -35,5 +35,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py b/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py index 3ca16e4398..5ba08fb032 100644 --- a/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py +++ b/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py @@ -20,8 +20,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) @@ -37,5 +37,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py index 876c66492e..e31e22e794 100644 --- a/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py @@ -13,8 +13,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) @@ -30,5 +30,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py b/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py index bd68a15f68..09a33bd690 100644 --- a/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py +++ b/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py @@ -16,8 +16,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) @@ -33,5 +33,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py b/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py index bc921f94ab..de9afd47d2 100644 --- a/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py +++ b/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py @@ -19,8 +19,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) @@ -36,5 +36,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py index d14c13baa7..85b9cbfbff 100644 --- a/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py @@ -16,8 +16,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) @@ -33,5 +33,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py index 0bb3c81cbc..343524bbf5 100644 --- a/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py @@ -18,8 +18,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) @@ -35,5 +35,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py b/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py index 325e84b058..36fed902c3 100644 --- a/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py +++ b/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py @@ -20,8 +20,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) @@ -37,5 +37,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py index e0ea7817f1..d2fadb1785 100644 --- a/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py @@ -13,8 +13,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) @@ -30,5 +30,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py b/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py index 8c6a886769..5f790915d2 100644 --- a/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py +++ b/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py @@ -16,8 +16,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) @@ -33,5 +33,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py b/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py index 2c14c77e66..dce5ee1531 100644 --- a/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py @@ -16,8 +16,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) diff --git a/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py b/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py index 3fa5764abf..0d8f4ea240 100644 --- a/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py +++ b/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py @@ -19,8 +19,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) @@ -36,5 +36,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py b/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py index 0bfa7ba0cf..eb87949055 100644 --- a/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py @@ -18,8 +18,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) diff --git a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py index 754f919194..c14006c67d 100644 --- a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py +++ b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py @@ -20,8 +20,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) diff --git a/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py index 19509e4a31..6ecbdc7fb9 100644 --- a/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py @@ -13,8 +13,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) })) diff --git a/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py b/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py index 28767616e4..7273fd573b 100644 --- a/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py @@ -16,8 +16,8 @@ weight_decay=0.01, paramwise_cfg=dict( custom_keys={ - 'absolute_pos_embed': dict(decay_mult=0.), - 'relative_position_bias_table': dict(decay_mult=0.), + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) 
})) From d605af62c1106f927fd8f283ae206b99c5668469 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Thu, 6 May 2021 15:15:53 +0800 Subject: [PATCH 09/32] add deit case at init_weight --- configs/_base_/models/deeplabv3_vit-d16.py | 1 - configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py | 2 +- configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py | 2 +- configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py | 4 ++-- configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py | 4 ++-- configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py | 2 +- configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py | 2 +- configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py | 4 ++-- configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py | 4 ++-- configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py | 2 +- configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py | 2 +- configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py | 4 ++-- configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py | 4 ++-- configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py | 4 +--- configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py | 2 +- configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py | 6 ++---- configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py | 6 ++---- configs/vit/upernet_vit-d16_512x512_80k_ade20k.py | 2 -- mmseg/models/backbones/vit.py | 2 ++ 19 files changed, 26 insertions(+), 33 deletions(-) diff --git a/configs/_base_/models/deeplabv3_vit-d16.py b/configs/_base_/models/deeplabv3_vit-d16.py index b3debcf32e..1a9b37e078 100644 --- a/configs/_base_/models/deeplabv3_vit-d16.py +++ b/configs/_base_/models/deeplabv3_vit-d16.py @@ -53,4 +53,3 @@ # model training and testing settings train_cfg=dict(), test_cfg=dict(mode='whole')) -find_unused_parameters = True diff --git a/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py index 107033ea31..dff99ef57b 100644 --- a/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py +++ 
b/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py @@ -5,7 +5,7 @@ ] model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_patch16_224-b5f2ef4d.pth') +deit_base_distilled_patch16_384-d0272ac0.pth') # AdamW optimizer, no weight decay for position embedding& layer norm # in backbone diff --git a/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py b/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py index 259f83642b..2eebe1ee6f 100644 --- a/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py +++ b/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py @@ -5,7 +5,7 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_patch16_224-b5f2ef4d.pth', +deit_base_distilled_patch16_384-d0272ac0.pth', decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py index d3c89394d7..b6d4177df5 100644 --- a/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py +++ b/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py @@ -6,8 +6,8 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_patch16_224-cd65a155.pth', - backbone=dict(num_heads=6)) +deit_small_distilled_patch16_224-649709d9.pth', + backbone=dict(num_heads=6, embed_dim=384)) # AdamW optimizer, no weight decay for position embedding& layer norm # in backbone diff --git a/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py b/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py index 42511d1f30..caca53c9e9 100644 --- a/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py +++ b/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py @@ -5,8 +5,8 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_patch16_224-cd65a155.pth', - backbone=dict(num_heads=6), +deit_small_distilled_patch16_224-649709d9.pth', + backbone=dict(num_heads=6, embed_dim=384), 
decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py index 34c1c16b06..aa360b583f 100644 --- a/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py @@ -4,7 +4,7 @@ ] model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_patch16_224-b5f2ef4d.pth') +deit_base_distilled_patch16_384-d0272ac0.pth') # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone diff --git a/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py b/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py index 173e4b421f..883e3633df 100644 --- a/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py +++ b/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py @@ -5,7 +5,7 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_patch16_224-b5f2ef4d.pth', +deit_base_distilled_patch16_384-d0272ac0.pth', decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py index 3aa65881c3..0ade722883 100644 --- a/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py @@ -5,8 +5,8 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_patch16_224-cd65a155.pth', - backbone=dict(num_heads=6)) +deit_small_distilled_patch16_224-649709d9.pth', + backbone=dict(num_heads=6, embed_dim=384)) # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone diff --git a/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py b/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py index 5ba08fb032..2c8619f06e 100644 --- a/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py +++ b/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py @@ -5,8 +5,8 @@ model = dict( 
pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_patch16_224-cd65a155.pth', - backbone=dict(num_heads=6), +deit_small_distilled_patch16_224-649709d9.pth', + backbone=dict(num_heads=6, embed_dim=384), decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py b/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py index de9afd47d2..a1265df552 100644 --- a/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py +++ b/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py @@ -5,7 +5,7 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_patch16_224-b5f2ef4d.pth', +deit_base_distilled_patch16_384-d0272ac0.pth', decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py index 85b9cbfbff..2a177a2fa3 100644 --- a/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py @@ -4,7 +4,7 @@ ] model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_patch16_224-b5f2ef4d.pth') +deit_base_distilled_patch16_384-d0272ac0.pth') # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone diff --git a/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py index 343524bbf5..1d706a1760 100644 --- a/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py @@ -5,8 +5,8 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_patch16_224-cd65a155.pth', - backbone=dict(num_heads=6)) +deit_small_distilled_patch16_224-649709d9.pth', + backbone=dict(num_heads=6, embed_dim=384)) # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone diff --git 
a/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py b/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py index 36fed902c3..8695c93c11 100644 --- a/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py +++ b/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py @@ -5,8 +5,8 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_patch16_224-cd65a155.pth', - backbone=dict(num_heads=6), +deit_small_distilled_patch16_224-649709d9.pth', + backbone=dict(num_heads=6, embed_dim=384), decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py b/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py index dce5ee1531..3d049eafc5 100644 --- a/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py @@ -4,7 +4,7 @@ ] model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_patch16_224-b5f2ef4d.pth') +deit_base_distilled_patch16_384-d0272ac0.pth') # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone @@ -33,5 +33,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py b/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py index 0d8f4ea240..8308c1b04b 100644 --- a/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py +++ b/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py @@ -5,7 +5,7 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_patch16_224-b5f2ef4d.pth', +deit_base_distilled_patch16_384-d0272ac0.pth', decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py b/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py index eb87949055..e7f8c81e43 100644 --- a/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py 
+++ b/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py @@ -5,8 +5,8 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_patch16_224-cd65a155.pth', - backbone=dict(num_heads=6)) +deit_small_distilled_patch16_224-649709d9.pth', + backbone=dict(num_heads=6, embed_dim=384)) # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone @@ -35,5 +35,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py index c14006c67d..3122663d4f 100644 --- a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py +++ b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py @@ -5,8 +5,8 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_patch16_224-cd65a155.pth', - backbone=dict(num_heads=6), +deit_small_distilled_patch16_224-649709d9.pth', + backbone=dict(num_heads=6, embed_dim=384), decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) @@ -37,5 +37,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py b/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py index 7273fd573b..02878bfd03 100644 --- a/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py @@ -33,5 +33,3 @@ # By default, models are trained on 8 GPUs with 2 images per GPU data = dict(samples_per_gpu=2) - -find_unused_parameters = True diff --git a/mmseg/models/backbones/vit.py b/mmseg/models/backbones/vit.py index 1d730d863b..e28ed9222d 100644 --- a/mmseg/models/backbones/vit.py +++ b/mmseg/models/backbones/vit.py @@ -317,6 +317,8 @@ def init_weights(self, pretrained=None): checkpoint = _load_checkpoint(pretrained, logger=logger) if 'state_dict' 
in checkpoint: state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] else: state_dict = checkpoint From 425cac77e61e9444f9da0eb01d4f477b674fb661 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Tue, 11 May 2021 16:52:56 +0800 Subject: [PATCH 10/32] add upernet result --- configs/vit/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configs/vit/README.md b/configs/vit/README.md index ee68c684dd..0ef474a7d8 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -20,8 +20,8 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | UPerNet | Vit | 512x1024 | 40000 | | | | | | -| UPerNet | Deit-S | 512x1024 | 40000 | | | | | | -| UPerNet | Deit-B | 512x1024 | 40000 | | | | | | +| UPerNet | Deit-S | 512x1024 | 40000 | | | | 69.28 | | +| UPerNet | Deit-B | 512x1024 | 40000 | | | | 73.35 | | | DeepLabV3 | Vit | 512x1024 | 40000 | | | | | | | DeepLabV3 | Deit-S | 512x1024 | 40000 | | | | | | | DeepLabV3 | Deit-B | 512x1024 | 40000 | | | | | | @@ -36,9 +36,9 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | 
---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | Vit | 512x512 | 80000 | | | | | | -| UPerNet | Deit-S | 512x512 | 80000 | | | | | | -| UPerNet | Deit-B | 512x512 | 80000 | | | | | | +| UPerNet | Vit | 512x512 | 80000 | | | | 45.99 | | +| UPerNet | Deit-S | 512x512 | 80000 | | | | 40.86 | | +| UPerNet | Deit-B | 512x512 | 80000 | | | | 44.62 | | | DeepLabV3 | Vit | 512x512 | 80000 | | | | | | | DeepLabV3 | Deit-S | 512x512 | 80000 | | | | | | | DeepLabV3 | Deit-B | 512x512 | 80000 | | | | | | From dd6856e90d15fc73abd81a88d6afdbd8e0acea78 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Wed, 12 May 2021 12:53:35 +0800 Subject: [PATCH 11/32] update result and add upernet 160k config --- configs/vit/README.md | 12 +++---- .../upernet_deitB-d16_512x512_160k_ade20k.py | 7 ++++ .../upernet_deitS-d16_512x512_160k_ade20k.py | 9 +++++ .../upernet_vit-d16_512x512_160k_ade20k.py | 35 +++++++++++++++++++ 4 files changed, 57 insertions(+), 6 deletions(-) create mode 100644 configs/vit/upernet_deitB-d16_512x512_160k_ade20k.py create mode 100644 configs/vit/upernet_deitS-d16_512x512_160k_ade20k.py create mode 100644 configs/vit/upernet_vit-d16_512x512_160k_ade20k.py diff --git a/configs/vit/README.md b/configs/vit/README.md index 0ef474a7d8..2fa0db8cd9 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -19,9 +19,9 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | 
-------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| UPerNet | Vit | 512x1024 | 40000 | | | | | | -| UPerNet | Deit-S | 512x1024 | 40000 | | | | 69.28 | | -| UPerNet | Deit-B | 512x1024 | 40000 | | | | 73.35 | | +| UPerNet | Vit | 512x1024 | 40000 | | | 72.61 | | | +| UPerNet | Deit-S | 512x1024 | 40000 | | | 69.28 | | | +| UPerNet | Deit-B | 512x1024 | 40000 | | | 73.35 | | | | DeepLabV3 | Vit | 512x1024 | 40000 | | | | | | | DeepLabV3 | Deit-S | 512x1024 | 40000 | | | | | | | DeepLabV3 | Deit-B | 512x1024 | 40000 | | | | | | @@ -36,9 +36,9 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | Vit | 512x512 | 80000 | | | | 45.99 | | -| UPerNet | Deit-S | 512x512 | 80000 | | | | 40.86 | | -| UPerNet | Deit-B | 512x512 | 80000 | | | | 44.62 | | +| UPerNet | Vit | 512x512 | 80000 | | |45.99 | | | +| UPerNet | Deit-S | 512x512 | 80000 | | |40.86 | | | +| UPerNet | Deit-B | 512x512 | 80000 | | |44.62 | | | | DeepLabV3 | Vit | 512x512 | 80000 | | 
| | | | | DeepLabV3 | Deit-S | 512x512 | 80000 | | | | | | | DeepLabV3 | Deit-B | 512x512 | 80000 | | | | | | diff --git a/configs/vit/upernet_deitB-d16_512x512_160k_ade20k.py b/configs/vit/upernet_deitB-d16_512x512_160k_ade20k.py new file mode 100644 index 0000000000..bdb16df7e7 --- /dev/null +++ b/configs/vit/upernet_deitB-d16_512x512_160k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = './upernet_vit-d16_512x512_160k_ade20k.py' + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_base_distilled_patch16_384-d0272ac0.pth', + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/upernet_deitS-d16_512x512_160k_ade20k.py b/configs/vit/upernet_deitS-d16_512x512_160k_ade20k.py new file mode 100644 index 0000000000..3cc91577f7 --- /dev/null +++ b/configs/vit/upernet_deitS-d16_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './upernet_vit-d16_512x512_160k_ade20k.py' + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_small_distilled_patch16_224-649709d9.pth', + backbone=dict(num_heads=6, embed_dim=384), + neck=dict(in_channels=[384]), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/upernet_vit-d16_512x512_160k_ade20k.py b/configs/vit/upernet_vit-d16_512x512_160k_ade20k.py new file mode 100644 index 0000000000..3c99cdda9b --- /dev/null +++ b/configs/vit/upernet_vit-d16_512x512_160k_ade20k.py @@ -0,0 +1,35 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': 
dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) From bd86b64442442694bd4fdae693eb5d788f2c86c4 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 17 May 2021 10:06:35 +0800 Subject: [PATCH 12/32] update upernet result and fix settings --- configs/vit/README.md | 3 +++ configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py | 2 +- configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py | 3 ++- configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py | 1 + configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py | 2 +- 5 files changed, 8 insertions(+), 3 deletions(-) diff --git a/configs/vit/README.md b/configs/vit/README.md index 2fa0db8cd9..f56efd930c 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -37,8 +37,11 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | UPerNet | Vit | 512x512 | 80000 | | |45.99 | | | +| UPerNet | Vit | 512x512 | 80000 | | |45.88 | | | | UPerNet | Deit-S | 512x512 | 80000 | | |40.86 | | | +| UPerNet | Deit-S | 512x512 | 80000 | | |41.71 | | | | UPerNet | Deit-B | 512x512 | 80000 | | |44.62 | | | +| UPerNet | Deit-B | 512x512 | 80000 | | |44.69 | | | | 
DeepLabV3 | Vit | 512x512 | 80000 | | | | | | | DeepLabV3 | Deit-S | 512x512 | 80000 | | | | | | | DeepLabV3 | Deit-B | 512x512 | 80000 | | | | | | diff --git a/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py index dcfe65ab85..febf88f115 100644 --- a/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py @@ -1,7 +1,7 @@ _base_ = [ '../_base_/models/deeplabv3_vit-d16.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', - '../_base_/schedules/schedule_80k.py' + '../_base_/schedules/schedule_40k.py' ] # AdamW optimizer, no weight decay for position embedding & layer norm diff --git a/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py b/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py index e7f8c81e43..4be0cf2b7b 100644 --- a/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py @@ -6,7 +6,8 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/\ deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384)) + backbone=dict(num_heads=6, embed_dim=384), + neck=dict(in_channels=[384])) # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone diff --git a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py index 3122663d4f..a6993c161e 100644 --- a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py +++ b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py @@ -8,6 +8,7 @@ deit_small_distilled_patch16_224-649709d9.pth', backbone=dict(num_heads=6, embed_dim=384), decode_head=dict(num_classes=150), + neck=dict(in_channels=[384]), auxiliary_head=dict(num_classes=150)) # AdamW optimizer, no weight decay for position embedding & layer norm diff --git a/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py 
b/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py index 6ecbdc7fb9..fa58aacee5 100644 --- a/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py @@ -1,6 +1,6 @@ _base_ = [ '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] # AdamW optimizer, no weight decay for position embedding & layer norm From 3f8db0520f9a11a126b047a65193b4e4c2034fe2 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 17 May 2021 10:34:43 +0800 Subject: [PATCH 13/32] Update iters number --- configs/vit/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/vit/README.md b/configs/vit/README.md index f56efd930c..b746858c43 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -37,11 +37,11 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | UPerNet | Vit | 512x512 | 80000 | | |45.99 | | | -| UPerNet | Vit | 512x512 | 80000 | | |45.88 | | | +| UPerNet | Vit | 512x512 | 160000 | | |45.88 | | | | UPerNet | Deit-S | 512x512 | 80000 | | |40.86 | | | -| UPerNet | Deit-S | 512x512 | 80000 | | |41.71 | | | +| UPerNet | Deit-S | 512x512 | 160000 | | |41.71 | | | | UPerNet | Deit-B | 512x512 | 80000 | | |44.62 | | | -| 
UPerNet | Deit-B | 512x512 | 80000 | | |44.69 | | | +| UPerNet | Deit-B | 512x512 | 160000 | | |44.69 | | | | DeepLabV3 | Vit | 512x512 | 80000 | | | | | | | DeepLabV3 | Deit-S | 512x512 | 80000 | | | | | | | DeepLabV3 | Deit-B | 512x512 | 80000 | | | | | | From 4d7776154bcb9e01151ecd90281bf0f0d6f26615 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Tue, 18 May 2021 15:20:09 +0800 Subject: [PATCH 14/32] update result and delete some configs --- configs/_base_/models/deeplabv3_vit-d16.py | 55 ------------------ configs/_base_/models/fcn_vit-d16.py | 56 ------------------- configs/_base_/models/pspnet_vit-d16.py | 55 ------------------ ...{upernet_vit-d16.py => upernet_vit-b16.py} | 7 +-- configs/vit/README.md | 40 ++++--------- ...labv3_deitB-d16-512x1024_40k_cityscapes.py | 36 ------------ .../deeplabv3_deitB-d16-512x512_80k_ade20k.py | 38 ------------- ...labv3_deitS-d16-512x1024_40k_cityscapes.py | 38 ------------- .../deeplabv3_deitS-d16-512x512_80k_ade20k.py | 39 ------------- ...eplabv3_vit-d16_512x1024_40k_cityscapes.py | 33 ----------- .../deeplabv3_vit-d16_512x512_80k_ade20k.py | 35 ------------ .../fcn_deitB-d16_512x1024_40k_cityscapes.py | 35 ------------ .../vit/fcn_deitB-d16_512x512_40k_ade20k.py | 38 ------------- .../fcn_deitS-d16_512x1024_40k_cityscapes.py | 37 ------------ .../vit/fcn_deitS-d16_512x512_80k_ade20k.py | 39 ------------- .../fcn_vit-d16_512x1024_40k_cityscapes.py | 32 ----------- configs/vit/fcn_vit-d16_512x512_80k_ade20k.py | 35 ------------ .../pspnet_deitB-d16-512x512_80k_ade20k.py | 38 ------------- ...spnet_deitB-d16_512x1024_40k_cityscapes.py | 35 ------------ ...spnet_deitS-d16_512x1024_40k_cityscapes.py | 37 ------------ .../pspnet_deitS-d16_512x512_80k_ade20k.py | 39 ------------- .../pspnet_vit-d16_512x1024_40k_cityscapes.py | 32 ----------- .../vit/pspnet_vit-d16_512x512_80k_ade20k.py | 35 ------------ ...ernet_deit-b16_512x1024_40k_cityscapes.py} | 8 +-- .../upernet_deit-b16_512x512_160k_ade20k.py | 7 +++ ...=> 
upernet_deit-b16_512x512_80k_ade20k.py} | 9 +-- ...ernet_deit-s16_512x1024_40k_cityscapes.py} | 12 ++-- .../upernet_deit-s16_512x512_160k_ade20k.py | 8 +++ ...=> upernet_deit-s16_512x512_80k_ade20k.py} | 14 ++--- .../upernet_deitB-d16_512x512_160k_ade20k.py | 7 --- .../upernet_deitS-d16_512x512_160k_ade20k.py | 9 --- ...pernet_vit-b16_512x1024_40k_cityscapes.py} | 3 - ...=> upernet_vit-b16_512x512_160k_ade20k.py} | 3 - ... => upernet_vit-b16_512x512_80k_ade20k.py} | 3 - 34 files changed, 45 insertions(+), 902 deletions(-) delete mode 100644 configs/_base_/models/deeplabv3_vit-d16.py delete mode 100644 configs/_base_/models/fcn_vit-d16.py delete mode 100644 configs/_base_/models/pspnet_vit-d16.py rename configs/_base_/models/{upernet_vit-d16.py => upernet_vit-b16.py} (90%) delete mode 100644 configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py delete mode 100644 configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py delete mode 100644 configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py delete mode 100644 configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py delete mode 100644 configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py delete mode 100644 configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py delete mode 100644 configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py delete mode 100644 configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py delete mode 100644 configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py delete mode 100644 configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py delete mode 100644 configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py delete mode 100644 configs/vit/fcn_vit-d16_512x512_80k_ade20k.py delete mode 100644 configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py delete mode 100644 configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py delete mode 100644 configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py delete mode 100644 configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py delete mode 100644 
configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py delete mode 100644 configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py rename configs/vit/{upernet_deitB-d16-512x1024_40k_cityscapes.py => upernet_deit-b16_512x1024_40k_cityscapes.py} (78%) create mode 100644 configs/vit/upernet_deit-b16_512x512_160k_ade20k.py rename configs/vit/{upernet_deitB-d16-512x512_80k_ade20k.py => upernet_deit-b16_512x512_80k_ade20k.py} (76%) rename configs/vit/{upernet_deitS-d16-512x1024_40k_cityscapes.py => upernet_deit-s16_512x1024_40k_cityscapes.py} (69%) create mode 100644 configs/vit/upernet_deit-s16_512x512_160k_ade20k.py rename configs/vit/{upernet_deitS-d16-512x512_80k_ade20k.py => upernet_deit-s16_512x512_80k_ade20k.py} (67%) delete mode 100644 configs/vit/upernet_deitB-d16_512x512_160k_ade20k.py delete mode 100644 configs/vit/upernet_deitS-d16_512x512_160k_ade20k.py rename configs/vit/{upernet_vit-d16_512x1024_40k_cityscapes.py => upernet_vit-b16_512x1024_40k_cityscapes.py} (88%) rename configs/vit/{upernet_vit-d16_512x512_160k_ade20k.py => upernet_vit-b16_512x512_160k_ade20k.py} (89%) rename configs/vit/{upernet_vit-d16_512x512_80k_ade20k.py => upernet_vit-b16_512x512_80k_ade20k.py} (89%) diff --git a/configs/_base_/models/deeplabv3_vit-d16.py b/configs/_base_/models/deeplabv3_vit-d16.py deleted file mode 100644 index 1a9b37e078..0000000000 --- a/configs/_base_/models/deeplabv3_vit-d16.py +++ /dev/null @@ -1,55 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='https://github.com/rwightman/pytorch-image-models/releases/\ -download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', - backbone=dict( - type='VisionTransformer', - img_size=(512, 512), - patch_size=16, - in_channels=3, - embed_dim=768, - depth=12, - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.3, - norm_cfg=dict(type='LN', eps=1e-6), - act_cfg=dict(type='GELU'), - norm_eval=False), - 
neck=dict( - type='MultiLevelNeck', - in_channels=[768], - out_channels=768, - scales=[0.5, 1, 2, 4]), - decode_head=dict( - type='ASPPHead', - in_channels=768, - in_index=1, - channels=512, - dilations=(1, 12, 24, 36), - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), - auxiliary_head=dict( - type='FCNHead', - in_channels=768, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/fcn_vit-d16.py b/configs/_base_/models/fcn_vit-d16.py deleted file mode 100644 index bcf877cb84..0000000000 --- a/configs/_base_/models/fcn_vit-d16.py +++ /dev/null @@ -1,56 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='https://github.com/rwightman/pytorch-image-models/releases/\ -download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', - backbone=dict( - type='VisionTransformer', - img_size=(512, 512), - patch_size=16, - in_channels=3, - embed_dim=768, - depth=12, - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.3, - norm_cfg=dict(type='LN', eps=1e-6), - act_cfg=dict(type='GELU'), - norm_eval=False), - neck=dict( - type='MultiLevelNeck', - in_channels=[768], - out_channels=768, - scales=[0.5, 1, 2, 4]), - decode_head=dict( - type='FCNHead', - in_channels=768, - in_index=1, - channels=512, - num_convs=2, - concat_input=True, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), - auxiliary_head=dict( - type='FCNHead', - 
in_channels=768, - in_index=0, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/pspnet_vit-d16.py b/configs/_base_/models/pspnet_vit-d16.py deleted file mode 100644 index e510e36b60..0000000000 --- a/configs/_base_/models/pspnet_vit-d16.py +++ /dev/null @@ -1,55 +0,0 @@ -# model settings -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='https://github.com/rwightman/pytorch-image-models/releases/\ -download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', - backbone=dict( - type='VisionTransformer', - img_size=(512, 512), - patch_size=16, - in_channels=3, - embed_dim=768, - depth=12, - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.3, - norm_cfg=dict(type='LN', eps=1e-6), - act_cfg=dict(type='GELU'), - norm_eval=False), - neck=dict( - type='MultiLevelNeck', - in_channels=[768], - out_channels=768, - scales=[0.5, 1, 2, 4]), - decode_head=dict( - type='PSPHead', - in_channels=768, - in_index=1, - channels=512, - pool_scales=(1, 2, 3, 6), - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), - auxiliary_head=dict( - type='FCNHead', - in_channels=768, - in_index=0, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/upernet_vit-d16.py 
b/configs/_base_/models/upernet_vit-b16.py similarity index 90% rename from configs/_base_/models/upernet_vit-d16.py rename to configs/_base_/models/upernet_vit-b16.py index acc3030793..18c1510d4c 100644 --- a/configs/_base_/models/upernet_vit-d16.py +++ b/configs/_base_/models/upernet_vit-b16.py @@ -2,8 +2,7 @@ norm_cfg = dict(type='SyncBN', requires_grad=True) model = dict( type='EncoderDecoder', - pretrained='https://github.com/rwightman/pytorch-image-models/releases/\ -download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', + pretrained='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth', # noqa backbone=dict( type='VisionTransformer', img_size=(512, 512), @@ -16,7 +15,7 @@ qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, - drop_path_rate=0.3, + drop_path_rate=0.0, norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), norm_eval=False), @@ -52,4 +51,4 @@ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), # model training and testing settings train_cfg=dict(), - test_cfg=dict(mode='whole')) + test_cfg=dict(mode='whole')) # yapf: disable diff --git a/configs/vit/README.md b/configs/vit/README.md index b746858c43..6ae90b1f61 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -5,9 +5,9 @@ ```latex -@article{dosovitskiy2020, +@article{dosovitskiy2020, title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, - author={Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, + author={Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, journal={arXiv preprint
arXiv:2010.11929}, year={2020} } @@ -19,35 +19,17 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| UPerNet | Vit | 512x1024 | 40000 | | | 72.61 | | | -| UPerNet | Deit-S | 512x1024 | 40000 | | | 69.28 | | | -| UPerNet | Deit-B | 512x1024 | 40000 | | | 73.35 | | | -| DeepLabV3 | Vit | 512x1024 | 40000 | | | | | | -| DeepLabV3 | Deit-S | 512x1024 | 40000 | | | | | | -| DeepLabV3 | Deit-B | 512x1024 | 40000 | | | | | | -| PSPNet | Vit | 512x1024 | 40000 | | | | | | -| PSPNet | Deit-S | 512x1024 | 40000 | | | | | | -| PSPNet | Deit-B | 512x1024 | 40000 | | | | | | -| FCN | Vit | 512x1024 | 40000 | | | | | | -| FCN | Deit-S | 512x1024 | 40000 | | | | | | -| FCN | Deit-B | 512x1024 | 40000 | | | | | | +| UPerNet | ViT-B | 512x1024 | 40000 | | | 72.61 | | | +| UPerNet | DeiT-S | 512x1024 | 40000 | | | 69.28 | | | +| UPerNet | DeiT-B | 512x1024 | 40000 | | | 73.35 | | | ### ADE20K | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | Vit | 512x512 | 80000 | | |45.99 | | | -| UPerNet | Vit | 512x512 | 160000 | | |45.88 | | | -| UPerNet | Deit-S | 512x512 | 80000 | | |40.86 | | | -| UPerNet | Deit-S | 512x512 | 160000 | | |41.71 | | | -| UPerNet | Deit-B | 512x512 | 80000 | | |44.62 | | | -| UPerNet | Deit-B | 512x512 | 160000 | | |44.69 | | | -| DeepLabV3 | Vit | 512x512 | 80000 | | | | | | -| DeepLabV3 | Deit-S | 512x512 | 80000 | | | | | | -| DeepLabV3 | Deit-B | 512x512 | 80000 | | | | | | -| PSPNet | Vit | 512x512 | 80000 | | | | | | -| PSPNet | Deit-S | 512x512 | 80000 | | | | | | -| PSPNet | Deit-B | 512x512 | 80000 | | | | | | -| FCN | Vit | 512x512 | 80000 | | | | | | -| FCN | Deit-S | 512x512 | 80000 | | | | | | -| FCN | Deit-B | 512x512 | 80000 | | | | | | +| UPerNet | ViT-B | 512x512 | 80000 | | |45.99 | | | +| UPerNet | ViT-B | 512x512 | 160000 | | |45.88 | | | +| UPerNet | DeiT-S | 512x512 | 80000 | | |41.32 | | | +| UPerNet | DeiT-S | 512x512 | 160000 | | | | | | +| UPerNet | DeiT-B | 512x512 | 80000 | | | | | | +| UPerNet | DeiT-B | 512x512 | 160000 | | | | | | diff --git a/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py deleted file mode 100644 index dff99ef57b..0000000000 --- a/configs/vit/deeplabv3_deitB-d16-512x1024_40k_cityscapes.py +++ /dev/null @@ -1,36 +0,0 @@ -_base_ = [ - '../_base_/models/deeplabv3_vit-d16.py', - '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', - '../_base_/schedules/schedule_40k.py' -] - -model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_distilled_patch16_384-d0272ac0.pth') - -# AdamW optimizer, no 
weight decay for position embedding& layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) - })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py b/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py deleted file mode 100644 index 2eebe1ee6f..0000000000 --- a/configs/vit/deeplabv3_deitB-d16-512x512_80k_ade20k.py +++ /dev/null @@ -1,38 +0,0 @@ -_base_ = [ - '../_base_/models/deeplabv3_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_distilled_patch16_384-d0272ac0.pth', - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding& layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py deleted file mode 100644 index b6d4177df5..0000000000 --- a/configs/vit/deeplabv3_deitS-d16-512x1024_40k_cityscapes.py +++ /dev/null @@ -1,38 +0,0 @@ -_base_ = [ - '../_base_/models/deeplabv3_vit-d16.py', - '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', - '../_base_/schedules/schedule_40k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384)) - -# AdamW optimizer, no weight decay for position embedding& layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py b/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py deleted file mode 100644 index caca53c9e9..0000000000 --- a/configs/vit/deeplabv3_deitS-d16-512x512_80k_ade20k.py +++ /dev/null @@ -1,39 +0,0 @@ -_base_ = [ - '../_base_/models/deeplabv3_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384), - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding& layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py deleted file mode 100644 index febf88f115..0000000000 --- a/configs/vit/deeplabv3_vit-d16_512x1024_40k_cityscapes.py +++ /dev/null @@ -1,33 +0,0 @@ -_base_ = [ - '../_base_/models/deeplabv3_vit-d16.py', - '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', - '../_base_/schedules/schedule_40k.py' -] - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py b/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py deleted file mode 100644 index ce1fbf9a9d..0000000000 --- a/configs/vit/deeplabv3_vit-d16_512x512_80k_ade20k.py +++ /dev/null @@ -1,35 +0,0 @@ -_base_ = [ - '../_base_/models/deeplabv3_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding& layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py deleted file mode 100644 index aa360b583f..0000000000 --- a/configs/vit/fcn_deitB-d16_512x1024_40k_cityscapes.py +++ /dev/null @@ -1,35 +0,0 @@ -_base_ = [ - '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' -] - -model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_distilled_patch16_384-d0272ac0.pth') - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py b/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py deleted file mode 100644 index 883e3633df..0000000000 --- a/configs/vit/fcn_deitB-d16_512x512_40k_ade20k.py +++ /dev/null @@ -1,38 +0,0 @@ -_base_ = [ - '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_distilled_patch16_384-d0272ac0.pth', - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py deleted file mode 100644 index 0ade722883..0000000000 --- a/configs/vit/fcn_deitS-d16_512x1024_40k_cityscapes.py +++ /dev/null @@ -1,37 +0,0 @@ -_base_ = [ - '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py b/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py deleted file mode 100644 index 2c8619f06e..0000000000 --- a/configs/vit/fcn_deitS-d16_512x512_80k_ade20k.py +++ /dev/null @@ -1,39 +0,0 @@ -_base_ = [ - '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384), - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py deleted file mode 100644 index e31e22e794..0000000000 --- a/configs/vit/fcn_vit-d16_512x1024_40k_cityscapes.py +++ /dev/null @@ -1,32 +0,0 @@ -_base_ = [ - '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' -] - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py b/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py deleted file mode 100644 index 09a33bd690..0000000000 --- a/configs/vit/fcn_vit-d16_512x512_80k_ade20k.py +++ /dev/null @@ -1,35 +0,0 @@ -_base_ = [ - '../_base_/models/fcn_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py b/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py deleted file mode 100644 index a1265df552..0000000000 --- a/configs/vit/pspnet_deitB-d16-512x512_80k_ade20k.py +++ /dev/null @@ -1,38 +0,0 @@ -_base_ = [ - '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_distilled_patch16_384-d0272ac0.pth', - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py deleted file mode 100644 index 2a177a2fa3..0000000000 --- a/configs/vit/pspnet_deitB-d16_512x1024_40k_cityscapes.py +++ /dev/null @@ -1,35 +0,0 @@ -_base_ = [ - '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' -] - -model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_distilled_patch16_384-d0272ac0.pth') - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py deleted file mode 100644 index 1d706a1760..0000000000 --- a/configs/vit/pspnet_deitS-d16_512x1024_40k_cityscapes.py +++ /dev/null @@ -1,37 +0,0 @@ -_base_ = [ - '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py b/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py deleted file mode 100644 index 8695c93c11..0000000000 --- a/configs/vit/pspnet_deitS-d16_512x512_80k_ade20k.py +++ /dev/null @@ -1,39 +0,0 @@ -_base_ = [ - '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384), - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py deleted file mode 100644 index d2fadb1785..0000000000 --- a/configs/vit/pspnet_vit-d16_512x1024_40k_cityscapes.py +++ /dev/null @@ -1,32 +0,0 @@ -_base_ = [ - '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' -] - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py b/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py deleted file mode 100644 index 5f790915d2..0000000000 --- a/configs/vit/pspnet_vit-d16_512x512_80k_ade20k.py +++ /dev/null @@ -1,35 +0,0 @@ -_base_ = [ - '../_base_/models/pspnet_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py b/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py similarity index 78% rename from configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py rename to configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py index 3d049eafc5..cd4dd3a251 100644 --- a/configs/vit/upernet_deitB-d16-512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py @@ -3,8 +3,9 @@ '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] -model = dict(pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_distilled_patch16_384-d0272ac0.pth') +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth' # noqa +) # yapf: disable # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone @@ -30,6 +31,3 @@ power=1.0, min_lr=0.0, by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py new file mode 100644 index 0000000000..7660dbfebb --- /dev/null +++ b/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = './upernet_vit-d16_512x512_160k_ade20k.py' + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa + backbone=dict(drop_path_rate=0.1), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) # yapf: disable diff --git a/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py similarity index 76% rename from 
configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py rename to configs/vit/upernet_deit-b16_512x512_80k_ade20k.py index 8308c1b04b..85eb3ff99a 100644 --- a/configs/vit/upernet_deitB-d16-512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py @@ -4,10 +4,10 @@ ] model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_distilled_patch16_384-d0272ac0.pth', + pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa + backbone=dict(drop_path_rate=0.1), decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) + auxiliary_head=dict(num_classes=150)) # yapf: disable # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone @@ -33,6 +33,3 @@ power=1.0, min_lr=0.0, by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py b/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py similarity index 69% rename from configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py rename to configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py index 4be0cf2b7b..f96b9f22cc 100644 --- a/configs/vit/upernet_deitS-d16-512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py @@ -4,10 +4,11 @@ ] model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384), - neck=dict(in_channels=[384])) + pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa + backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1), + neck=dict(in_channels=[384], out_channels=384), + decode_head=dict(in_channels=[384, 384, 384, 384]), + auxiliary_head=dict(in_channels=384)) # yapf: disable # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone @@ -33,6 +34,3 @@ power=1.0, 
min_lr=0.0, by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py new file mode 100644 index 0000000000..0e54790194 --- /dev/null +++ b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = './upernet_vit-d16_512x512_160k_ade20k.py' + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa + backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1), + neck=dict(in_channels=[384], out_channels=384), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable diff --git a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py similarity index 67% rename from configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py rename to configs/vit/upernet_deit-s16_512x512_80k_ade20k.py index a6993c161e..ddb61f2379 100644 --- a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py @@ -4,12 +4,11 @@ ] model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384), - decode_head=dict(num_classes=150), - neck=dict(in_channels=[384]), - auxiliary_head=dict(num_classes=150)) + pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa + backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1), + neck=dict(in_channels=[384], out_channels=384), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable # AdamW optimizer, no weight decay for position embedding & layer norm # in backbone @@ -35,6 +34,3 @@ power=1.0, min_lr=0.0, 
by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deitB-d16_512x512_160k_ade20k.py b/configs/vit/upernet_deitB-d16_512x512_160k_ade20k.py deleted file mode 100644 index bdb16df7e7..0000000000 --- a/configs/vit/upernet_deitB-d16_512x512_160k_ade20k.py +++ /dev/null @@ -1,7 +0,0 @@ -_base_ = './upernet_vit-d16_512x512_160k_ade20k.py' - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_base_distilled_patch16_384-d0272ac0.pth', - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/upernet_deitS-d16_512x512_160k_ade20k.py b/configs/vit/upernet_deitS-d16_512x512_160k_ade20k.py deleted file mode 100644 index 3cc91577f7..0000000000 --- a/configs/vit/upernet_deitS-d16_512x512_160k_ade20k.py +++ /dev/null @@ -1,9 +0,0 @@ -_base_ = './upernet_vit-d16_512x512_160k_ade20k.py' - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384), - neck=dict(in_channels=[384]), - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) diff --git a/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py b/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py similarity index 88% rename from configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py rename to configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py index fa58aacee5..74f792cc9d 100644 --- a/configs/vit/upernet_vit-d16_512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py @@ -27,6 +27,3 @@ power=1.0, min_lr=0.0, by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-d16_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py similarity index 89% rename from 
configs/vit/upernet_vit-d16_512x512_160k_ade20k.py rename to configs/vit/upernet_vit-b16_512x512_160k_ade20k.py index 3c99cdda9b..c1153718c4 100644 --- a/configs/vit/upernet_vit-d16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py @@ -30,6 +30,3 @@ power=1.0, min_lr=0.0, by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py b/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py similarity index 89% rename from configs/vit/upernet_vit-d16_512x512_80k_ade20k.py rename to configs/vit/upernet_vit-b16_512x512_80k_ade20k.py index 02878bfd03..0297fc1d31 100644 --- a/configs/vit/upernet_vit-d16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py @@ -30,6 +30,3 @@ power=1.0, min_lr=0.0, by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) From 656c1676e7d0628103de20f7eb2755427291fc78 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Tue, 18 May 2021 15:30:44 +0800 Subject: [PATCH 15/32] fix import error --- configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py | 2 +- configs/vit/upernet_deit-b16_512x512_160k_ade20k.py | 2 +- configs/vit/upernet_deit-b16_512x512_80k_ade20k.py | 2 +- configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py | 2 +- configs/vit/upernet_deit-s16_512x512_160k_ade20k.py | 2 +- configs/vit/upernet_deit-s16_512x512_80k_ade20k.py | 2 +- configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py | 2 +- configs/vit/upernet_vit-b16_512x512_160k_ade20k.py | 2 +- configs/vit/upernet_vit-b16_512x512_80k_ade20k.py | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py b/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py index cd4dd3a251..ca633b7560 100644 --- a/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py +++ 
b/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] diff --git a/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py index 7660dbfebb..d1950205a5 100644 --- a/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-d16_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa diff --git a/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py index 85eb3ff99a..49691d45f1 100644 --- a/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] diff --git a/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py b/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py index f96b9f22cc..ba44f522c3 100644 --- a/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] diff --git a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py index 
0e54790194..fe141f1691 100644 --- a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-d16_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa diff --git a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py index ddb61f2379..e9d66393f1 100644 --- a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] diff --git a/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py b/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py index 74f792cc9d..75bcc5ff44 100644 --- a/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' ] diff --git a/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py index c1153718c4..2e3a1b0308 100644 --- a/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' ] diff --git a/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py 
b/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py index 0297fc1d31..8245d081a4 100644 --- a/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py @@ -1,5 +1,5 @@ _base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' ] From 7d13836fb7d89df534eff9d61605cb4003a31632 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Tue, 18 May 2021 15:49:49 +0800 Subject: [PATCH 16/32] fix drop_path_rate --- configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py b/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py index ca633b7560..6ff1c5eb62 100644 --- a/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py @@ -4,7 +4,8 @@ ] model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth' # noqa + pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa + backbone=dict(drop_path_rate=0.1), ) # yapf: disable # AdamW optimizer, no weight decay for position embedding & layer norm From 8931d7f95576274dcefbe2fd731475680ebd0acf Mon Sep 17 00:00:00 2001 From: xiexinch Date: Wed, 19 May 2021 16:50:32 +0800 Subject: [PATCH 17/32] update result and restore config --- configs/vit/README.md | 2 +- configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py | 3 +++ configs/vit/upernet_deit-b16_512x512_80k_ade20k.py | 3 +++ configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py | 3 +++ configs/vit/upernet_deit-s16_512x512_80k_ade20k.py | 3 +++ configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py | 3 +++ configs/vit/upernet_vit-b16_512x512_160k_ade20k.py | 3 +++ configs/vit/upernet_vit-b16_512x512_80k_ade20k.py | 3 +++ 8 
files changed, 22 insertions(+), 1 deletion(-) diff --git a/configs/vit/README.md b/configs/vit/README.md index 6ae90b1f61..7ea39ed504 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -30,6 +30,6 @@ | UPerNet | ViT-B | 512x512 | 80000 | | |45.99 | | | | UPerNet | ViT-B | 512x512 | 160000 | | |45.88 | | | | UPerNet | DeiT-S | 512x512 | 80000 | | |41.32 | | | -| UPerNet | DeiT-S | 512x512 | 160000 | | | | | | +| UPerNet | DeiT-S | 512x512 | 160000 | | | 40.61 | | | | UPerNet | DeiT-B | 512x512 | 80000 | | | | | | | UPerNet | DeiT-B | 512x512 | 160000 | | | | | | diff --git a/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py b/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py index 6ff1c5eb62..5dc78a1586 100644 --- a/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py @@ -32,3 +32,6 @@ power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py index 49691d45f1..181b108e54 100644 --- a/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py @@ -33,3 +33,6 @@ power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py b/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py index ba44f522c3..1c274924e5 100644 --- a/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py @@ -34,3 +34,6 @@ power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py 
b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py index e9d66393f1..ccf6280e60 100644 --- a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py @@ -34,3 +34,6 @@ power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py b/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py index 75bcc5ff44..e4be31dca1 100644 --- a/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py +++ b/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py @@ -27,3 +27,6 @@ power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py index 2e3a1b0308..3660b82f87 100644 --- a/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py @@ -30,3 +30,6 @@ power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py b/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py index 8245d081a4..c188e15102 100644 --- a/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py @@ -30,3 +30,6 @@ power=1.0, min_lr=0.0, by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) From 2219cf72938ad48bf8fdaff849274bdbcb7ff0a5 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Fri, 21 May 2021 20:29:04 +0800 Subject: [PATCH 18/32] update benchmark result --- configs/vit/README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/configs/vit/README.md b/configs/vit/README.md index 
7ea39ed504..b820fc3dd2 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -19,17 +19,17 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| UPerNet | ViT-B | 512x1024 | 40000 | | | 72.61 | | | -| UPerNet | DeiT-S | 512x1024 | 40000 | | | 69.28 | | | -| UPerNet | DeiT-B | 512x1024 | 40000 | | | 73.35 | | | +| UPerNet | ViT-B | 512x1024 | 40000 | 18.7 | 0.82 | 72.61 | |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x1024_40k_cityscapes/upernet_vit-b16_512x1024_40k_cityscapes-2684468c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x1024_40k_cityscapes/20210510_124209.log.json) | +| UPerNet | DeiT-S | 512x1024 | 40000 | 10.58 | 1.46 | 68.18 | |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x1024_40k_cityscapes/upernet_deit-s16_512x1024_40k_cityscapes-4e761184.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x1024_40k_cityscapes/20210519_165747.log.json) | +| UPerNet | DeiT-B | 512x1024 | 40000 | 18.7 | 0.68 | 73.52 | 
|[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x1024_40k_cityscapes/upernet_deit-b16_512x1024_40k_cityscapes-d0cc1ef0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x1024_40k_cityscapes/20210519_170031.log.json) | ### ADE20K | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | ViT-B | 512x512 | 80000 | | |45.99 | | | -| UPerNet | ViT-B | 512x512 | 160000 | | |45.88 | | | -| UPerNet | DeiT-S | 512x512 | 80000 | | |41.32 | | | -| UPerNet | DeiT-S | 512x512 | 160000 | | | 40.61 | | | -| UPerNet | DeiT-B | 512x512 | 80000 | | | | | | -| UPerNet | DeiT-B | 512x512 | 160000 | | | | | | +| UPerNet | ViT-B | 512x512 | 80000 | 8.8 | 7.86 |45.99 | 48.06 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x512_80k_ade20k/upernet_vit-b16_512x512_80k_ade20k-d6b6fbb3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x512_80k_ade20k/20210509_175430.log.json) | +| UPerNet | ViT-B | 512x512 | 160000 | | 8.41 |45.88 | 47.9 
|[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x512_160k_ade20k/upernet_vit-b16_512x512_160k_ade20k-178101c0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x512_160k_ade20k/20210512_130043.log.json) | +| UPerNet | DeiT-S | 512x512 | 80000 | 5.3 | 14.01 | 41.32 | 42.48 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k-9855ed8a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210517_114414.log.json) | +| UPerNet | DeiT-S | 512x512 | 160000 | | 15.05 | 40.61 | 42.04 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k-f96d1a2f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210517_114547.log.json) | +| UPerNet | DeiT-B | 512x512 | 80000 | 8.9 | 8.51 | 43.31 | 44.95 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k-eb6741cc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210518_162229.log.json) | +| UPerNet | DeiT-B | 512x512 | 160000 | | 7.79 | 43.21 | 44.84 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k-3a601a75.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210519_163905.log.json) | From 7f1866e971344bad7899984b4be8e7999e854ac7 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Sat, 22 May 2021 12:35:22 +0800 Subject: [PATCH 19/32] remove cityscapes exp --- configs/vit/README.md | 8 ---- ...pernet_deit-b16_512x1024_40k_cityscapes.py | 37 ------------------ ...pernet_deit-s16_512x1024_40k_cityscapes.py | 39 ------------------- ...upernet_vit-b16_512x1024_40k_cityscapes.py | 32 --------------- 4 files changed, 116 deletions(-) delete mode 100644 configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py delete mode 100644 configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py delete mode 100644 configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py diff --git a/configs/vit/README.md b/configs/vit/README.md index b820fc3dd2..09c214e0aa 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -15,14 +15,6 @@ ## Results and models -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| UPerNet | ViT-B | 512x1024 | 40000 | 18.7 | 0.82 | 72.61 | 
|[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x1024_40k_cityscapes/upernet_vit-b16_512x1024_40k_cityscapes-2684468c.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x1024_40k_cityscapes/20210510_124209.log.json) | -| UPerNet | DeiT-S | 512x1024 | 40000 | 10.58 | 1.46 | 68.18 | |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x1024_40k_cityscapes/upernet_deit-s16_512x1024_40k_cityscapes-4e761184.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x1024_40k_cityscapes/20210519_165747.log.json) | -| UPerNet | DeiT-B | 512x1024 | 40000 | 18.7 | 0.68 | 73.52 | |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x1024_40k_cityscapes/upernet_deit-b16_512x1024_40k_cityscapes-d0cc1ef0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x1024_40k_cityscapes/20210519_170031.log.json) | - ### ADE20K | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | diff --git a/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py b/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py deleted file mode 100644 index 5dc78a1586..0000000000 --- a/configs/vit/upernet_deit-b16_512x1024_40k_cityscapes.py +++ /dev/null @@ -1,37 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' -] - -model = dict( - 
pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa - backbone=dict(drop_path_rate=0.1), -) # yapf: disable - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) - })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py b/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py deleted file mode 100644 index 1c274924e5..0000000000 --- a/configs/vit/upernet_deit-s16_512x1024_40k_cityscapes.py +++ /dev/null @@ -1,39 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa - backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1), - neck=dict(in_channels=[384], out_channels=384), - decode_head=dict(in_channels=[384, 384, 384, 384]), - auxiliary_head=dict(in_channels=384)) # yapf: disable - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py b/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py deleted file mode 100644 index e4be31dca1..0000000000 --- a/configs/vit/upernet_vit-b16_512x1024_40k_cityscapes.py +++ /dev/null @@ -1,32 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/cityscapes.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' -] - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) From 69cb384307e43ca849f7f32417a717e179b21a5d Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 24 May 2021 15:18:30 +0800 Subject: [PATCH 20/32] remove neck --- configs/_base_/models/upernet_vit-b16.py | 11 ++++------- configs/vit/upernet_deit-s16_512x512_160k_ade20k.py | 1 - configs/vit/upernet_deit-s16_512x512_80k_ade20k.py | 1 - 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/configs/_base_/models/upernet_vit-b16.py b/configs/_base_/models/upernet_vit-b16.py index 18c1510d4c..930ce3dd7d 100644 --- a/configs/_base_/models/upernet_vit-b16.py +++ b/configs/_base_/models/upernet_vit-b16.py @@ -11,6 +11,7 @@ embed_dim=768, depth=12, mlp_ratio=4, + out_indices=(2, 5, 8, 11), qkv_bias=True, qk_scale=None, drop_rate=0.0, @@ -18,12 +19,8 @@ drop_path_rate=0.0, norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), - norm_eval=False), - neck=dict( - type='MultiLevelNeck', - in_channels=[768], - out_channels=768, - scales=[0.5, 1, 2, 4]), + norm_eval=False, + out_shape='NCHW'), decode_head=dict( type='UPerHead', in_channels=[768, 768, 768, 768], @@ -39,7 +36,7 @@ auxiliary_head=dict( type='FCNHead', in_channels=768, - in_index=0, + in_index=3, channels=256, num_convs=1, concat_input=False, diff --git a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py index fe141f1691..aba6fc08e9 100644 --- a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py @@ -3,6 +3,5 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1), - neck=dict(in_channels=[384], out_channels=384), 
decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable diff --git a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py index ccf6280e60..fbb65d3b3b 100644 --- a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py @@ -6,7 +6,6 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1), - neck=dict(in_channels=[384], out_channels=384), decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable From cebbf6f376a2dad3394261174e0d03c62fab4737 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Wed, 26 May 2021 16:52:07 +0800 Subject: [PATCH 21/32] neck exp --- configs/_base_/models/upernet_vit-b16.py | 5 +++++ configs/vit/upernet_deit-s16_512x512_160k_ade20k.py | 1 + configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py | 8 ++++++++ 3 files changed, 14 insertions(+) create mode 100644 configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py diff --git a/configs/_base_/models/upernet_vit-b16.py b/configs/_base_/models/upernet_vit-b16.py index 930ce3dd7d..36043347de 100644 --- a/configs/_base_/models/upernet_vit-b16.py +++ b/configs/_base_/models/upernet_vit-b16.py @@ -21,6 +21,11 @@ act_cfg=dict(type='GELU'), norm_eval=False, out_shape='NCHW'), + neck=dict( + type='MultiLevelNeck', + in_channels=[768, 768, 768, 768], + out_channels=768, + scales=[4, 2, 1, 0.5]), decode_head=dict( type='UPerHead', in_channels=[768, 768, 768, 768], diff --git a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py index aba6fc08e9..f98642dca1 100644 --- a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py +++ 
b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py @@ -4,4 +4,5 @@ pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1), decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable diff --git a/configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py new file mode 100644 index 0000000000..b8fdc81056 --- /dev/null +++ b/configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa + backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1, final_norm=True), # noqa + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), + auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable From ebc0531bfadc2207b5b982d360dd5b22ec8463ba Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 21 Jun 2021 11:18:17 +0800 Subject: [PATCH 22/32] add more configs --- configs/vit/uper_deitS_exp_ade_160k.py | 42 +++++++++++++++++++ configs/vit/uper_deitS_exp_ade_80k.py | 42 +++++++++++++++++++ ...ernet_deit-b16_neck_512x512_160k_ade20k.py | 5 +++ ...ernet_deit-b16_norm_512x512_160k_ade20k.py | 5 +++ .../upernet_deitS-d16-512x512_80k_ade20k_.py | 39 +++++++++++++++++ ...pernet_deitS-d16-512x512_80k_ade20k_384.py | 40 ++++++++++++++++++ ...pernet_vit-b16_neck_512x512_160k_ade20k.py | 35 ++++++++++++++++ ...pernet_vit-b16_norm_512x512_160k_ade20k.py | 36 ++++++++++++++++ 8 files changed, 244 insertions(+) create mode 100644 configs/vit/uper_deitS_exp_ade_160k.py create mode 100644 configs/vit/uper_deitS_exp_ade_80k.py create 
mode 100644 configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py create mode 100644 configs/vit/upernet_deit-b16_norm_512x512_160k_ade20k.py create mode 100644 configs/vit/upernet_deitS-d16-512x512_80k_ade20k_.py create mode 100644 configs/vit/upernet_deitS-d16-512x512_80k_ade20k_384.py create mode 100644 configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py create mode 100644 configs/vit/upernet_vit-b16_norm_512x512_160k_ade20k.py diff --git a/configs/vit/uper_deitS_exp_ade_160k.py b/configs/vit/uper_deitS_exp_ade_160k.py new file mode 100644 index 0000000000..bdede32bf9 --- /dev/null +++ b/configs/vit/uper_deitS_exp_ade_160k.py @@ -0,0 +1,42 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', + backbone=dict( + num_heads=6, + embed_dim=384, + drop_path_rate=0.1), + neck=dict(in_channels=[384], out_channels=384), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + auxiliary_head=dict(num_classes=150, in_channels=384)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/uper_deitS_exp_ade_80k.py b/configs/vit/uper_deitS_exp_ade_80k.py new file mode 100644 index 0000000000..69f3ba13c4 --- /dev/null +++ b/configs/vit/uper_deitS_exp_ade_80k.py @@ -0,0 +1,42 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', + backbone=dict( + num_heads=6, + embed_dim=384, + drop_path_rate=0.1), + neck=dict(in_channels=[384], out_channels=384), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + auxiliary_head=dict(num_classes=150, in_channels=384)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py new file mode 100644 index 0000000000..6399f3ef2f --- /dev/null +++ b/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa + backbone=dict(drop_path_rate=0.1),) # yapf: disable diff --git a/configs/vit/upernet_deit-b16_norm_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_norm_512x512_160k_ade20k.py new file mode 100644 index 0000000000..2a4d7404af --- /dev/null +++ b/configs/vit/upernet_deit-b16_norm_512x512_160k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa + backbone=dict(drop_path_rate=0.1, final_norm=True)) # yapf: disable diff --git a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_.py b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_.py new file mode 100644 index 0000000000..9ff54bf1d1 --- /dev/null +++ b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6, embed_dim=384), + neck=dict(in_channels=[384]), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position 
embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_384.py b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_384.py new file mode 100644 index 0000000000..2a033ad0b6 --- /dev/null +++ b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_384.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/\ +deit_small_distilled_patch16_224-649709d9.pth', + backbone=dict(num_heads=6, embed_dim=384), + neck=dict(in_channels=[384], out_channels=384), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + auxiliary_head=dict(num_classes=150, in_channels=384)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py new file mode 100644 index 0000000000..3660b82f87 --- /dev/null +++ b/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py @@ -0,0 +1,35 @@ +_base_ = [ + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-b16_norm_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_norm_512x512_160k_ade20k.py new file mode 100644 index 0000000000..0376097fee --- /dev/null +++ b/configs/vit/upernet_vit-b16_norm_512x512_160k_ade20k.py @@ -0,0 +1,36 @@ +_base_ = [ + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict( + backbone=dict( drop_path_rate=0.1, final_norm=True), + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) From 319f56ec5524ed1e5546b976d11eb40a37fa7654 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 21 Jun 2021 15:03:34 +0800 Subject: [PATCH 23/32] fix init error --- configs/_base_/models/upernet_vit-b16.py | 10 +++-- .../upernet_deit-b16_512x512_160k_ade20k.py | 3 +- .../upernet_deit-b16_512x512_80k_ade20k.py | 38 ++---------------- .../upernet_deit-s16_512x512_160k_ade20k.py | 4 +- .../upernet_deit-s16_512x512_80k_ade20k.py | 36 ++--------------- ...ernet_deit-s16_neck_512x512_160k_ade20k.py | 8 ++++ ...ernet_deit-s16_norm_512x512_160k_ade20k.py | 2 +- .../upernet_deitS-d16-512x512_80k_ade20k_.py | 39 ------------------ ...pernet_deitS-d16-512x512_80k_ade20k_384.py | 40 ------------------- 9 files changed, 24 insertions(+), 156 deletions(-) create mode 100644 configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py delete mode 100644 configs/vit/upernet_deitS-d16-512x512_80k_ade20k_.py delete mode 100644 configs/vit/upernet_deitS-d16-512x512_80k_ade20k_384.py diff --git a/configs/_base_/models/upernet_vit-b16.py b/configs/_base_/models/upernet_vit-b16.py index 36043347de..573612e13a 100644 --- a/configs/_base_/models/upernet_vit-b16.py +++ b/configs/_base_/models/upernet_vit-b16.py @@ -8,19 +8,21 @@ img_size=(512, 512), patch_size=16, in_channels=3, - embed_dim=768, - depth=12, + embed_dims=768, + num_layers=12, + num_heads=12, mlp_ratio=4, out_indices=(2, 5, 8, 11), qkv_bias=True, - qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, drop_path_rate=0.0, + with_cls_token=True, norm_cfg=dict(type='LN', eps=1e-6), act_cfg=dict(type='GELU'), norm_eval=False, - out_shape='NCHW'), + out_shape='NCHW', + interpolate_mode='bicubic'), neck=dict( type='MultiLevelNeck', in_channels=[768, 768, 768, 768], diff --git 
a/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py index d1950205a5..567a089ab4 100644 --- a/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py @@ -3,5 +3,4 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa backbone=dict(drop_path_rate=0.1), - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) # yapf: disable + neck=None) # yapf: disable diff --git a/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py index 181b108e54..c867e9c07c 100644 --- a/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py @@ -1,38 +1,6 @@ -_base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] +_base_ = './upernet_vit-b16_512x512_80k_ade20k.py' model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa + pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa backbone=dict(drop_path_rate=0.1), - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) # yapf: disable - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) + neck=None) # yapf: disable diff --git a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py index f98642dca1..717dcdbe6a 100644 --- a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py @@ -2,7 +2,7 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa - backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1), + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), - neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), + neck=None, auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable diff --git a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py index fbb65d3b3b..3433b260fc 100644 --- a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py @@ -1,38 +1,8 @@ -_base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] +_base_ = './upernet_vit-b16_512x512_80k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa - backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1), + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=None, auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable - -# AdamW optimizer, no weight decay for position embedding & layer norm 
-# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) - })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py new file mode 100644 index 0000000000..9b54a825eb --- /dev/null +++ b/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), # noqa + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), + auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable diff --git a/configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py index b8fdc81056..faf54f01e8 100644 --- a/configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py @@ -2,7 +2,7 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa - backbone=dict(num_heads=6, embed_dim=384, drop_path_rate=0.1, final_norm=True), # noqa + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1, final_norm=True), # noqa decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), 
auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable diff --git a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_.py b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_.py deleted file mode 100644 index 9ff54bf1d1..0000000000 --- a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_.py +++ /dev/null @@ -1,39 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', - backbone=dict(num_heads=6, embed_dim=384), - neck=dict(in_channels=[384]), - decode_head=dict(num_classes=150), - auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_384.py b/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_384.py deleted file mode 100644 index 2a033ad0b6..0000000000 --- a/configs/vit/upernet_deitS-d16-512x512_80k_ade20k_384.py +++ /dev/null @@ -1,40 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/\ -deit_small_distilled_patch16_224-649709d9.pth', - backbone=dict(num_heads=6, embed_dim=384), - neck=dict(in_channels=[384], out_channels=384), - decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), - auxiliary_head=dict(num_classes=150, in_channels=384)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) From c4aa7b65331c172417296a96f49d915b8b2674f2 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 21 Jun 2021 16:33:11 +0800 Subject: [PATCH 24/32] fix ffn setting --- mmseg/models/backbones/vit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmseg/models/backbones/vit.py b/mmseg/models/backbones/vit.py index a0b945bb23..256acdf03a 100644 --- a/mmseg/models/backbones/vit.py +++ b/mmseg/models/backbones/vit.py @@ -75,7 +75,7 @@ def __init__(self, feedforward_channels=feedforward_channels, num_fcs=num_fcs, ffn_drop=drop_rate, - dropout_layer=None, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), act_cfg=act_cfg) @property From 8457e67be053b3cafa023b5f8af62bc1d474311c Mon Sep 17 00:00:00 2001 From: xiexinch Date: Tue, 22 Jun 2021 20:46:22 +0800 Subject: [PATCH 25/32] update result --- configs/vit/README.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/configs/vit/README.md b/configs/vit/README.md index 09c214e0aa..0cb6780512 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -19,9 +19,13 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | ViT-B | 512x512 | 80000 | 8.8 | 7.86 |45.99 | 48.06 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x512_80k_ade20k/upernet_vit-b16_512x512_80k_ade20k-d6b6fbb3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x512_80k_ade20k/20210509_175430.log.json) | -| UPerNet | ViT-B | 512x512 | 160000 | | 8.41 |45.88 | 47.9 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x512_160k_ade20k/upernet_vit-b16_512x512_160k_ade20k-178101c0.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_512x512_160k_ade20k/20210512_130043.log.json) | -| UPerNet | DeiT-S | 512x512 | 80000 | 5.3 | 14.01 | 41.32 | 42.48 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k-9855ed8a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210517_114414.log.json) | -| UPerNet | DeiT-S | 512x512 | 160000 | | 15.05 | 40.61 | 42.04 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k-f96d1a2f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210517_114547.log.json) | -| UPerNet | DeiT-B | 512x512 | 80000 | 8.9 | 8.51 | 43.31 | 44.95 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k-eb6741cc.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210518_162229.log.json) | -| UPerNet | DeiT-B | 512x512 | 160000 | | 7.79 | 43.21 | 44.84 |[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k-3a601a75.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210519_163905.log.json) | +| UPerNet | ViT-B | 512x512 | 80000 | | | | | | | +| UPerNet | ViT-B | 512x512 | 160000 | | | | | | | +| UPerNet | DeiT-S | 512x512 | 80000 | | | | | | | +| UPerNet | DeiT-S | 512x512 | 160000 | | | 42.87 | | | | +| UPerNet | DeiT-S + neck | 512x512 | 160000 | | | | | | | +| UPerNet | DeiT-S + norm | 512x512 | 160000 | | | | | | | +| UPerNet | DeiT-B | 512x512 | 80000 | | | | | | | +| UPerNet | DeiT-B | 512x512 | 160000 | | | 45.36 | | | | +| UPerNet | DeiT-B + neck | 512x512 | 160000 | | | | | | | +| UPerNet | DeiT-B + neck | 512x512 | 160000 | | | | | | | From b0f9e294cc924e76550a12eb67686c4ab8ce71fb Mon Sep 17 00:00:00 2001 From: xiexinch Date: Wed, 23 Jun 2021 14:14:42 +0800 Subject: [PATCH 26/32] update results --- configs/vit/README.md | 22 ++++++++++++---------- 1 file 
changed, 12 insertions(+), 10 deletions(-) diff --git a/configs/vit/README.md b/configs/vit/README.md index 0cb6780512..7897dc97d5 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -19,13 +19,15 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | ViT-B | 512x512 | 80000 | | | | | | | -| UPerNet | ViT-B | 512x512 | 160000 | | | | | | | -| UPerNet | DeiT-S | 512x512 | 80000 | | | | | | | -| UPerNet | DeiT-S | 512x512 | 160000 | | | 42.87 | | | | -| UPerNet | DeiT-S + neck | 512x512 | 160000 | | | | | | | -| UPerNet | DeiT-S + norm | 512x512 | 160000 | | | | | | | -| UPerNet | DeiT-B | 512x512 | 80000 | | | | | | | -| UPerNet | DeiT-B | 512x512 | 160000 | | | 45.36 | | | | -| UPerNet | DeiT-B + neck | 512x512 | 160000 | | | | | | | -| UPerNet | DeiT-B + neck | 512x512 | 160000 | | | | | | | +| UPerNet | ViT-B | 512x512 | 80000 | | | train | | | | +| UPerNet | ViT-B | 512x512 | 160000 | | | train | | | | +| UPerNet | ViT-B + neck | 512x512 | 160000 | | | train | | | | +| UPerNet | ViT-B + norm | 512x512 | 160000 | | | 47.73 | | | | +| UPerNet | DeiT-S | 512x512 | 80000 | | | train | | | | +| UPerNet | DeiT-S | 512x512 | 160000 | | | 42.87 | | | | +| UPerNet | DeiT-S + neck | 512x512 | 160000 | | | 43.82 | | | | +| UPerNet | DeiT-S + norm | 512x512 | 160000 | | | 43.52 | | | | +| UPerNet | DeiT-B | 512x512 | 80000 | | | train | | | | +| 
UPerNet | DeiT-B | 512x512 | 160000 | | | 45.36 | | | | +| UPerNet | DeiT-B + neck | 512x512 | 160000 | | | 45.46 | | | | +| UPerNet | DeiT-B + norm | 512x512 | 160000 | | | train | | | | From 74424aa4ff86d561bd950b5a64194f0a938074b8 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Fri, 25 Jun 2021 10:47:55 +0800 Subject: [PATCH 27/32] update result --- configs/vit/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/vit/README.md b/configs/vit/README.md index 7897dc97d5..8356caf40c 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -19,15 +19,15 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | ViT-B | 512x512 | 80000 | | | train | | | | +| UPerNet | ViT-B | 512x512 | 80000 | | | 47.71 | | | | | UPerNet | ViT-B | 512x512 | 160000 | | | train | | | | | UPerNet | ViT-B + neck | 512x512 | 160000 | | | train | | | | | UPerNet | ViT-B + norm | 512x512 | 160000 | | | 47.73 | | | | -| UPerNet | DeiT-S | 512x512 | 80000 | | | train | | | | +| UPerNet | DeiT-S | 512x512 | 80000 | | | 42.96 | | | | | UPerNet | DeiT-S | 512x512 | 160000 | | | 42.87 | | | | | UPerNet | DeiT-S + neck | 512x512 | 160000 | | | 43.82 | | | | | UPerNet | DeiT-S + norm | 512x512 | 160000 | | | 43.52 | | | | -| UPerNet | DeiT-B | 512x512 | 80000 | | | train | | | | +| UPerNet | DeiT-B | 512x512 | 80000 | | | 45.24 | | | | | UPerNet | 
DeiT-B | 512x512 | 160000 | | | 45.36 | | | | | UPerNet | DeiT-B + neck | 512x512 | 160000 | | | 45.46 | | | | -| UPerNet | DeiT-B + norm | 512x512 | 160000 | | | train | | | | +| UPerNet | DeiT-B + norm | 512x512 | 160000 | | | 45.37 | | | | From 8c11f6202d826a39372e4e69c1e48c45b07cef3f Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 28 Jun 2021 16:23:45 +0800 Subject: [PATCH 28/32] update results and fill table --- configs/vit/README.md | 23 ++++++------ .../upernet_deit-b16_512x512_160k_ade20k.py | 2 +- .../upernet_deit-b16_512x512_80k_ade20k.py | 2 +- ...ernet_deit-b16_neck_512x512_160k_ade20k.py | 2 +- ...16_neck_ln-backbone_512x512_160k_ade20k.py | 5 +++ .../upernet_deit-s16_512x512_160k_ade20k.py | 2 +- .../upernet_deit-s16_512x512_80k_ade20k.py | 2 +- ...ernet_deit-s16_neck_512x512_160k_ade20k.py | 2 +- ...16_neck_ln-backbone_512x512_160k_ade20k.py | 8 ++++ ...upernet_vit-b16_neck_512x512_80k_ade20k.py | 35 ++++++++++++++++++ ...16_neck_ln-backbone_512x512_160k_ade20k.py | 37 +++++++++++++++++++ 11 files changed, 102 insertions(+), 18 deletions(-) create mode 100644 configs/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py create mode 100644 configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py create mode 100644 configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py create mode 100644 configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py diff --git a/configs/vit/README.md b/configs/vit/README.md index 8356caf40c..fb29791e7c 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -19,15 +19,14 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | ViT-B | 512x512 | 80000 | | | 47.71 | | | | -| UPerNet | ViT-B | 512x512 | 160000 | | | train | | | | -| UPerNet | ViT-B + neck | 512x512 | 160000 | | | train | | | | -| UPerNet | ViT-B + norm | 512x512 | 160000 | | | 47.73 | | | | -| UPerNet | DeiT-S | 512x512 | 80000 | | | 42.96 | | | | -| UPerNet | DeiT-S | 512x512 | 160000 | | | 42.87 | | | | -| UPerNet | DeiT-S + neck | 512x512 | 160000 | | | 43.82 | | | | -| UPerNet | DeiT-S + norm | 512x512 | 160000 | | | 43.52 | | | | -| UPerNet | DeiT-B | 512x512 | 80000 | | | 45.24 | | | | -| UPerNet | DeiT-B | 512x512 | 160000 | | | 45.36 | | | | -| UPerNet | DeiT-B + neck | 512x512 | 160000 | | | 45.46 | | | | -| UPerNet | DeiT-B + norm | 512x512 | 160000 | | | 45.37 | | | | +| UPerNet | ViT-B + neck | 512x512 | 80000 | 9.20 | 6.94 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_80k_ade20k-0403cee1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_130547.log.json) | +| UPerNet | ViT-B + neck | 512x512 | 160000 | 9.20 | 7.58 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_160k_ade20k-852fa768.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210623_192432.log.json) | +| UPerNet | ViT-B + norm | 512x512 | 160000 | 9.21 | 6.82 | 47.73 | 49.95 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k-f444c077.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_172828.log.json) | +| UPerNet | DeiT-S | 512x512 | 80000 | 4.68 | 29.85 | 42.96 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k-afc93ec2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_095228.log.json) | +| UPerNet | DeiT-S | 512x512 | 160000 | 4.68 | 29.19 | 42.87 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k-5110d916.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_160903.log.json) | +| UPerNet | DeiT-S + neck | 512x512 | 160000 | 5.69 | 11.18 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_512x512_160k_ade20k-fb9a5dfb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_161021.log.json) | +| UPerNet | DeiT-S + norm | 512x512 | 160000 | 5.69 | 12.39 | 43.52 | 45.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k-c0cd652f.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_161021.log.json) | +| UPerNet | DeiT-B | 512x512 | 80000 | 7.75 | 9.69 | 45.24 | 46.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k-1e090789.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_130529.log.json) | +| UPerNet | DeiT-B | 512x512 | 160000 | 7.75 | 10.39 | 45.36 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k-828705d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_180100.log.json) | +| UPerNet | DeiT-B + neck | 512x512 | 160000 | 9.21 | 7.78 | 45.46 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_512x512_160k_ade20k-4e1450f3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_191949.log.json) | +| UPerNet | DeiT-B + norm | 512x512 | 160000 | 9.21 | 7.75 | 45.37 | 47.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k-8a959c14.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210623_153535.log.json) | diff --git a/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py index 567a089ab4..0cacbc150b 100644 --- a/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py +++ 
b/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa diff --git a/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py index c867e9c07c..65fabcd2b4 100644 --- a/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_512x512_80k_ade20k.py' +_base_ = './upernet_vit-b16_neck_512x512_80k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa diff --git a/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py index 6399f3ef2f..9d7ff5b288 100644 --- a/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa diff --git a/configs/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py new file mode 100644 index 0000000000..5d98cca6a9 --- /dev/null +++ b/configs/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa + backbone=dict(drop_path_rate=0.1, final_norm=True)) # yapf: disable diff --git a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py index 
717dcdbe6a..a58d859647 100644 --- a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa diff --git a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py index 3433b260fc..8d4129fe96 100644 --- a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_512x512_80k_ade20k.py' +_base_ = './upernet_vit-b16_neck_512x512_80k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa diff --git a/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py index 9b54a825eb..959ce8a1f6 100644 --- a/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa diff --git a/configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py new file mode 100644 index 0000000000..f8a52fa8e4 --- /dev/null +++ b/configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' + +model = dict( + pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1, final_norm=True), # noqa + 
decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), + auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable diff --git a/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py b/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py new file mode 100644 index 0000000000..c188e15102 --- /dev/null +++ b/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py @@ -0,0 +1,35 @@ +_base_ = [ + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py new file mode 100644 index 0000000000..70bb6fe80b --- /dev/null +++ b/configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict( + backbone=dict(drop_path_rate=0.1, final_norm=True), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) From 31ba78d4073b615f16d87977cfc68adff9c07069 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 28 Jun 2021 16:27:19 +0800 Subject: [PATCH 29/32] delete or rename configs --- configs/vit/uper_deitS_exp_ade_160k.py | 42 ------------------- configs/vit/uper_deitS_exp_ade_80k.py | 42 ------------------- ...ernet_deit-b16_norm_512x512_160k_ade20k.py | 5 --- ...ernet_deit-s16_norm_512x512_160k_ade20k.py | 8 ---- .../upernet_vit-b16_512x512_160k_ade20k.py | 35 ---------------- .../vit/upernet_vit-b16_512x512_80k_ade20k.py | 35 ---------------- ...pernet_vit-b16_norm_512x512_160k_ade20k.py | 36 ---------------- 7 files changed, 203 deletions(-) delete mode 100644 configs/vit/uper_deitS_exp_ade_160k.py delete mode 100644 configs/vit/uper_deitS_exp_ade_80k.py delete mode 100644 configs/vit/upernet_deit-b16_norm_512x512_160k_ade20k.py delete mode 100644 configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py delete mode 100644 configs/vit/upernet_vit-b16_512x512_160k_ade20k.py delete mode 100644 configs/vit/upernet_vit-b16_512x512_80k_ade20k.py delete mode 100644 configs/vit/upernet_vit-b16_norm_512x512_160k_ade20k.py diff --git a/configs/vit/uper_deitS_exp_ade_160k.py b/configs/vit/uper_deitS_exp_ade_160k.py deleted file mode 100644 index bdede32bf9..0000000000 --- a/configs/vit/uper_deitS_exp_ade_160k.py +++ /dev/null @@ -1,42 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', - backbone=dict( - num_heads=6, - embed_dim=384, - drop_path_rate=0.1), - neck=dict(in_channels=[384], 
out_channels=384), - decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), - auxiliary_head=dict(num_classes=150, in_channels=384)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) - })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/uper_deitS_exp_ade_80k.py b/configs/vit/uper_deitS_exp_ade_80k.py deleted file mode 100644 index 69f3ba13c4..0000000000 --- a/configs/vit/uper_deitS_exp_ade_80k.py +++ /dev/null @@ -1,42 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-d16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', - backbone=dict( - num_heads=6, - embed_dim=384, - drop_path_rate=0.1), - neck=dict(in_channels=[384], out_channels=384), - decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), - auxiliary_head=dict(num_classes=150, in_channels=384)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_deit-b16_norm_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_norm_512x512_160k_ade20k.py deleted file mode 100644 index 2a4d7404af..0000000000 --- a/configs/vit/upernet_deit-b16_norm_512x512_160k_ade20k.py +++ /dev/null @@ -1,5 +0,0 @@ -_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa - backbone=dict(drop_path_rate=0.1, final_norm=True)) # yapf: disable diff --git a/configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py deleted file mode 100644 index faf54f01e8..0000000000 --- a/configs/vit/upernet_deit-s16_norm_512x512_160k_ade20k.py +++ /dev/null @@ -1,8 +0,0 @@ -_base_ = './upernet_vit-b16_512x512_160k_ade20k.py' - -model = dict( - pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa - backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1, final_norm=True), # noqa - decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), - neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), - auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable diff --git a/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py deleted file mode 100644 index 3660b82f87..0000000000 --- a/configs/vit/upernet_vit-b16_512x512_160k_ade20k.py +++ /dev/null @@ -1,35 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' -] - -model = dict( - decode_head=dict(num_classes=150), 
auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) - })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py b/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py deleted file mode 100644 index c188e15102..0000000000 --- a/configs/vit/upernet_vit-b16_512x512_80k_ade20k.py +++ /dev/null @@ -1,35 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' -] - -model = dict( - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) diff --git a/configs/vit/upernet_vit-b16_norm_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_norm_512x512_160k_ade20k.py deleted file mode 100644 index 0376097fee..0000000000 --- a/configs/vit/upernet_vit-b16_norm_512x512_160k_ade20k.py +++ /dev/null @@ -1,36 +0,0 @@ -_base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' -] - -model = dict( - backbone=dict( drop_path_rate=0.1, final_norm=True), - decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) - -# AdamW optimizer, no weight decay for position embedding & layer norm -# in backbone -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.00006, - betas=(0.9, 0.999), - weight_decay=0.01, - paramwise_cfg=dict( - custom_keys={ - 'pos_embed': dict(decay_mult=0.), - 'cls_token': dict(decay_mult=0.), - 'norm': dict(decay_mult=0.) 
- })) - -lr_config = dict( - _delete_=True, - policy='poly', - warmup='linear', - warmup_iters=1500, - warmup_ratio=1e-6, - power=1.0, - min_lr=0.0, - by_epoch=False) - -# By default, models are trained on 8 GPUs with 2 images per GPU -data = dict(samples_per_gpu=2) From 53ae35c3b8e93fb6123e35920dac5b0ca9af5a35 Mon Sep 17 00:00:00 2001 From: xiexinch Date: Mon, 28 Jun 2021 16:34:32 +0800 Subject: [PATCH 30/32] fix link delimiter --- configs/vit/README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/configs/vit/README.md b/configs/vit/README.md index fb29791e7c..daecb879a1 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -19,14 +19,14 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | ViT-B + neck | 512x512 | 80000 | 9.20 | 6.94 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_80k_ade20k-0403cee1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_130547.log.json) | -| UPerNet | ViT-B + neck | 512x512 | 160000 | 9.20 | 7.58 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_160k_ade20k-852fa768.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210623_192432.log.json) | -| UPerNet | ViT-B + norm | 512x512 | 160000 | 9.21 | 6.82 | 47.73 | 49.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k-f444c077.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_172828.log.json) | -| UPerNet | DeiT-S | 512x512 | 80000 | 4.68 | 29.85 | 42.96 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k-afc93ec2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_095228.log.json) | -| UPerNet | DeiT-S | 512x512 | 160000 | 4.68 | 29.19 | 42.87 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k-5110d916.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_160903.log.json) | -| UPerNet | DeiT-S + neck | 512x512 | 160000 | 5.69 | 11.18 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_512x512_160k_ade20k-fb9a5dfb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_161021.log.json) | -| UPerNet | DeiT-S + norm | 512x512 | 160000 | 5.69 | 12.39 | 43.52 | 45.01 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k-c0cd652f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_161021.log.json) | -| UPerNet | DeiT-B | 512x512 | 80000 | 7.75 | 9.69 | 45.24 | 46.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k-1e090789.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_130529.log.json) | -| UPerNet | DeiT-B | 512x512 | 160000 | 7.75 | 10.39 | 45.36 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k-828705d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_180100.log.json) | -| UPerNet | DeiT-B + neck | 512x512 | 160000 | 9.21 | 7.78 | 45.46 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_512x512_160k_ade20k-4e1450f3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_191949.log.json) | -| UPerNet | DeiT-B + norm | 512x512 | 160000 | 9.21 | 7.75 | 45.37 | 47.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k-8a959c14.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210623_153535.log.json) | +| UPerNet | ViT-B + neck | 512x512 | 80000 | 9.20 | 6.94 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_80k_ade20k-0403cee1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_130547.log.json) | +| UPerNet | ViT-B + neck | 512x512 | 160000 | 9.20 | 7.58 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_160k_ade20k-852fa768.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210623_192432.log.json) | +| UPerNet | ViT-B + norm | 512x512 | 160000 | 9.21 | 6.82 | 47.73 | 49.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k-f444c077.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_172828.log.json) | +| UPerNet | DeiT-S | 512x512 | 80000 | 4.68 | 29.85 | 42.96 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k-afc93ec2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_095228.log.json) | +| UPerNet | DeiT-S | 512x512 | 160000 | 4.68 | 29.19 | 42.87 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k-5110d916.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_160903.log.json) | +| UPerNet | DeiT-S + neck | 512x512 | 160000 | 5.69 | 11.18 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_512x512_160k_ade20k-fb9a5dfb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_161021.log.json) | +| UPerNet | DeiT-S + norm | 512x512 | 160000 | 5.69 | 12.39 | 43.52 | 45.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k-c0cd652f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_161021.log.json) | +| UPerNet | DeiT-B | 512x512 | 80000 | 7.75 | 9.69 | 45.24 | 46.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k-1e090789.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_130529.log.json) | +| UPerNet | DeiT-B | 512x512 | 160000 | 7.75 | 10.39 | 45.36 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k-828705d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_180100.log.json) | +| UPerNet | DeiT-B + neck | 512x512 | 160000 | 9.21 | 7.78 | 45.46 | 47.16 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_512x512_160k_ade20k-4e1450f3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_191949.log.json) | +| UPerNet | DeiT-B + norm | 512x512 | 160000 | 9.21 | 7.75 | 45.37 | 47.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k-8a959c14.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210623_153535.log.json) | From 451ae11597dee2507c02403da340c9eefc3c2d9b Mon Sep 17 00:00:00 2001 From: xiexinch Date: Tue, 29 Jun 2021 18:14:07 +0800 Subject: [PATCH 31/32] rename configs and fix link --- ...t_vit-b16.py => upernet_vit-b16_ln_mln.py} | 0 configs/vit/README.md | 22 +++++++++---------- ...et_deit-b16_ln_mln_512x512_160k_ade20k.py} | 0 ...et_deit-s16_ln_mln_512x512_160k_ade20k.py} | 6 ++++- ...ernet_deit-s16_neck_512x512_160k_ade20k.py | 2 +- ...net_vit-b16_ln_mln_512x512_160k_ade20k.py} | 5 +++-- ...pernet_vit-b16_neck_512x512_160k_ade20k.py | 5 +++-- ...upernet_vit-b16_neck_512x512_80k_ade20k.py | 5 +++-- 8 files changed, 26 insertions(+), 19 deletions(-) rename configs/_base_/models/{upernet_vit-b16.py => upernet_vit-b16_ln_mln.py} (100%) rename configs/vit/{upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py => upernet_deit-b16_ln_mln_512x512_160k_ade20k.py} (100%) rename configs/vit/{upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py => upernet_deit-s16_ln_mln_512x512_160k_ade20k.py} (76%) rename configs/vit/{upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py => upernet_vit-b16_ln_mln_512x512_160k_ade20k.py} (83%) diff --git a/configs/_base_/models/upernet_vit-b16.py 
b/configs/_base_/models/upernet_vit-b16_ln_mln.py similarity index 100% rename from configs/_base_/models/upernet_vit-b16.py rename to configs/_base_/models/upernet_vit-b16_ln_mln.py diff --git a/configs/vit/README.md b/configs/vit/README.md index daecb879a1..777d682b45 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -19,14 +19,14 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | ViT-B + neck | 512x512 | 80000 | 9.20 | 6.94 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_80k_ade20k-0403cee1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_130547.log.json) | -| UPerNet | ViT-B + neck | 512x512 | 160000 | 9.20 | 7.58 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_160k_ade20k-852fa768.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210623_192432.log.json) | -| UPerNet | ViT-B + norm | 512x512 | 160000 | 9.21 | 6.82 | 47.73 | 49.95 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k-f444c077.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_172828.log.json) | -| UPerNet | DeiT-S | 512x512 | 80000 | 4.68 | 29.85 | 42.96 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k-afc93ec2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_095228.log.json) | -| UPerNet | DeiT-S | 512x512 | 160000 | 4.68 | 29.19 | 42.87 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k-5110d916.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_160903.log.json) | -| UPerNet | DeiT-S + neck | 512x512 | 160000 | 5.69 | 11.18 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_512x512_160k_ade20k-fb9a5dfb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_161021.log.json) | -| UPerNet | DeiT-S + norm | 512x512 | 160000 | 5.69 | 12.39 | 43.52 | 45.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k-c0cd652f.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_161021.log.json) | -| UPerNet | DeiT-B | 512x512 | 80000 | 7.75 | 9.69 | 45.24 | 46.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k-1e090789.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210624_130529.log.json) | -| UPerNet | DeiT-B | 512x512 | 160000 | 7.75 | 10.39 | 45.36 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k-828705d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_180100.log.json) | -| UPerNet | DeiT-B + neck | 512x512 | 160000 | 9.21 | 7.78 | 45.46 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_512x512_160k_ade20k-4e1450f3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210621_191949.log.json) | -| UPerNet | DeiT-B + norm | 512x512 | 160000 | 9.21 | 7.75 | 45.37 | 47.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k-8a959c14.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/20210623_153535.log.json) | +| UPerNet | ViT-B + neck | 512x512 | 80000 | 9.20 | 6.94 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_80k_ade20k/upernet_vit-b16_neck_512x512_80k_ade20k-0403cee1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_80k_ade20k/20210624_130547.log.json) | +| UPerNet | ViT-B + neck | 512x512 | 160000 | 9.20 | 7.58 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_160k_ade20k/upernet_vit-b16_neck_512x512_160k_ade20k-852fa768.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_160k_ade20k/20210623_192432.log.json) | +| UPerNet | ViT-B + LN +MLN | 512x512 | 160000 | 9.21 | 6.82 | 47.73 | 49.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k-f444c077.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/20210621_172828.log.json) | +| UPerNet | DeiT-S | 512x512 | 80000 | 4.68 | 29.85 | 42.96 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k-afc93ec2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210624_095228.log.json) | +| UPerNet | DeiT-S | 512x512 | 160000 | 4.68 | 29.19 | 42.87 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k-5110d916.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210621_160903.log.json) | +| UPerNet | DeiT-S + neck | 512x512 | 160000 | 5.69 | 11.18 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_512x512_160k_ade20k/upernet_deit-s16_neck_512x512_160k_ade20k-fb9a5dfb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_512x512_160k_ade20k/20210621_161021.log.json) | +| UPerNet | DeiT-S + LN +MLN | 512x512 | 160000 | 5.69 | 12.39 | 43.52 | 45.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k-c0cd652f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/20210621_161021.log.json) | +| UPerNet | DeiT-B | 512x512 | 80000 | 7.75 | 9.69 | 45.24 | 46.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k-1e090789.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210624_130529.log.json) | +| UPerNet | DeiT-B | 512x512 | 160000 | 7.75 | 10.39 | 45.36 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k-828705d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210621_180100.log.json) | +| UPerNet | DeiT-B + neck | 512x512 | 160000 | 9.21 | 7.78 | 45.46 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_512x512_160k_ade20k/upernet_deit-b16_neck_512x512_160k_ade20k-4e1450f3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_512x512_160k_ade20k/20210621_191949.log.json) | +| UPerNet | DeiT-B + LN +MLN | 512x512 | 160000 | 9.21 | 7.75 | 45.37 | 47.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k-8a959c14.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/20210623_153535.log.json) | diff --git a/configs/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py similarity index 100% rename from configs/vit/upernet_deit-b16_neck_ln-backbone_512x512_160k_ade20k.py rename to configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py diff --git a/configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py similarity index 76% rename from configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py rename to configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py index f8a52fa8e4..9672617481 100644 --- 
a/configs/vit/upernet_deit-s16_neck_ln-backbone_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py @@ -2,7 +2,11 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa - backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1, final_norm=True), # noqa + backbone=dict( + num_heads=6, + embed_dims=384, + drop_path_rate=0.1, + final_norm=True), decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable diff --git a/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py index 959ce8a1f6..68fcae1805 100644 --- a/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py @@ -2,7 +2,7 @@ model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa - backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), # noqa + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), auxiliary_head=dict(num_classes=150, in_channels=384)) # yapf: disable diff --git a/configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py similarity index 83% rename from configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py rename to configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py index 70bb6fe80b..f6f85378b0 100644 --- a/configs/vit/upernet_vit-b16_neck_ln-backbone_512x512_160k_ade20k.py +++ b/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py @@ -1,6 +1,7 @@ _base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', - 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' + '../_base_/models/upernet_vit-b16_ln_mln.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' ] model = dict( diff --git a/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py index 3660b82f87..cc286f1fb2 100644 --- a/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py +++ b/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py @@ -1,6 +1,7 @@ _base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' + '../_base_/models/upernet_vit-b16_ln_mln.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' ] model = dict( diff --git a/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py b/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py index c188e15102..d80b0d9fd8 100644 --- a/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py +++ b/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py @@ -1,6 +1,7 @@ _base_ = [ - '../_base_/models/upernet_vit-b16.py', '../_base_/datasets/ade20k.py', - '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' + '../_base_/models/upernet_vit-b16_ln_mln.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' ] model = dict( From 8db5cddb96e8afdd8e66012711673969ff87e3de Mon Sep 17 00:00:00 2001 From: xiexinch Date: Thu, 1 Jul 2021 15:24:34 +0800 Subject: [PATCH 32/32] rename neck to mln --- configs/vit/README.md | 14 +++++++------- .../vit/upernet_deit-b16_512x512_160k_ade20k.py | 2 +- configs/vit/upernet_deit-b16_512x512_80k_ade20k.py | 2 +- .../upernet_deit-b16_ln_mln_512x512_160k_ade20k.py | 2 +- ...=> upernet_deit-b16_mln_512x512_160k_ade20k.py} | 2 +- .../vit/upernet_deit-s16_512x512_160k_ade20k.py | 2 +- 
configs/vit/upernet_deit-s16_512x512_80k_ade20k.py | 2 +- .../upernet_deit-s16_ln_mln_512x512_160k_ade20k.py | 2 +- ...=> upernet_deit-s16_mln_512x512_160k_ade20k.py} | 2 +- ... => upernet_vit-b16_mln_512x512_160k_ade20k.py} | 0 ...y => upernet_vit-b16_mln_512x512_80k_ade20k.py} | 0 11 files changed, 15 insertions(+), 15 deletions(-) rename configs/vit/{upernet_deit-b16_neck_512x512_160k_ade20k.py => upernet_deit-b16_mln_512x512_160k_ade20k.py} (74%) rename configs/vit/{upernet_deit-s16_neck_512x512_160k_ade20k.py => upernet_deit-s16_mln_512x512_160k_ade20k.py} (87%) rename configs/vit/{upernet_vit-b16_neck_512x512_160k_ade20k.py => upernet_vit-b16_mln_512x512_160k_ade20k.py} (100%) rename configs/vit/{upernet_vit-b16_neck_512x512_80k_ade20k.py => upernet_vit-b16_mln_512x512_80k_ade20k.py} (100%) diff --git a/configs/vit/README.md b/configs/vit/README.md index 777d682b45..f0b0e16887 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -19,14 +19,14 @@ | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | ViT-B + neck | 512x512 | 80000 | 9.20 | 6.94 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_80k_ade20k/upernet_vit-b16_neck_512x512_80k_ade20k-0403cee1.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_80k_ade20k/20210624_130547.log.json) | -| UPerNet | ViT-B + neck | 512x512 | 160000 | 9.20 | 7.58 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_160k_ade20k/upernet_vit-b16_neck_512x512_160k_ade20k-852fa768.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_neck_512x512_160k_ade20k/20210623_192432.log.json) | -| UPerNet | ViT-B + LN +MLN | 512x512 | 160000 | 9.21 | 6.82 | 47.73 | 49.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k-f444c077.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/20210621_172828.log.json) | +| UPerNet | ViT-B + MLN | 512x512 | 80000 | 9.20 | 6.94 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/upernet_vit-b16_mln_512x512_80k_ade20k-0403cee1.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/20210624_130547.log.json) | +| UPerNet | ViT-B + MLN | 512x512 | 160000 | 9.20 | 7.58 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/upernet_vit-b16_mln_512x512_160k_ade20k-852fa768.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/20210623_192432.log.json) | +| UPerNet | ViT-B + LN + MLN | 512x512 | 160000 | 9.21 | 6.82 | 47.73 | 49.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k-f444c077.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/20210621_172828.log.json) | | UPerNet | DeiT-S | 512x512 | 80000 | 4.68 | 29.85 | 42.96 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k-afc93ec2.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210624_095228.log.json) | | UPerNet | DeiT-S | 512x512 | 160000 | 4.68 | 29.19 | 42.87 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k-5110d916.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210621_160903.log.json) | -| UPerNet | DeiT-S + neck | 512x512 | 160000 | 5.69 | 11.18 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_512x512_160k_ade20k/upernet_deit-s16_neck_512x512_160k_ade20k-fb9a5dfb.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_neck_512x512_160k_ade20k/20210621_161021.log.json) | -| UPerNet | DeiT-S + LN +MLN | 512x512 | 160000 | 5.69 | 12.39 | 43.52 | 45.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k-c0cd652f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/20210621_161021.log.json) | +| UPerNet | DeiT-S + MLN | 512x512 | 160000 | 5.69 | 11.18 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/upernet_deit-s16_mln_512x512_160k_ade20k-fb9a5dfb.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/20210621_161021.log.json) | +| UPerNet | DeiT-S + LN + MLN | 512x512 | 160000 | 5.69 | 12.39 | 43.52 | 45.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k-c0cd652f.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/20210621_161021.log.json) | | UPerNet | DeiT-B | 512x512 | 80000 | 7.75 | 9.69 | 45.24 | 46.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k-1e090789.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210624_130529.log.json) | | UPerNet | DeiT-B | 512x512 | 160000 | 7.75 | 10.39 | 45.36 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k-828705d7.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210621_180100.log.json) | -| UPerNet | DeiT-B + neck | 512x512 | 160000 | 9.21 | 7.78 | 45.46 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_512x512_160k_ade20k/upernet_deit-b16_neck_512x512_160k_ade20k-4e1450f3.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_neck_512x512_160k_ade20k/20210621_191949.log.json) | -| UPerNet | DeiT-B + LN +MLN | 512x512 | 160000 | 9.21 | 7.75 | 45.37 | 47.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k-8a959c14.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/20210623_153535.log.json) | +| UPerNet | DeiT-B + MLN | 512x512 | 160000 | 9.21 | 7.78 | 45.46 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/upernet_deit-b16_mln_512x512_160k_ade20k-4e1450f3.pth) | 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/20210621_191949.log.json) | +| UPerNet | DeiT-B + LN + MLN | 512x512 | 160000 | 9.21 | 7.75 | 45.37 | 47.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k-8a959c14.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/20210623_153535.log.json) | diff --git a/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py index 0cacbc150b..6f17d7a646 100644 --- a/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa diff --git a/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py index 65fabcd2b4..7bff28a10d 100644 --- a/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_neck_512x512_80k_ade20k.py' +_base_ = './upernet_vit-b16_mln_512x512_80k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa diff --git a/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py index 5d98cca6a9..f5b2411df1 100644 --- a/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = 
'./upernet_vit-b16_neck_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa diff --git a/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py b/configs/vit/upernet_deit-b16_mln_512x512_160k_ade20k.py similarity index 74% rename from configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py rename to configs/vit/upernet_deit-b16_mln_512x512_160k_ade20k.py index 9d7ff5b288..68efd48937 100644 --- a/configs/vit/upernet_deit-b16_neck_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-b16_mln_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth', # noqa diff --git a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py index a58d859647..cae6f466c5 100644 --- a/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa diff --git a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py index 8d4129fe96..b176abb792 100644 --- a/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py +++ b/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_neck_512x512_80k_ade20k.py' +_base_ = './upernet_vit-b16_mln_512x512_80k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa diff --git a/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py 
b/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py index 9672617481..f328ca860a 100644 --- a/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa diff --git a/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py b/configs/vit/upernet_deit-s16_mln_512x512_160k_ade20k.py similarity index 87% rename from configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py rename to configs/vit/upernet_deit-s16_mln_512x512_160k_ade20k.py index 68fcae1805..a1e1c2a4e2 100644 --- a/configs/vit/upernet_deit-s16_neck_512x512_160k_ade20k.py +++ b/configs/vit/upernet_deit-s16_mln_512x512_160k_ade20k.py @@ -1,4 +1,4 @@ -_base_ = './upernet_vit-b16_neck_512x512_160k_ade20k.py' +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' model = dict( pretrained='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth', # noqa diff --git a/configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py similarity index 100% rename from configs/vit/upernet_vit-b16_neck_512x512_160k_ade20k.py rename to configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py diff --git a/configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py b/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py similarity index 100% rename from configs/vit/upernet_vit-b16_neck_512x512_80k_ade20k.py rename to configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py