[Feature] Support SegNeXt (NeurIPS'2022) in MMSeg 0.x. #2247

Closed
wants to merge 28 commits
Changes from 6 commits
Commits (28 total)
1310a5d
configs
FabianSchuetze Oct 29, 2022
8f7deca
trains
FabianSchuetze Oct 29, 2022
79e2482
correct configs
FabianSchuetze Oct 31, 2022
4a20eda
proper attribution
FabianSchuetze Oct 31, 2022
c56d243
adjust config for better training
FabianSchuetze Nov 2, 2022
b3b85c9
Lifted hardcoded values to config files
FabianSchuetze Nov 5, 2022
b8c6aaa
revert original naming and permit cpu device
FabianSchuetze Nov 11, 2022
ed83982
bump v0.30.0 (#2462)
xiexinch Jan 11, 2023
ba7608c
[Fix] Fix no revert_sync_batchnorm in image_demo of master branch (#2…
MengzhangLI Jan 16, 2023
6cb7fe0
Imagenet-s dataset for large-scale semantic segmentation (#2480)
gasvn Jan 16, 2023
5d49918
[Fix] Switch order of `reduce_zero_label` and applying `label_map` (#…
siddancha Jan 19, 2023
64ad587
[Fix] Fix ignore class id from -1 to 255 in `master` (#2515)
siddancha Jan 28, 2023
ac5d650
[CI] Upgrade the version of isort to fix lint error in master branch …
xiexinch Jan 29, 2023
190063f
[Fix] Fix `reduce_zero_label` in evaluation (#2504)
siddancha Jan 30, 2023
b29cc34
configs
FabianSchuetze Oct 29, 2022
3810e1f
trains
FabianSchuetze Oct 29, 2022
b570699
correct configs
FabianSchuetze Oct 31, 2022
4ed0033
proper attribution
FabianSchuetze Oct 31, 2022
3f529ba
adjust config for better training
FabianSchuetze Nov 2, 2022
d4f8b52
Lifted hardcoded values to config files
FabianSchuetze Nov 5, 2022
68a33ef
update readme and refactor code
MengzhangLI Jan 31, 2023
319dbda
fix conflict
MengzhangLI Jan 31, 2023
83b9c69
refactor segnext
MengzhangLI Feb 7, 2023
688be9d
add ut
MengzhangLI Feb 7, 2023
5f2bb25
add DWConv module
MengzhangLI Feb 9, 2023
22e044d
upload tiny&small&large models & logs
MengzhangLI Feb 14, 2023
4ddea0f
delete mscan.py in config
MengzhangLI Feb 14, 2023
51e129f
rename mscan.py
MengzhangLI Feb 14, 2023
41 changes: 41 additions & 0 deletions configs/_base_/models/segnext.py
@@ -0,0 +1,41 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
model = dict(
type='EncoderDecoder',
pretrained=None,
backbone=dict(
type='MSCAN',
embed_dims=[32, 64, 160, 256],
mlp_ratios=[8, 8, 4, 4],
drop_rate=0.0,
drop_path_rate=0.1,
depths=[3, 3, 5, 2],
attention_kernel_sizes=[[5], [1, 7], [1, 11], [1, 21]],
attention_kernel_paddings=[2, (0, 3), (0, 5), (0, 10)],
norm_cfg=dict(type='BN', requires_grad=True)),
decode_head=dict(
type='LightHamHead',
in_channels=[64, 160, 256],
in_index=[1, 2, 3],
channels=256,
ham_channels=256,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=ham_norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
ham_kwargs=dict(
spatial=True,
MD_S=1,
MD_D=512,
MD_R=64,
train_steps=6,
eval_steps=7,
inv_t=100,
eta=0.9,
rand_init=True)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))
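
For reference, a minimal sketch (assuming MMSegmentation 0.x with this PR checked out) of how this base model config can be instantiated; it mirrors the build step in tools/train.py and needs neither the dataset nor pretrained weights:

import mmcv
from mmseg.models import build_segmentor

# Load the base config; the dataset-specific configs below override these fields.
cfg = mmcv.Config.fromfile('configs/_base_/models/segnext.py')

# train_cfg/test_cfg live inside the model dict here, so the top-level
# lookups return None and build_segmentor picks them up from cfg.model.
model = build_segmentor(
    cfg.model,
    train_cfg=cfg.get('train_cfg'),
    test_cfg=cfg.get('test_cfg'))
model.init_weights()
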
117 changes: 117 additions & 0 deletions configs/segnext/segnext_tiny_512x512_adamw_160k_ade20.py
@@ -0,0 +1,117 @@
_base_ = [
'../_base_/models/segnext.py',
'../_base_/default_runtime.py',
]
find_unused_parameters = True
# model settings
norm_cfg = dict(type='BN', requires_grad=True)
ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
model = dict(
type='EncoderDecoder',
backbone=dict(
init_cfg=dict(type='Pretrained', checkpoint='/notebooks/mscan_t.pth')),
decode_head=dict(
type='LightHamHead',
in_channels=[64, 160, 256],
in_index=[1, 2, 3],
channels=256,
ham_channels=256,
ham_kwargs=dict(MD_R=16),
dropout_ratio=0.1,
num_classes=150,
norm_cfg=ham_norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

evaluation = dict(interval=8000, metric='mIoU')
checkpoint_config = dict(by_epoch=False, interval=8000)
# optimizer
# 0.00006 is the lr for bs 16; should use 0.00006/8 as lr otherwise (needs testing)
optimizer = dict(
type='AdamW',
lr=0.00006,
betas=(0.9, 0.999),
weight_decay=0.01,
paramwise_cfg=dict(
custom_keys={
'pos_block': dict(decay_mult=0.),
'norm': dict(decay_mult=0.),
'head': dict(lr_mult=10.)
}))

lr_config = dict(
policy='poly',
warmup='linear',
warmup_iters=1500,
warmup_ratio=1e-6,
power=1.0,
min_lr=0.0,
by_epoch=False)

dataset_type = 'ADE20KDataset'
data_root = '/notebooks/ADEChallengeData2016'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2048, 512),
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='ResizeToMultiple', size_divisor=32),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=50,
dataset=dict(
type=dataset_type,
data_root=data_root,
img_dir='images/training',
ann_dir='annotations/training',
pipeline=train_pipeline)),
val=dict(
type=dataset_type,
data_root=data_root,
img_dir='images/validation',
ann_dir='annotations/validation',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
data_root=data_root,
img_dir='images/validation',
ann_dir='annotations/validation',
pipeline=test_pipeline))

optimizer_config = dict()
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=160000)
checkpoint_config = dict(by_epoch=False, interval=4000)
evaluation = dict(interval=4000, metric='mIoU')
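
Training with this config follows the standard MMSeg 0.x workflow, e.g. `python tools/train.py configs/segnext/segnext_tiny_512x512_adamw_160k_ade20.py`, after adjusting the hard-coded /notebooks/... checkpoint and data_root paths. Below is a minimal inference sketch once a trained checkpoint exists; the checkpoint path is a placeholder, not part of this PR:

from mmseg.apis import inference_segmentor, init_segmentor

config_file = 'configs/segnext/segnext_tiny_512x512_adamw_160k_ade20.py'
checkpoint_file = 'work_dirs/segnext_tiny/latest.pth'  # placeholder, produced by training
model = init_segmentor(config_file, checkpoint_file, device='cuda:0')

# Returns a list with one per-pixel class-index map for the input image.
result = inference_segmentor(model, 'demo/demo.png')
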
3 changes: 2 additions & 1 deletion mmseg/models/backbones/__init__.py
@@ -11,6 +11,7 @@
from .mit import MixVisionTransformer
from .mobilenet_v2 import MobileNetV2
from .mobilenet_v3 import MobileNetV3
from .mscan import MSCAN
from .resnest import ResNeSt
from .resnet import ResNet, ResNetV1c, ResNetV1d
from .resnext import ResNeXt
@@ -26,5 +27,5 @@
'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3',
'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer',
'BiSeNetV1', 'BiSeNetV2', 'ICNet', 'TIMMBackbone', 'ERFNet', 'PCPVT',
'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE'
'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE', 'MSCAN'
]
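
Adding MSCAN to this __init__ registers it with the BACKBONES registry, so it can be built like any other MMSeg backbone. A small sketch using the MSCAN-T settings from configs/_base_/models/segnext.py above (the 512x512 input size is only for illustration):

import torch
from mmseg.models import build_backbone

# MSCAN-T settings copied from configs/_base_/models/segnext.py.
mscan_t = build_backbone(
    dict(
        type='MSCAN',
        embed_dims=[32, 64, 160, 256],
        mlp_ratios=[8, 8, 4, 4],
        drop_rate=0.0,
        drop_path_rate=0.1,
        depths=[3, 3, 5, 2],
        attention_kernel_sizes=[[5], [1, 7], [1, 11], [1, 21]],
        attention_kernel_paddings=[2, (0, 3), (0, 5), (0, 10)],
        norm_cfg=dict(type='BN', requires_grad=True)))
mscan_t.eval()

with torch.no_grad():
    feats = mscan_t(torch.randn(1, 3, 512, 512))
# Four feature maps at strides 4, 8, 16 and 32; LightHamHead consumes the last
# three (in_index=[1, 2, 3] with in_channels=[64, 160, 256]).
print([f.shape for f in feats])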