From f71cef5756e510219eb9a14a5b8c5f96de77ee95 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Fri, 1 Apr 2022 00:21:17 +0800 Subject: [PATCH 01/32] move layer_decay_optimizer_constructor --- mmseg/core/__init__.py | 3 +- .../core/layer_decay_optimizer_constructor.py | 87 ------------------- mmseg/core/optimizers/__init__.py | 7 ++ .../layer_decay_optimizer_constructor.py | 82 +++++++++++++++++ mmseg/core/utils/__init__.py | 7 +- 5 files changed, 91 insertions(+), 95 deletions(-) delete mode 100644 mmseg/core/layer_decay_optimizer_constructor.py create mode 100644 mmseg/core/optimizers/__init__.py rename mmseg/core/{utils => optimizers}/layer_decay_optimizer_constructor.py (62%) diff --git a/mmseg/core/__init__.py b/mmseg/core/__init__.py index c60b48c0c6..e39a38e33c 100644 --- a/mmseg/core/__init__.py +++ b/mmseg/core/__init__.py @@ -1,6 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. from .evaluation import * # noqa: F401, F403 -from .layer_decay_optimizer_constructor import \ - LayerDecayOptimizerConstructor # noqa: F401 +from .optimizers import * # noqa: F401, F403 from .seg import * # noqa: F401, F403 from .utils import * # noqa: F401, F403 diff --git a/mmseg/core/layer_decay_optimizer_constructor.py b/mmseg/core/layer_decay_optimizer_constructor.py deleted file mode 100644 index 30a09ba08e..0000000000 --- a/mmseg/core/layer_decay_optimizer_constructor.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from mmcv.runner import (OPTIMIZER_BUILDERS, DefaultOptimizerConstructor, - get_dist_info) - -from mmseg.utils import get_root_logger - - -def get_num_layer_for_vit(var_name, num_max_layer): - """Get the layer id to set the different learning rates. - - Args: - var_name (str): The key of the model. - num_max_layer (int): Maximum number of backbone layers. - Returns: - layer id (int): Returns the layer id of the key. - """ - - if var_name in ('backbone.cls_token', 'backbone.mask_token', - 'backbone.pos_embed'): - return 0 - elif var_name.startswith('backbone.patch_embed'): - return 0 - elif var_name.startswith('backbone.layers'): - layer_id = int(var_name.split('.')[2]) - return layer_id + 1 - else: - return num_max_layer - 1 - - -@OPTIMIZER_BUILDERS.register_module() -class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor): - """Different learning rates are set for different layers of backbone.""" - - def add_params(self, params, module): - """Add all parameters of module to the params list. - - The parameters of the given module will be added to the list of param - groups, with specific rules defined by paramwise_cfg. - Args: - params (list[dict]): A list of param groups, it will be modified - in place. - module (nn.Module): The module to be added. - """ - parameter_groups = {} - logger = get_root_logger() - logger.info(self.paramwise_cfg) - num_layers = self.paramwise_cfg.get('num_layers') + 2 - layer_decay_rate = self.paramwise_cfg.get('layer_decay_rate') - logger.info(f'Build LayerDecayOptimizerConstructor ' - f'{layer_decay_rate} - {num_layers}') - weight_decay = self.base_wd - for name, param in module.named_parameters(): - if not param.requires_grad: - continue # frozen weights - if len(param.shape) == 1 or name.endswith('.bias') or name in ( - 'pos_embed', 'cls_token'): - group_name = 'no_decay' - this_weight_decay = 0. - else: - group_name = 'decay' - this_weight_decay = weight_decay - layer_id = get_num_layer_for_vit(name, num_layers) - group_name = f'layer_{layer_id}_{group_name}' - if group_name not in parameter_groups: - scale = layer_decay_rate**(num_layers - layer_id - 1) - parameter_groups[group_name] = { - 'weight_decay': this_weight_decay, - 'params': [], - 'param_names': [], - 'lr_scale': scale, - 'group_name': group_name, - 'lr': scale * self.base_lr - } - parameter_groups[group_name]['params'].append(param) - parameter_groups[group_name]['param_names'].append(name) - rank, _ = get_dist_info() - if rank == 0: - to_display = {} - for key in parameter_groups: - to_display[key] = { - 'param_names': parameter_groups[key]['param_names'], - 'lr_scale': parameter_groups[key]['lr_scale'], - 'lr': parameter_groups[key]['lr'], - 'weight_decay': parameter_groups[key]['weight_decay'] - } - logger.info(f'Param groups ={to_display}') - params.extend(parameter_groups.values()) diff --git a/mmseg/core/optimizers/__init__.py b/mmseg/core/optimizers/__init__.py new file mode 100644 index 0000000000..69aa5b9652 --- /dev/null +++ b/mmseg/core/optimizers/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .layer_decay_optimizer_constructor import \ + LearningRateDecayOptimizerConstructor, LayerDecayOptimizerConstructor + +__all__ = [ + 'LearningRateDecayOptimizerConstructor', 'LayerDecayOptimizerConstructor' +] \ No newline at end of file diff --git a/mmseg/core/utils/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py similarity index 62% rename from mmseg/core/utils/layer_decay_optimizer_constructor.py rename to mmseg/core/optimizers/layer_decay_optimizer_constructor.py index ec9dc156d4..6fa39222f2 100644 --- a/mmseg/core/utils/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -146,3 +146,85 @@ def add_params(self, params, module): } logger.info(f'Param groups = {json.dumps(to_display, indent=2)}') params.extend(parameter_groups.values()) + + +def get_num_layer_for_vit(var_name, num_max_layer): + """Get the layer id to set the different learning rates. + + Args: + var_name (str): The key of the model. + num_max_layer (int): Maximum number of backbone layers. + Returns: + layer id (int): Returns the layer id of the key. + """ + + if var_name in ('backbone.cls_token', 'backbone.mask_token', + 'backbone.pos_embed'): + return 0 + elif var_name.startswith('backbone.patch_embed'): + return 0 + elif var_name.startswith('backbone.layers'): + layer_id = int(var_name.split('.')[2]) + return layer_id + 1 + else: + return num_max_layer - 1 + + +@OPTIMIZER_BUILDERS.register_module() +class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor): + """Different learning rates are set for different layers of backbone.""" + + def add_params(self, params, module): + """Add all parameters of module to the params list. + + The parameters of the given module will be added to the list of param + groups, with specific rules defined by paramwise_cfg. + Args: + params (list[dict]): A list of param groups, it will be modified + in place. + module (nn.Module): The module to be added. + """ + parameter_groups = {} + logger = get_root_logger() + logger.info(self.paramwise_cfg) + num_layers = self.paramwise_cfg.get('num_layers') + 2 + layer_decay_rate = self.paramwise_cfg.get('layer_decay_rate') + logger.info(f'Build LayerDecayOptimizerConstructor ' + f'{layer_decay_rate} - {num_layers}') + weight_decay = self.base_wd + for name, param in module.named_parameters(): + if not param.requires_grad: + continue # frozen weights + if len(param.shape) == 1 or name.endswith('.bias') or name in ( + 'pos_embed', 'cls_token'): + group_name = 'no_decay' + this_weight_decay = 0. + else: + group_name = 'decay' + this_weight_decay = weight_decay + layer_id = get_num_layer_for_vit(name, num_layers) + group_name = f'layer_{layer_id}_{group_name}' + if group_name not in parameter_groups: + scale = layer_decay_rate**(num_layers - layer_id - 1) + parameter_groups[group_name] = { + 'weight_decay': this_weight_decay, + 'params': [], + 'param_names': [], + 'lr_scale': scale, + 'group_name': group_name, + 'lr': scale * self.base_lr + } + parameter_groups[group_name]['params'].append(param) + parameter_groups[group_name]['param_names'].append(name) + rank, _ = get_dist_info() + if rank == 0: + to_display = {} + for key in parameter_groups: + to_display[key] = { + 'param_names': parameter_groups[key]['param_names'], + 'lr_scale': parameter_groups[key]['lr_scale'], + 'lr': parameter_groups[key]['lr'], + 'weight_decay': parameter_groups[key]['weight_decay'] + } + logger.info(f'Param groups ={to_display}') + params.extend(parameter_groups.values()) diff --git a/mmseg/core/utils/__init__.py b/mmseg/core/utils/__init__.py index cb5a0c3fd3..28882893a5 100644 --- a/mmseg/core/utils/__init__.py +++ b/mmseg/core/utils/__init__.py @@ -1,10 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. from .dist_util import check_dist_init, sync_random_seed -from .layer_decay_optimizer_constructor import \ - LearningRateDecayOptimizerConstructor from .misc import add_prefix -__all__ = [ - 'add_prefix', 'LearningRateDecayOptimizerConstructor', 'check_dist_init', - 'sync_random_seed' -] +__all__ = ['add_prefix', 'check_dist_init', 'sync_random_seed'] From 6aa86d282ad1196ff70de9569b3bb030a906e050 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Fri, 1 Apr 2022 00:24:08 +0800 Subject: [PATCH 02/32] fix --- mmseg/core/optimizers/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mmseg/core/optimizers/__init__.py b/mmseg/core/optimizers/__init__.py index 69aa5b9652..4fbf4ecfcd 100644 --- a/mmseg/core/optimizers/__init__.py +++ b/mmseg/core/optimizers/__init__.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .layer_decay_optimizer_constructor import \ - LearningRateDecayOptimizerConstructor, LayerDecayOptimizerConstructor +from .layer_decay_optimizer_constructor import ( + LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor) __all__ = [ 'LearningRateDecayOptimizerConstructor', 'LayerDecayOptimizerConstructor' -] \ No newline at end of file +] From 936ab01b1772c3590a4e027234b51806aa7972eb Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Fri, 1 Apr 2022 00:32:00 +0800 Subject: [PATCH 03/32] fix --- tests/test_core/test_layer_decay_optimizer_constructor.py | 2 +- .../test_core/test_learning_rate_decay_optimizer_constructor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index f595d31331..936f881652 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -2,7 +2,7 @@ import torch import torch.nn as nn -from mmseg.core.layer_decay_optimizer_constructor import \ +from mmseg.core.optimizers.layer_decay_optimizer_constructor import \ LayerDecayOptimizerConstructor layer_wise_gt_lst = [{ diff --git a/tests/test_core/test_learning_rate_decay_optimizer_constructor.py b/tests/test_core/test_learning_rate_decay_optimizer_constructor.py index 204ca45b9e..eddb99f80b 100644 --- a/tests/test_core/test_learning_rate_decay_optimizer_constructor.py +++ b/tests/test_core/test_learning_rate_decay_optimizer_constructor.py @@ -3,7 +3,7 @@ import torch.nn as nn from mmcv.cnn import ConvModule -from mmseg.core.utils.layer_decay_optimizer_constructor import \ +from mmseg.core.optimizers.layer_decay_optimizer_constructor import \ LearningRateDecayOptimizerConstructor base_lr = 1 From 102c54ff0e2a491c47aed0e52268f8eb270ff8a7 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Fri, 1 Apr 2022 00:39:43 +0800 Subject: [PATCH 04/32] merge test_core --- .../test_layer_decay_optimizer_constructor.py | 162 +++++++++++++++++- ...arning_rate_decay_optimizer_constructor.py | 161 ----------------- 2 files changed, 159 insertions(+), 164 deletions(-) delete mode 100644 tests/test_core/test_learning_rate_decay_optimizer_constructor.py diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 936f881652..b30d56084c 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -1,11 +1,167 @@ # Copyright (c) OpenMMLab. All rights reserved. import torch import torch.nn as nn +from mmcv.cnn import ConvModule -from mmseg.core.optimizers.layer_decay_optimizer_constructor import \ - LayerDecayOptimizerConstructor +from mmseg.core.optimizers.layer_decay_optimizer_constructor import ( + LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor) + +base_lr = 1 +decay_rate = 2 +base_wd = 0.05 +weight_decay = 0.05 + +stage_wise_gt_lst = [{ + 'weight_decay': 0.0, + 'lr_scale': 128 +}, { + 'weight_decay': 0.0, + 'lr_scale': 1 +}, { + 'weight_decay': 0.05, + 'lr_scale': 64 +}, { + 'weight_decay': 0.0, + 'lr_scale': 64 +}, { + 'weight_decay': 0.05, + 'lr_scale': 32 +}, { + 'weight_decay': 0.0, + 'lr_scale': 32 +}, { + 'weight_decay': 0.05, + 'lr_scale': 16 +}, { + 'weight_decay': 0.0, + 'lr_scale': 16 +}, { + 'weight_decay': 0.05, + 'lr_scale': 8 +}, { + 'weight_decay': 0.0, + 'lr_scale': 8 +}, { + 'weight_decay': 0.05, + 'lr_scale': 128 +}, { + 'weight_decay': 0.05, + 'lr_scale': 1 +}] layer_wise_gt_lst = [{ + 'weight_decay': 0.0, + 'lr_scale': 128 +}, { + 'weight_decay': 0.0, + 'lr_scale': 1 +}, { + 'weight_decay': 0.05, + 'lr_scale': 64 +}, { + 'weight_decay': 0.0, + 'lr_scale': 64 +}, { + 'weight_decay': 0.05, + 'lr_scale': 32 +}, { + 'weight_decay': 0.0, + 'lr_scale': 32 +}, { + 'weight_decay': 0.05, + 'lr_scale': 16 +}, { + 'weight_decay': 0.0, + 'lr_scale': 16 +}, { + 'weight_decay': 0.05, + 'lr_scale': 2 +}, { + 'weight_decay': 0.0, + 'lr_scale': 2 +}, { + 'weight_decay': 0.05, + 'lr_scale': 128 +}, { + 'weight_decay': 0.05, + 'lr_scale': 1 +}] + + +class ConvNeXtExampleModel(nn.Module): + + def __init__(self): + super().__init__() + self.backbone = nn.ModuleList() + self.backbone.stages = nn.ModuleList() + for i in range(4): + stage = nn.Sequential(ConvModule(3, 4, kernel_size=1, bias=True)) + self.backbone.stages.append(stage) + self.backbone.norm0 = nn.BatchNorm2d(2) + + # add some variables to meet unit test coverate rate + self.backbone.cls_token = nn.Parameter(torch.ones(1)) + self.backbone.mask_token = nn.Parameter(torch.ones(1)) + self.backbone.pos_embed = nn.Parameter(torch.ones(1)) + self.backbone.stem_norm = nn.Parameter(torch.ones(1)) + self.backbone.downsample_norm0 = nn.BatchNorm2d(2) + self.backbone.downsample_norm1 = nn.BatchNorm2d(2) + self.backbone.downsample_norm2 = nn.BatchNorm2d(2) + self.backbone.lin = nn.Parameter(torch.ones(1)) + self.backbone.lin.requires_grad = False + + self.backbone.downsample_layers = nn.ModuleList() + for i in range(4): + stage = nn.Sequential(nn.Conv2d(3, 4, kernel_size=1, bias=True)) + self.backbone.downsample_layers.append(stage) + + self.decode_head = nn.Conv2d(2, 2, kernel_size=1, groups=2) + + +class PseudoDataParallel(nn.Module): + + def __init__(self): + super().__init__() + self.module = ConvNeXtExampleModel() + + def forward(self, x): + return x + + +def check_convnext_adamw_optimizer(optimizer, gt_lst): + assert isinstance(optimizer, torch.optim.AdamW) + assert optimizer.defaults['lr'] == base_lr + assert optimizer.defaults['weight_decay'] == base_wd + param_groups = optimizer.param_groups + assert len(param_groups) == 12 + for i, param_dict in enumerate(param_groups): + assert param_dict['weight_decay'] == gt_lst[i]['weight_decay'] + assert param_dict['lr_scale'] == gt_lst[i]['lr_scale'] + assert param_dict['lr_scale'] == param_dict['lr'] + + +def test_convnext_learning_rate_decay_optimizer_constructor(): + + # paramwise_cfg with ConvNeXtExampleModel + model = ConvNeXtExampleModel() + optimizer_cfg = dict( + type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05) + stagewise_paramwise_cfg = dict( + decay_rate=decay_rate, decay_type='stage_wise', num_layers=6) + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, stagewise_paramwise_cfg) + optimizer = optim_constructor(model) + check_convnext_adamw_optimizer(optimizer, stage_wise_gt_lst) + + layerwise_paramwise_cfg = dict( + decay_rate=decay_rate, decay_type='layer_wise', num_layers=6) + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, layerwise_paramwise_cfg) + optimizer = optim_constructor(model) + check_convnext_adamw_optimizer(optimizer, layer_wise_gt_lst) + + +layer_wise_wd_lr = [{ 'weight_decay': 0.0, 'lr_scale': 16 }, { @@ -67,4 +223,4 @@ def test_beit_layer_decay_optimizer_constructor(): optim_constructor = LayerDecayOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) - check_beit_adamw_optimizer(optimizer, layer_wise_gt_lst) + check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) diff --git a/tests/test_core/test_learning_rate_decay_optimizer_constructor.py b/tests/test_core/test_learning_rate_decay_optimizer_constructor.py deleted file mode 100644 index eddb99f80b..0000000000 --- a/tests/test_core/test_learning_rate_decay_optimizer_constructor.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn as nn -from mmcv.cnn import ConvModule - -from mmseg.core.optimizers.layer_decay_optimizer_constructor import \ - LearningRateDecayOptimizerConstructor - -base_lr = 1 -decay_rate = 2 -base_wd = 0.05 -weight_decay = 0.05 - -stage_wise_gt_lst = [{ - 'weight_decay': 0.0, - 'lr_scale': 128 -}, { - 'weight_decay': 0.0, - 'lr_scale': 1 -}, { - 'weight_decay': 0.05, - 'lr_scale': 64 -}, { - 'weight_decay': 0.0, - 'lr_scale': 64 -}, { - 'weight_decay': 0.05, - 'lr_scale': 32 -}, { - 'weight_decay': 0.0, - 'lr_scale': 32 -}, { - 'weight_decay': 0.05, - 'lr_scale': 16 -}, { - 'weight_decay': 0.0, - 'lr_scale': 16 -}, { - 'weight_decay': 0.05, - 'lr_scale': 8 -}, { - 'weight_decay': 0.0, - 'lr_scale': 8 -}, { - 'weight_decay': 0.05, - 'lr_scale': 128 -}, { - 'weight_decay': 0.05, - 'lr_scale': 1 -}] - -layer_wise_gt_lst = [{ - 'weight_decay': 0.0, - 'lr_scale': 128 -}, { - 'weight_decay': 0.0, - 'lr_scale': 1 -}, { - 'weight_decay': 0.05, - 'lr_scale': 64 -}, { - 'weight_decay': 0.0, - 'lr_scale': 64 -}, { - 'weight_decay': 0.05, - 'lr_scale': 32 -}, { - 'weight_decay': 0.0, - 'lr_scale': 32 -}, { - 'weight_decay': 0.05, - 'lr_scale': 16 -}, { - 'weight_decay': 0.0, - 'lr_scale': 16 -}, { - 'weight_decay': 0.05, - 'lr_scale': 2 -}, { - 'weight_decay': 0.0, - 'lr_scale': 2 -}, { - 'weight_decay': 0.05, - 'lr_scale': 128 -}, { - 'weight_decay': 0.05, - 'lr_scale': 1 -}] - - -class ConvNeXtExampleModel(nn.Module): - - def __init__(self): - super().__init__() - self.backbone = nn.ModuleList() - self.backbone.stages = nn.ModuleList() - for i in range(4): - stage = nn.Sequential(ConvModule(3, 4, kernel_size=1, bias=True)) - self.backbone.stages.append(stage) - self.backbone.norm0 = nn.BatchNorm2d(2) - - # add some variables to meet unit test coverate rate - self.backbone.cls_token = nn.Parameter(torch.ones(1)) - self.backbone.mask_token = nn.Parameter(torch.ones(1)) - self.backbone.pos_embed = nn.Parameter(torch.ones(1)) - self.backbone.stem_norm = nn.Parameter(torch.ones(1)) - self.backbone.downsample_norm0 = nn.BatchNorm2d(2) - self.backbone.downsample_norm1 = nn.BatchNorm2d(2) - self.backbone.downsample_norm2 = nn.BatchNorm2d(2) - self.backbone.lin = nn.Parameter(torch.ones(1)) - self.backbone.lin.requires_grad = False - - self.backbone.downsample_layers = nn.ModuleList() - for i in range(4): - stage = nn.Sequential(nn.Conv2d(3, 4, kernel_size=1, bias=True)) - self.backbone.downsample_layers.append(stage) - - self.decode_head = nn.Conv2d(2, 2, kernel_size=1, groups=2) - - -class PseudoDataParallel(nn.Module): - - def __init__(self): - super().__init__() - self.module = ConvNeXtExampleModel() - - def forward(self, x): - return x - - -def check_convnext_adamw_optimizer(optimizer, gt_lst): - assert isinstance(optimizer, torch.optim.AdamW) - assert optimizer.defaults['lr'] == base_lr - assert optimizer.defaults['weight_decay'] == base_wd - param_groups = optimizer.param_groups - assert len(param_groups) == 12 - for i, param_dict in enumerate(param_groups): - assert param_dict['weight_decay'] == gt_lst[i]['weight_decay'] - assert param_dict['lr_scale'] == gt_lst[i]['lr_scale'] - assert param_dict['lr_scale'] == param_dict['lr'] - - -def test_convnext_learning_rate_decay_optimizer_constructor(): - - # paramwise_cfg with ConvNeXtExampleModel - model = ConvNeXtExampleModel() - optimizer_cfg = dict( - type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05) - stagewise_paramwise_cfg = dict( - decay_rate=decay_rate, decay_type='stage_wise', num_layers=6) - optim_constructor = LearningRateDecayOptimizerConstructor( - optimizer_cfg, stagewise_paramwise_cfg) - optimizer = optim_constructor(model) - check_convnext_adamw_optimizer(optimizer, stage_wise_gt_lst) - - layerwise_paramwise_cfg = dict( - decay_rate=decay_rate, decay_type='layer_wise', num_layers=6) - optim_constructor = LearningRateDecayOptimizerConstructor( - optimizer_cfg, layerwise_paramwise_cfg) - optimizer = optim_constructor(model) - check_convnext_adamw_optimizer(optimizer, layer_wise_gt_lst) From 16e0b2f0184f83f8341c78f27f180d1e72178b02 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Fri, 1 Apr 2022 18:05:39 +0800 Subject: [PATCH 05/32] fix --- ...ernet_beit-base_8x2_640x640_160k_ade20k.py | 8 +- ...beit-large_fp16_8x1_640x640_160k_ade20k.py | 8 +- mmseg/core/optimizers/__init__.py | 8 +- .../layer_decay_optimizer_constructor.py | 107 ++++-------------- .../test_layer_decay_optimizer_constructor.py | 11 +- 5 files changed, 46 insertions(+), 96 deletions(-) diff --git a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py index b36adc3c0d..3a34aaecc7 100644 --- a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py +++ b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py @@ -13,8 +13,12 @@ lr=3e-5, betas=(0.9, 0.999), weight_decay=0.05, - constructor='LayerDecayOptimizerConstructor', - paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.9)) + constructor='LearningRateDecayOptimizerConstructor', + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'layer_wise_vit', + 'num_layers': 12 + }) lr_config = dict( _delete_=True, diff --git a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py index e6247b7352..3d4ce3fef4 100644 --- a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py +++ b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py @@ -27,8 +27,12 @@ lr=2e-5, betas=(0.9, 0.999), weight_decay=0.05, - constructor='LayerDecayOptimizerConstructor', - paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95)) + constructor='LearningRateDecayOptimizerConstructor', + paramwise_cfg={ + 'decay_rate': 0.95, + 'decay_type': 'layer_wise_vit', + 'num_layers': 24 + }) lr_config = dict( _delete_=True, diff --git a/mmseg/core/optimizers/__init__.py b/mmseg/core/optimizers/__init__.py index 4fbf4ecfcd..83db069ee3 100644 --- a/mmseg/core/optimizers/__init__.py +++ b/mmseg/core/optimizers/__init__.py @@ -1,7 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .layer_decay_optimizer_constructor import ( - LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor) +from .layer_decay_optimizer_constructor import \ + LearningRateDecayOptimizerConstructor -__all__ = [ - 'LearningRateDecayOptimizerConstructor', 'LayerDecayOptimizerConstructor' -] +__all__ = ['LearningRateDecayOptimizerConstructor'] diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index 6fa39222f2..084da729fa 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -74,6 +74,28 @@ def get_num_layer_stage_wise(var_name, num_max_layer): return num_max_layer - 1 +def get_num_layer_for_vit(var_name, num_max_layer): + """Get the layer id to set the different learning rates. + + Args: + var_name (str): The key of the model. + num_max_layer (int): Maximum number of backbone layers. + Returns: + layer id (int): Returns the layer id of the key. + """ + + if var_name in ('backbone.cls_token', 'backbone.mask_token', + 'backbone.pos_embed'): + return 0 + elif var_name.startswith('backbone.patch_embed'): + return 0 + elif var_name.startswith('backbone.layers'): + layer_id = int(var_name.split('.')[2]) + return layer_id + 1 + else: + return num_max_layer - 1 + + @OPTIMIZER_BUILDERS.register_module() class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): """Different learning rates are set for different layers of backbone.""" @@ -115,6 +137,9 @@ def add_params(self, params, module): layer_id = get_num_layer_layer_wise( name, self.paramwise_cfg.get('num_layers')) logger.info(f'set param {name} as id {layer_id}') + elif decay_type == 'layer_wise_vit': + layer_id = get_num_layer_for_vit(name, num_layers) + logger.info(f'set param {name} as id {layer_id}') elif decay_type == 'stage_wise': layer_id = get_num_layer_stage_wise(name, num_layers) logger.info(f'set param {name} as id {layer_id}') @@ -146,85 +171,3 @@ def add_params(self, params, module): } logger.info(f'Param groups = {json.dumps(to_display, indent=2)}') params.extend(parameter_groups.values()) - - -def get_num_layer_for_vit(var_name, num_max_layer): - """Get the layer id to set the different learning rates. - - Args: - var_name (str): The key of the model. - num_max_layer (int): Maximum number of backbone layers. - Returns: - layer id (int): Returns the layer id of the key. - """ - - if var_name in ('backbone.cls_token', 'backbone.mask_token', - 'backbone.pos_embed'): - return 0 - elif var_name.startswith('backbone.patch_embed'): - return 0 - elif var_name.startswith('backbone.layers'): - layer_id = int(var_name.split('.')[2]) - return layer_id + 1 - else: - return num_max_layer - 1 - - -@OPTIMIZER_BUILDERS.register_module() -class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor): - """Different learning rates are set for different layers of backbone.""" - - def add_params(self, params, module): - """Add all parameters of module to the params list. - - The parameters of the given module will be added to the list of param - groups, with specific rules defined by paramwise_cfg. - Args: - params (list[dict]): A list of param groups, it will be modified - in place. - module (nn.Module): The module to be added. - """ - parameter_groups = {} - logger = get_root_logger() - logger.info(self.paramwise_cfg) - num_layers = self.paramwise_cfg.get('num_layers') + 2 - layer_decay_rate = self.paramwise_cfg.get('layer_decay_rate') - logger.info(f'Build LayerDecayOptimizerConstructor ' - f'{layer_decay_rate} - {num_layers}') - weight_decay = self.base_wd - for name, param in module.named_parameters(): - if not param.requires_grad: - continue # frozen weights - if len(param.shape) == 1 or name.endswith('.bias') or name in ( - 'pos_embed', 'cls_token'): - group_name = 'no_decay' - this_weight_decay = 0. - else: - group_name = 'decay' - this_weight_decay = weight_decay - layer_id = get_num_layer_for_vit(name, num_layers) - group_name = f'layer_{layer_id}_{group_name}' - if group_name not in parameter_groups: - scale = layer_decay_rate**(num_layers - layer_id - 1) - parameter_groups[group_name] = { - 'weight_decay': this_weight_decay, - 'params': [], - 'param_names': [], - 'lr_scale': scale, - 'group_name': group_name, - 'lr': scale * self.base_lr - } - parameter_groups[group_name]['params'].append(param) - parameter_groups[group_name]['param_names'].append(name) - rank, _ = get_dist_info() - if rank == 0: - to_display = {} - for key in parameter_groups: - to_display[key] = { - 'param_names': parameter_groups[key]['param_names'], - 'lr_scale': parameter_groups[key]['lr_scale'], - 'lr': parameter_groups[key]['lr'], - 'weight_decay': parameter_groups[key]['weight_decay'] - } - logger.info(f'Param groups ={to_display}') - params.extend(parameter_groups.values()) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index b30d56084c..7a4e2a3a04 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -3,8 +3,8 @@ import torch.nn as nn from mmcv.cnn import ConvModule -from mmseg.core.optimizers.layer_decay_optimizer_constructor import ( - LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor) +from mmseg.core.optimizers.layer_decay_optimizer_constructor import \ + LearningRateDecayOptimizerConstructor base_lr = 1 decay_rate = 2 @@ -219,8 +219,9 @@ def test_beit_layer_decay_optimizer_constructor(): model = BEiTExampleModel(depth=3) optimizer_cfg = dict( type='AdamW', lr=1, betas=(0.9, 0.999), weight_decay=0.05) - paramwise_cfg = dict(num_layers=3, layer_decay_rate=2) - optim_constructor = LayerDecayOptimizerConstructor(optimizer_cfg, - paramwise_cfg) + paramwise_cfg = dict( + decay_rate=2, decay_type='layer_wise_vit', num_layers=3) + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) From d7c0d679e029fca0e24a883c6cf6d8e1fc6ea414 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Fri, 1 Apr 2022 18:32:45 +0800 Subject: [PATCH 06/32] add DeprecationWarning --- mmseg/core/optimizers/layer_decay_optimizer_constructor.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index 084da729fa..b441f1dd19 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import json +import warnings from mmcv.runner import (OPTIMIZER_BUILDERS, DefaultOptimizerConstructor, get_dist_info) @@ -138,6 +139,12 @@ def add_params(self, params, module): name, self.paramwise_cfg.get('num_layers')) logger.info(f'set param {name} as id {layer_id}') elif decay_type == 'layer_wise_vit': + warnings.warn( + 'DeprecationWarning: Original ' + 'LayerDecayOptimizerConstructor ' + 'has been deprecated. Please use ' + 'LearningRateDecayOptimizerConstructor instead, ' + 'and set decay_type = layer_wise_vit in paramwise_cfg.') layer_id = get_num_layer_for_vit(name, num_layers) logger.info(f'set param {name} as id {layer_id}') elif decay_type == 'stage_wise': From 2e7e5797d83b7332afa4029c9fc8d9389abdf489 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Fri, 1 Apr 2022 18:58:06 +0800 Subject: [PATCH 07/32] fix DeprecationWarning --- mmseg/core/optimizers/layer_decay_optimizer_constructor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index b441f1dd19..d0639b29c8 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -141,7 +141,7 @@ def add_params(self, params, module): elif decay_type == 'layer_wise_vit': warnings.warn( 'DeprecationWarning: Original ' - 'LayerDecayOptimizerConstructor ' + 'LayerDecayOptimizerConstructor of BEiT ' 'has been deprecated. Please use ' 'LearningRateDecayOptimizerConstructor instead, ' 'and set decay_type = layer_wise_vit in paramwise_cfg.') From 278fc81d8918510bdbe522002136441d10bcc6a1 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Sat, 2 Apr 2022 11:55:00 +0800 Subject: [PATCH 08/32] fix --- ...ernet_beit-base_8x2_640x640_160k_ade20k.py | 8 ++----- ...beit-large_fp16_8x1_640x640_160k_ade20k.py | 8 ++----- mmseg/core/optimizers/__init__.py | 8 ++++--- .../layer_decay_optimizer_constructor.py | 22 ++++++++++++++++++- .../test_layer_decay_optimizer_constructor.py | 11 +++++----- 5 files changed, 35 insertions(+), 22 deletions(-) diff --git a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py index 3a34aaecc7..b36adc3c0d 100644 --- a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py +++ b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py @@ -13,12 +13,8 @@ lr=3e-5, betas=(0.9, 0.999), weight_decay=0.05, - constructor='LearningRateDecayOptimizerConstructor', - paramwise_cfg={ - 'decay_rate': 0.9, - 'decay_type': 'layer_wise_vit', - 'num_layers': 12 - }) + constructor='LayerDecayOptimizerConstructor', + paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.9)) lr_config = dict( _delete_=True, diff --git a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py index 3d4ce3fef4..e6247b7352 100644 --- a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py +++ b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py @@ -27,12 +27,8 @@ lr=2e-5, betas=(0.9, 0.999), weight_decay=0.05, - constructor='LearningRateDecayOptimizerConstructor', - paramwise_cfg={ - 'decay_rate': 0.95, - 'decay_type': 'layer_wise_vit', - 'num_layers': 24 - }) + constructor='LayerDecayOptimizerConstructor', + paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95)) lr_config = dict( _delete_=True, diff --git a/mmseg/core/optimizers/__init__.py b/mmseg/core/optimizers/__init__.py index 83db069ee3..4fbf4ecfcd 100644 --- a/mmseg/core/optimizers/__init__.py +++ b/mmseg/core/optimizers/__init__.py @@ -1,5 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .layer_decay_optimizer_constructor import \ - LearningRateDecayOptimizerConstructor +from .layer_decay_optimizer_constructor import ( + LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor) -__all__ = ['LearningRateDecayOptimizerConstructor'] +__all__ = [ + 'LearningRateDecayOptimizerConstructor', 'LayerDecayOptimizerConstructor' +] diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index d0639b29c8..c650c6b02c 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -117,7 +117,12 @@ def add_params(self, params, module): parameter_groups = {} logger.info(f'self.paramwise_cfg is {self.paramwise_cfg}') num_layers = self.paramwise_cfg.get('num_layers') + 2 - decay_rate = self.paramwise_cfg.get('decay_rate') + if self.paramwise_cfg.get('layer_decay_rate'): + warnings.warn('DeprecationWarning: Layer_decay_rate will ' + 'be deleted, please use decay_rate instead.') + decay_rate = self.paramwise_cfg.get('layer_decay_rate') + else: + decay_rate = self.paramwise_cfg.get('decay_rate') decay_type = self.paramwise_cfg.get('decay_type', 'layer_wise') logger.info('Build LearningRateDecayOptimizerConstructor ' f'{decay_type} {decay_rate} - {num_layers}') @@ -178,3 +183,18 @@ def add_params(self, params, module): } logger.info(f'Param groups = {json.dumps(to_display, indent=2)}') params.extend(parameter_groups.values()) + + +@OPTIMIZER_BUILDERS.register_module() +class LayerDecayOptimizerConstructor(LearningRateDecayOptimizerConstructor): + """Different learning rates are set for different layers of backbone.""" + + def __init__(self, optimizer_cfg, paramwise_cfg): + warnings.warn('DeprecationWarning: Original ' + 'LayerDecayOptimizerConstructor of BEiT ' + 'will be deprecated. Please use ' + 'LearningRateDecayOptimizerConstructor instead, ' + 'and set decay_type = layer_wise_vit in paramwise_cfg.') + paramwise_cfg.update({'decay_type': 'layer_wise_vit'}) + super(LayerDecayOptimizerConstructor, + self).__init__(optimizer_cfg, paramwise_cfg) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 7a4e2a3a04..987a5ce903 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -3,8 +3,8 @@ import torch.nn as nn from mmcv.cnn import ConvModule -from mmseg.core.optimizers.layer_decay_optimizer_constructor import \ - LearningRateDecayOptimizerConstructor +from mmseg.core.optimizers.layer_decay_optimizer_constructor import ( + LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor) base_lr = 1 decay_rate = 2 @@ -219,9 +219,8 @@ def test_beit_layer_decay_optimizer_constructor(): model = BEiTExampleModel(depth=3) optimizer_cfg = dict( type='AdamW', lr=1, betas=(0.9, 0.999), weight_decay=0.05) - paramwise_cfg = dict( - decay_rate=2, decay_type='layer_wise_vit', num_layers=3) - optim_constructor = LearningRateDecayOptimizerConstructor( - optimizer_cfg, paramwise_cfg) + paramwise_cfg = dict(layer_decay_rate=2, num_layers=3) + optim_constructor = LayerDecayOptimizerConstructor(optimizer_cfg, + paramwise_cfg) optimizer = optim_constructor(model) check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) From e57d79e0e8761c5e3b3a006dadbf755c6b03e9d7 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Sat, 2 Apr 2022 12:17:58 +0800 Subject: [PATCH 09/32] fix --- .../layer_decay_optimizer_constructor.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index c650c6b02c..5c257d7b4d 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -117,12 +117,7 @@ def add_params(self, params, module): parameter_groups = {} logger.info(f'self.paramwise_cfg is {self.paramwise_cfg}') num_layers = self.paramwise_cfg.get('num_layers') + 2 - if self.paramwise_cfg.get('layer_decay_rate'): - warnings.warn('DeprecationWarning: Layer_decay_rate will ' - 'be deleted, please use decay_rate instead.') - decay_rate = self.paramwise_cfg.get('layer_decay_rate') - else: - decay_rate = self.paramwise_cfg.get('decay_rate') + decay_rate = self.paramwise_cfg.get('decay_rate') decay_type = self.paramwise_cfg.get('decay_type', 'layer_wise') logger.info('Build LearningRateDecayOptimizerConstructor ' f'{decay_type} {decay_rate} - {num_layers}') @@ -144,12 +139,6 @@ def add_params(self, params, module): name, self.paramwise_cfg.get('num_layers')) logger.info(f'set param {name} as id {layer_id}') elif decay_type == 'layer_wise_vit': - warnings.warn( - 'DeprecationWarning: Original ' - 'LayerDecayOptimizerConstructor of BEiT ' - 'has been deprecated. Please use ' - 'LearningRateDecayOptimizerConstructor instead, ' - 'and set decay_type = layer_wise_vit in paramwise_cfg.') layer_id = get_num_layer_for_vit(name, num_layers) logger.info(f'set param {name} as id {layer_id}') elif decay_type == 'stage_wise': @@ -196,5 +185,8 @@ def __init__(self, optimizer_cfg, paramwise_cfg): 'LearningRateDecayOptimizerConstructor instead, ' 'and set decay_type = layer_wise_vit in paramwise_cfg.') paramwise_cfg.update({'decay_type': 'layer_wise_vit'}) + warnings.warn('DeprecationWarning: Layer_decay_rate will ' + 'be deleted, please use decay_rate instead.') + paramwise_cfg['decay_rate'] = paramwise_cfg.pop('layer_decay_rate') super(LayerDecayOptimizerConstructor, self).__init__(optimizer_cfg, paramwise_cfg) From 4d8131df7ea7afcaef3dffe5f89231e224a5bf87 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Mon, 4 Apr 2022 02:23:39 +0800 Subject: [PATCH 10/32] fix --- mmseg/core/optimizers/layer_decay_optimizer_constructor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index 5c257d7b4d..9b6e4dfb40 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -99,7 +99,8 @@ def get_num_layer_for_vit(var_name, num_max_layer): @OPTIMIZER_BUILDERS.register_module() class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): - """Different learning rates are set for different layers of backbone.""" + # Different learning rates are set for different layers of backbone. + # Note: Currently, this optimizer constructor is built for ConvNeXt. def add_params(self, params, module): """Add all parameters of module to the params list. @@ -176,7 +177,8 @@ def add_params(self, params, module): @OPTIMIZER_BUILDERS.register_module() class LayerDecayOptimizerConstructor(LearningRateDecayOptimizerConstructor): - """Different learning rates are set for different layers of backbone.""" + # Different learning rates are set for different layers of backbone. + # Note: Currently, this optimizer constructor is built for BEiT. def __init__(self, optimizer_cfg, paramwise_cfg): warnings.warn('DeprecationWarning: Original ' From 29ebc06fd60897a3393a3063dd59f08bad02240e Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Tue, 5 Apr 2022 12:39:37 +0800 Subject: [PATCH 11/32] fix --- .../core/optimizers/layer_decay_optimizer_constructor.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index 9b6e4dfb40..bf6cf99d0b 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -100,7 +100,8 @@ def get_num_layer_for_vit(var_name, num_max_layer): @OPTIMIZER_BUILDERS.register_module() class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): # Different learning rates are set for different layers of backbone. - # Note: Currently, this optimizer constructor is built for ConvNeXt. + # Note: Currently, this optimizer constructor is built for ConvNeXt + # and BEiT. def add_params(self, params, module): """Add all parameters of module to the params list. @@ -134,7 +135,6 @@ def add_params(self, params, module): else: group_name = 'decay' this_weight_decay = weight_decay - if decay_type == 'layer_wise': layer_id = get_num_layer_layer_wise( name, self.paramwise_cfg.get('num_layers')) @@ -178,7 +178,9 @@ def add_params(self, params, module): @OPTIMIZER_BUILDERS.register_module() class LayerDecayOptimizerConstructor(LearningRateDecayOptimizerConstructor): # Different learning rates are set for different layers of backbone. - # Note: Currently, this optimizer constructor is built for BEiT. + # Note: Currently, this optimizer constructor is built for BEiT, + # and it will be deprecated. + # Please use ``LearningRateDecayOptimizerConstructor`` instead. def __init__(self, optimizer_cfg, paramwise_cfg): warnings.warn('DeprecationWarning: Original ' From 14d2026f1b401a2ad172a1bf1094d07b9e9094c2 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Tue, 5 Apr 2022 13:22:22 +0800 Subject: [PATCH 12/32] fix --- .../layer_decay_optimizer_constructor.py | 24 ++++++++++++------- .../test_layer_decay_optimizer_constructor.py | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index bf6cf99d0b..aaeebd8550 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -135,16 +135,22 @@ def add_params(self, params, module): else: group_name = 'decay' this_weight_decay = weight_decay - if decay_type == 'layer_wise': - layer_id = get_num_layer_layer_wise( - name, self.paramwise_cfg.get('num_layers')) - logger.info(f'set param {name} as id {layer_id}') - elif decay_type == 'layer_wise_vit': - layer_id = get_num_layer_for_vit(name, num_layers) - logger.info(f'set param {name} as id {layer_id}') + if 'layer_wise' in decay_type: + if 'ConvNeXt' in module.__class__.__name__: + layer_id = get_num_layer_layer_wise( + name, self.paramwise_cfg.get('num_layers')) + logger.info(f'set param {name} as id {layer_id}') + elif 'BEiT' in module.__class__.__name__: + layer_id = get_num_layer_for_vit(name, num_layers) + logger.info(f'set param {name} as id {layer_id}') + else: + raise NotImplementedError() elif decay_type == 'stage_wise': - layer_id = get_num_layer_stage_wise(name, num_layers) - logger.info(f'set param {name} as id {layer_id}') + if 'ConvNeXt' in module.__class__.__name__: + layer_id = get_num_layer_stage_wise(name, num_layers) + logger.info(f'set param {name} as id {layer_id}') + else: + raise NotImplementedError() group_name = f'layer_{layer_id}_{group_name}' if group_name not in parameter_groups: diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 987a5ce903..7b2f5e0125 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -215,7 +215,7 @@ def check_beit_adamw_optimizer(optimizer, gt_lst): def test_beit_layer_decay_optimizer_constructor(): - # paramwise_cfg with ConvNeXtExampleModel + # paramwise_cfg with BEiTExampleModel model = BEiTExampleModel(depth=3) optimizer_cfg = dict( type='AdamW', lr=1, betas=(0.9, 0.999), weight_decay=0.05) From db106568ca0a0803185d27ea90f8e157101b5869 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Wed, 6 Apr 2022 11:37:42 +0800 Subject: [PATCH 13/32] fix --- .../optimizers/layer_decay_optimizer_constructor.py | 8 ++++---- .../test_layer_decay_optimizer_constructor.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index aaeebd8550..4cd1ea28d2 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -8,7 +8,7 @@ from ...utils import get_root_logger -def get_num_layer_layer_wise(var_name, num_max_layer=12): +def get_num_layer_for_convnext(var_name, num_max_layer=12): """Get the layer id to set the different learning rates in ``layer_wise`` decay_type. @@ -51,7 +51,7 @@ def get_num_layer_layer_wise(var_name, num_max_layer=12): return num_max_layer + 1 -def get_num_layer_stage_wise(var_name, num_max_layer): +def get_num_stage_for_convnext(var_name, num_max_layer): """Get the layer id to set the different learning rates in ``stage_wise`` decay_type. @@ -137,7 +137,7 @@ def add_params(self, params, module): this_weight_decay = weight_decay if 'layer_wise' in decay_type: if 'ConvNeXt' in module.__class__.__name__: - layer_id = get_num_layer_layer_wise( + layer_id = get_num_layer_for_convnext( name, self.paramwise_cfg.get('num_layers')) logger.info(f'set param {name} as id {layer_id}') elif 'BEiT' in module.__class__.__name__: @@ -147,7 +147,7 @@ def add_params(self, params, module): raise NotImplementedError() elif decay_type == 'stage_wise': if 'ConvNeXt' in module.__class__.__name__: - layer_id = get_num_layer_stage_wise(name, num_layers) + layer_id = get_num_stage_for_convnext(name, num_layers) logger.info(f'set param {name} as id {layer_id}') else: raise NotImplementedError() diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 7b2f5e0125..91df2ab92c 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +import pytest import torch import torch.nn as nn from mmcv.cnn import ConvModule @@ -140,7 +141,7 @@ def check_convnext_adamw_optimizer(optimizer, gt_lst): assert param_dict['lr_scale'] == param_dict['lr'] -def test_convnext_learning_rate_decay_optimizer_constructor(): +def test_learning_rate_decay_optimizer_constructor(): # paramwise_cfg with ConvNeXtExampleModel model = ConvNeXtExampleModel() @@ -224,3 +225,11 @@ def test_beit_layer_decay_optimizer_constructor(): paramwise_cfg) optimizer = optim_constructor(model) check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) + + with pytest.raises(NotImplementedError): + paramwise_cfg = dict( + decay_rate=decay_rate, decay_type='stage_wise', num_layers=3) + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, paramwise_cfg) + optimizer = optim_constructor(model) + check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) From e8a6a6bedc344fdd617962d2ffa1573895158034 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Wed, 6 Apr 2022 13:47:25 +0800 Subject: [PATCH 14/32] fix --- .../test_layer_decay_optimizer_constructor.py | 93 +++++++++---------- 1 file changed, 46 insertions(+), 47 deletions(-) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 91df2ab92c..1d8917bb27 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -88,6 +88,29 @@ 'lr_scale': 1 }] +layer_wise_wd_lr = [{ + 'weight_decay': 0.0, + 'lr_scale': 16 +}, { + 'weight_decay': 0.05, + 'lr_scale': 8 +}, { + 'weight_decay': 0.0, + 'lr_scale': 8 +}, { + 'weight_decay': 0.05, + 'lr_scale': 4 +}, { + 'weight_decay': 0.0, + 'lr_scale': 4 +}, { + 'weight_decay': 0.05, + 'lr_scale': 2 +}, { + 'weight_decay': 0.0, + 'lr_scale': 2 +}] + class ConvNeXtExampleModel(nn.Module): @@ -129,6 +152,21 @@ def forward(self, x): return x +class BEiTExampleModel(nn.Module): + + def __init__(self, depth): + super().__init__() + self.backbone = nn.ModuleList() + + # add some variables to meet unit test coverate rate + self.backbone.cls_token = nn.Parameter(torch.ones(1)) + self.backbone.patch_embed = nn.Parameter(torch.ones(1)) + self.backbone.layers = nn.ModuleList() + for _ in range(depth): + layer = nn.Conv2d(3, 3, 1) + self.backbone.layers.append(layer) + + def check_convnext_adamw_optimizer(optimizer, gt_lst): assert isinstance(optimizer, torch.optim.AdamW) assert optimizer.defaults['lr'] == base_lr @@ -154,6 +192,14 @@ def test_learning_rate_decay_optimizer_constructor(): optimizer = optim_constructor(model) check_convnext_adamw_optimizer(optimizer, stage_wise_gt_lst) + with pytest.raises(NotImplementedError): + model = BEiTExampleModel(depth=6) + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, stagewise_paramwise_cfg) + optimizer = optim_constructor(model) + check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) + + model = ConvNeXtExampleModel() layerwise_paramwise_cfg = dict( decay_rate=decay_rate, decay_type='layer_wise', num_layers=6) optim_constructor = LearningRateDecayOptimizerConstructor( @@ -162,45 +208,6 @@ def test_learning_rate_decay_optimizer_constructor(): check_convnext_adamw_optimizer(optimizer, layer_wise_gt_lst) -layer_wise_wd_lr = [{ - 'weight_decay': 0.0, - 'lr_scale': 16 -}, { - 'weight_decay': 0.05, - 'lr_scale': 8 -}, { - 'weight_decay': 0.0, - 'lr_scale': 8 -}, { - 'weight_decay': 0.05, - 'lr_scale': 4 -}, { - 'weight_decay': 0.0, - 'lr_scale': 4 -}, { - 'weight_decay': 0.05, - 'lr_scale': 2 -}, { - 'weight_decay': 0.0, - 'lr_scale': 2 -}] - - -class BEiTExampleModel(nn.Module): - - def __init__(self, depth): - super().__init__() - self.backbone = nn.ModuleList() - - # add some variables to meet unit test coverate rate - self.backbone.cls_token = nn.Parameter(torch.ones(1)) - self.backbone.patch_embed = nn.Parameter(torch.ones(1)) - self.backbone.layers = nn.ModuleList() - for _ in range(depth): - layer = nn.Conv2d(3, 3, 1) - self.backbone.layers.append(layer) - - def check_beit_adamw_optimizer(optimizer, gt_lst): assert isinstance(optimizer, torch.optim.AdamW) assert optimizer.defaults['lr'] == 1 @@ -225,11 +232,3 @@ def test_beit_layer_decay_optimizer_constructor(): paramwise_cfg) optimizer = optim_constructor(model) check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) - - with pytest.raises(NotImplementedError): - paramwise_cfg = dict( - decay_rate=decay_rate, decay_type='stage_wise', num_layers=3) - optim_constructor = LearningRateDecayOptimizerConstructor( - optimizer_cfg, paramwise_cfg) - optimizer = optim_constructor(model) - check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) From f8eb1b7e5812e0761e914f895ae7f56440cb696e Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Wed, 6 Apr 2022 14:00:44 +0800 Subject: [PATCH 15/32] fix --- .../test_layer_decay_optimizer_constructor.py | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 1d8917bb27..50ea26d21c 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -import pytest import torch import torch.nn as nn from mmcv.cnn import ConvModule @@ -179,6 +178,19 @@ def check_convnext_adamw_optimizer(optimizer, gt_lst): assert param_dict['lr_scale'] == param_dict['lr'] +def check_beit_adamw_optimizer(optimizer, gt_lst): + assert isinstance(optimizer, torch.optim.AdamW) + assert optimizer.defaults['lr'] == 1 + assert optimizer.defaults['weight_decay'] == 0.05 + param_groups = optimizer.param_groups + # 1 layer (cls_token and patch_embed) + 3 layers * 2 (w, b) = 7 layers + assert len(param_groups) == 7 + for i, param_dict in enumerate(param_groups): + assert param_dict['weight_decay'] == gt_lst[i]['weight_decay'] + assert param_dict['lr_scale'] == gt_lst[i]['lr_scale'] + assert param_dict['lr_scale'] == param_dict['lr'] + + def test_learning_rate_decay_optimizer_constructor(): # paramwise_cfg with ConvNeXtExampleModel @@ -192,14 +204,6 @@ def test_learning_rate_decay_optimizer_constructor(): optimizer = optim_constructor(model) check_convnext_adamw_optimizer(optimizer, stage_wise_gt_lst) - with pytest.raises(NotImplementedError): - model = BEiTExampleModel(depth=6) - optim_constructor = LearningRateDecayOptimizerConstructor( - optimizer_cfg, stagewise_paramwise_cfg) - optimizer = optim_constructor(model) - check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) - - model = ConvNeXtExampleModel() layerwise_paramwise_cfg = dict( decay_rate=decay_rate, decay_type='layer_wise', num_layers=6) optim_constructor = LearningRateDecayOptimizerConstructor( @@ -207,18 +211,13 @@ def test_learning_rate_decay_optimizer_constructor(): optimizer = optim_constructor(model) check_convnext_adamw_optimizer(optimizer, layer_wise_gt_lst) - -def check_beit_adamw_optimizer(optimizer, gt_lst): - assert isinstance(optimizer, torch.optim.AdamW) - assert optimizer.defaults['lr'] == 1 - assert optimizer.defaults['weight_decay'] == 0.05 - param_groups = optimizer.param_groups - # 1 layer (cls_token and patch_embed) + 3 layers * 2 (w, b) = 7 layers - assert len(param_groups) == 7 - for i, param_dict in enumerate(param_groups): - assert param_dict['weight_decay'] == gt_lst[i]['weight_decay'] - assert param_dict['lr_scale'] == gt_lst[i]['lr_scale'] - assert param_dict['lr_scale'] == param_dict['lr'] + layerwise_paramwise_cfg = dict( + decay_rate=decay_rate, decay_type='layer_wise', num_layers=3) + model = BEiTExampleModel(depth=3) + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, layerwise_paramwise_cfg) + optimizer = optim_constructor(model) + check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) def test_beit_layer_decay_optimizer_constructor(): From 1f58c699c58b90abd74ef440354afd97a5e3b9fa Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Thu, 7 Apr 2022 15:27:51 +0800 Subject: [PATCH 16/32] fix --- .../layer_decay_optimizer_constructor.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index 4cd1ea28d2..42288b1d70 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -8,7 +8,7 @@ from ...utils import get_root_logger -def get_num_layer_for_convnext(var_name, num_max_layer=12): +def get_layer_id_for_convnext(var_name, max_layer_id=12): """Get the layer id to set the different learning rates in ``layer_wise`` decay_type. @@ -33,7 +33,7 @@ def get_num_layer_for_convnext(var_name, num_max_layer=12): elif stage_id == 2: layer_id = 3 elif stage_id == 3: - layer_id = num_max_layer + layer_id = max_layer_id return layer_id elif var_name.startswith('backbone.stages'): stage_id = int(var_name.split('.')[2]) @@ -45,13 +45,13 @@ def get_num_layer_for_convnext(var_name, num_max_layer=12): elif stage_id == 2: layer_id = 3 + block_id // 3 elif stage_id == 3: - layer_id = num_max_layer + layer_id = max_layer_id return layer_id else: - return num_max_layer + 1 + return max_layer_id + 1 -def get_num_stage_for_convnext(var_name, num_max_layer): +def get_stage_id_for_convnext(var_name, max_stage_id): """Get the layer id to set the different learning rates in ``stage_wise`` decay_type. @@ -72,10 +72,10 @@ def get_num_stage_for_convnext(var_name, num_max_layer): stage_id = int(var_name.split('.')[2]) return stage_id + 1 else: - return num_max_layer - 1 + return max_stage_id - 1 -def get_num_layer_for_vit(var_name, num_max_layer): +def get_layer_id_for_vit(var_name, max_layer_id): """Get the layer id to set the different learning rates. Args: @@ -94,7 +94,7 @@ def get_num_layer_for_vit(var_name, num_max_layer): layer_id = int(var_name.split('.')[2]) return layer_id + 1 else: - return num_max_layer - 1 + return max_layer_id - 1 @OPTIMIZER_BUILDERS.register_module() @@ -137,17 +137,17 @@ def add_params(self, params, module): this_weight_decay = weight_decay if 'layer_wise' in decay_type: if 'ConvNeXt' in module.__class__.__name__: - layer_id = get_num_layer_for_convnext( + layer_id = get_layer_id_for_convnext( name, self.paramwise_cfg.get('num_layers')) logger.info(f'set param {name} as id {layer_id}') elif 'BEiT' in module.__class__.__name__: - layer_id = get_num_layer_for_vit(name, num_layers) + layer_id = get_layer_id_for_vit(name, num_layers) logger.info(f'set param {name} as id {layer_id}') else: raise NotImplementedError() elif decay_type == 'stage_wise': if 'ConvNeXt' in module.__class__.__name__: - layer_id = get_num_stage_for_convnext(name, num_layers) + layer_id = get_stage_id_for_convnext(name, num_layers) logger.info(f'set param {name} as id {layer_id}') else: raise NotImplementedError() From c931d334f15512667d57bc79d3bf9f176349f30c Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Tue, 12 Apr 2022 19:01:57 +0800 Subject: [PATCH 17/32] fix --- mmseg/core/optimizers/layer_decay_optimizer_constructor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index 42288b1d70..917bb29bef 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -136,17 +136,17 @@ def add_params(self, params, module): group_name = 'decay' this_weight_decay = weight_decay if 'layer_wise' in decay_type: - if 'ConvNeXt' in module.__class__.__name__: + if 'ConvNeXt' in str(module.backbone): layer_id = get_layer_id_for_convnext( name, self.paramwise_cfg.get('num_layers')) logger.info(f'set param {name} as id {layer_id}') - elif 'BEiT' in module.__class__.__name__: + elif 'BEiT' in str(module.backbone): layer_id = get_layer_id_for_vit(name, num_layers) logger.info(f'set param {name} as id {layer_id}') else: raise NotImplementedError() elif decay_type == 'stage_wise': - if 'ConvNeXt' in module.__class__.__name__: + if 'ConvNeXt' in str(module.backbone): layer_id = get_stage_id_for_convnext(name, num_layers) logger.info(f'set param {name} as id {layer_id}') else: From e9b19997b7486e6f31dcb45b70fd1f4f047ec7ae Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Mon, 25 Apr 2022 12:52:06 +0800 Subject: [PATCH 18/32] fix --- mmseg/core/optimizers/layer_decay_optimizer_constructor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index 917bb29bef..ddf08a1348 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -2,10 +2,10 @@ import json import warnings -from mmcv.runner import (OPTIMIZER_BUILDERS, DefaultOptimizerConstructor, - get_dist_info) +from mmcv.runner import DefaultOptimizerConstructor, get_dist_info -from ...utils import get_root_logger +from mmseg.utils import get_root_logger +from ..builder import OPTIMIZER_BUILDERS def get_layer_id_for_convnext(var_name, max_layer_id=12): From 1b71c3ca7a2c419bfb499ce76b6fb7a85868f141 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Mon, 25 Apr 2022 12:54:33 +0800 Subject: [PATCH 19/32] fix --- mmseg/core/optimizers/layer_decay_optimizer_constructor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index ddf08a1348..917bb29bef 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -2,10 +2,10 @@ import json import warnings -from mmcv.runner import DefaultOptimizerConstructor, get_dist_info +from mmcv.runner import (OPTIMIZER_BUILDERS, DefaultOptimizerConstructor, + get_dist_info) -from mmseg.utils import get_root_logger -from ..builder import OPTIMIZER_BUILDERS +from ...utils import get_root_logger def get_layer_id_for_convnext(var_name, max_layer_id=12): From 3052fe7985167a46bdcebf71ca2e0ce842f4cc20 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Mon, 25 Apr 2022 14:36:48 +0800 Subject: [PATCH 20/32] fix test --- mmseg/core/__init__.py | 3 +-- .../test_layer_decay_optimizer_constructor.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/mmseg/core/__init__.py b/mmseg/core/__init__.py index 535bd4f30b..1a077d2f1f 100644 --- a/mmseg/core/__init__.py +++ b/mmseg/core/__init__.py @@ -7,6 +7,5 @@ from .utils import * # noqa: F401, F403 __all__ = [ - 'LayerDecayOptimizerConstructor', 'OPTIMIZER_BUILDERS', 'build_optimizer', - 'build_optimizer_constructor' + 'OPTIMIZER_BUILDERS', 'build_optimizer', 'build_optimizer_constructor' ] diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 50ea26d21c..3bbcdfdd70 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -108,6 +108,12 @@ }, { 'weight_decay': 0.0, 'lr_scale': 2 +}, { + 'weight_decay': 0.05, + 'lr_scale': 1 +}, { + 'weight_decay': 0.0, + 'lr_scale': 1 }] @@ -132,13 +138,14 @@ def __init__(self): self.backbone.downsample_norm2 = nn.BatchNorm2d(2) self.backbone.lin = nn.Parameter(torch.ones(1)) self.backbone.lin.requires_grad = False - self.backbone.downsample_layers = nn.ModuleList() for i in range(4): stage = nn.Sequential(nn.Conv2d(3, 4, kernel_size=1, bias=True)) self.backbone.downsample_layers.append(stage) self.decode_head = nn.Conv2d(2, 2, kernel_size=1, groups=2) + # for test + self.backbone.ConvNeXt = nn.Conv2d(2, 2, kernel_size=1, groups=2) class PseudoDataParallel(nn.Module): @@ -156,7 +163,6 @@ class BEiTExampleModel(nn.Module): def __init__(self, depth): super().__init__() self.backbone = nn.ModuleList() - # add some variables to meet unit test coverate rate self.backbone.cls_token = nn.Parameter(torch.ones(1)) self.backbone.patch_embed = nn.Parameter(torch.ones(1)) @@ -164,6 +170,7 @@ def __init__(self, depth): for _ in range(depth): layer = nn.Conv2d(3, 3, 1) self.backbone.layers.append(layer) + self.backbone.BEiT = nn.Conv2d(3, 3, 1) def check_convnext_adamw_optimizer(optimizer, gt_lst): @@ -183,8 +190,8 @@ def check_beit_adamw_optimizer(optimizer, gt_lst): assert optimizer.defaults['lr'] == 1 assert optimizer.defaults['weight_decay'] == 0.05 param_groups = optimizer.param_groups - # 1 layer (cls_token and patch_embed) + 3 layers * 2 (w, b) = 7 layers - assert len(param_groups) == 7 + # 1 layer (cls_token and patch_embed) + 4 layers * 2 (w, b) = 9 layers + assert len(param_groups) == 9 for i, param_dict in enumerate(param_groups): assert param_dict['weight_decay'] == gt_lst[i]['weight_decay'] assert param_dict['lr_scale'] == gt_lst[i]['lr_scale'] From 5a056fe9336a98bfede68a6c202be92ab84ec38e Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Mon, 25 Apr 2022 17:12:50 +0800 Subject: [PATCH 21/32] fix --- .../test_layer_decay_optimizer_constructor.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 3bbcdfdd70..74c4dd2f32 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +import pytest import torch import torch.nn as nn from mmcv.cnn import ConvModule @@ -173,6 +174,15 @@ def __init__(self, depth): self.backbone.BEiT = nn.Conv2d(3, 3, 1) +class ViTExampleModel(nn.Module): + + def __init__(self): + super().__init__() + self.backbone = nn.ModuleList() + self.backbone.cls_token = nn.Parameter(torch.ones(1)) + self.backbone.patch_embed = nn.Parameter(torch.ones(1)) + + def check_convnext_adamw_optimizer(optimizer, gt_lst): assert isinstance(optimizer, torch.optim.AdamW) assert optimizer.defaults['lr'] == base_lr @@ -226,6 +236,18 @@ def test_learning_rate_decay_optimizer_constructor(): optimizer = optim_constructor(model) check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) + with pytest.raises(NotImplementedError): + model = ViTExampleModel() + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, layerwise_paramwise_cfg) + optimizer = optim_constructor(model) + + with pytest.raises(NotImplementedError): + model = ViTExampleModel() + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, stagewise_paramwise_cfg) + optimizer = optim_constructor(model) + def test_beit_layer_decay_optimizer_constructor(): From 875d047b9f8aff2e7a638732edb613afdba63c64 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Mon, 25 Apr 2022 19:00:54 +0800 Subject: [PATCH 22/32] fix --- .../layer_decay_optimizer_constructor.py | 7 +- .../test_layer_decay_optimizer_constructor.py | 72 ++++++++++--------- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index ddf08a1348..990153d8ec 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -124,7 +124,6 @@ def add_params(self, params, module): logger.info('Build LearningRateDecayOptimizerConstructor ' f'{decay_type} {decay_rate} - {num_layers}') weight_decay = self.base_wd - for name, param in module.named_parameters(): if not param.requires_grad: continue # frozen weights @@ -136,17 +135,17 @@ def add_params(self, params, module): group_name = 'decay' this_weight_decay = weight_decay if 'layer_wise' in decay_type: - if 'ConvNeXt' in str(module.backbone): + if 'ConvNeXt' in module.backbone.__class__.__name__: layer_id = get_layer_id_for_convnext( name, self.paramwise_cfg.get('num_layers')) logger.info(f'set param {name} as id {layer_id}') - elif 'BEiT' in str(module.backbone): + elif 'BEiT' in module.backbone.__class__.__name__: layer_id = get_layer_id_for_vit(name, num_layers) logger.info(f'set param {name} as id {layer_id}') else: raise NotImplementedError() elif decay_type == 'stage_wise': - if 'ConvNeXt' in str(module.backbone): + if 'ConvNeXt' in module.backbone.__class__.__name__: layer_id = get_stage_id_for_convnext(name, num_layers) logger.info(f'set param {name} as id {layer_id}') else: diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 74c4dd2f32..43bd2b69b9 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -109,12 +109,6 @@ }, { 'weight_decay': 0.0, 'lr_scale': 2 -}, { - 'weight_decay': 0.05, - 'lr_scale': 1 -}, { - 'weight_decay': 0.0, - 'lr_scale': 1 }] @@ -122,38 +116,36 @@ class ConvNeXtExampleModel(nn.Module): def __init__(self): super().__init__() - self.backbone = nn.ModuleList() - self.backbone.stages = nn.ModuleList() + # self.backbone = nn.ModuleList() + self.stages = nn.ModuleList() for i in range(4): stage = nn.Sequential(ConvModule(3, 4, kernel_size=1, bias=True)) - self.backbone.stages.append(stage) - self.backbone.norm0 = nn.BatchNorm2d(2) + self.stages.append(stage) + self.norm0 = nn.BatchNorm2d(2) # add some variables to meet unit test coverate rate - self.backbone.cls_token = nn.Parameter(torch.ones(1)) - self.backbone.mask_token = nn.Parameter(torch.ones(1)) - self.backbone.pos_embed = nn.Parameter(torch.ones(1)) - self.backbone.stem_norm = nn.Parameter(torch.ones(1)) - self.backbone.downsample_norm0 = nn.BatchNorm2d(2) - self.backbone.downsample_norm1 = nn.BatchNorm2d(2) - self.backbone.downsample_norm2 = nn.BatchNorm2d(2) - self.backbone.lin = nn.Parameter(torch.ones(1)) - self.backbone.lin.requires_grad = False - self.backbone.downsample_layers = nn.ModuleList() + self.cls_token = nn.Parameter(torch.ones(1)) + self.mask_token = nn.Parameter(torch.ones(1)) + self.pos_embed = nn.Parameter(torch.ones(1)) + self.stem_norm = nn.Parameter(torch.ones(1)) + self.downsample_norm0 = nn.BatchNorm2d(2) + self.downsample_norm1 = nn.BatchNorm2d(2) + self.downsample_norm2 = nn.BatchNorm2d(2) + self.lin = nn.Parameter(torch.ones(1)) + self.lin.requires_grad = False + self.downsample_layers = nn.ModuleList() for i in range(4): stage = nn.Sequential(nn.Conv2d(3, 4, kernel_size=1, bias=True)) - self.backbone.downsample_layers.append(stage) + self.downsample_layers.append(stage) self.decode_head = nn.Conv2d(2, 2, kernel_size=1, groups=2) - # for test - self.backbone.ConvNeXt = nn.Conv2d(2, 2, kernel_size=1, groups=2) -class PseudoDataParallel(nn.Module): +class PseudoDataParallel1(nn.Module): def __init__(self): super().__init__() - self.module = ConvNeXtExampleModel() + self.backbone = ConvNeXtExampleModel() def forward(self, x): return x @@ -165,13 +157,23 @@ def __init__(self, depth): super().__init__() self.backbone = nn.ModuleList() # add some variables to meet unit test coverate rate - self.backbone.cls_token = nn.Parameter(torch.ones(1)) - self.backbone.patch_embed = nn.Parameter(torch.ones(1)) - self.backbone.layers = nn.ModuleList() + self.cls_token = nn.Parameter(torch.ones(1)) + self.patch_embed = nn.Parameter(torch.ones(1)) + self.layers = nn.ModuleList() for _ in range(depth): layer = nn.Conv2d(3, 3, 1) - self.backbone.layers.append(layer) - self.backbone.BEiT = nn.Conv2d(3, 3, 1) + self.layers.append(layer) + # self.backbone.BEiT = nn.Conv2d(3, 3, 1) + + +class PseudoDataParallel2(nn.Module): + + def __init__(self, depth): + super().__init__() + self.backbone = BEiTExampleModel(depth) + + def forward(self, x): + return x class ViTExampleModel(nn.Module): @@ -200,8 +202,8 @@ def check_beit_adamw_optimizer(optimizer, gt_lst): assert optimizer.defaults['lr'] == 1 assert optimizer.defaults['weight_decay'] == 0.05 param_groups = optimizer.param_groups - # 1 layer (cls_token and patch_embed) + 4 layers * 2 (w, b) = 9 layers - assert len(param_groups) == 9 + # 1 layer (cls_token and patch_embed) + 3 layers * 2 (w, b) = 9 layers + assert len(param_groups) == 7 for i, param_dict in enumerate(param_groups): assert param_dict['weight_decay'] == gt_lst[i]['weight_decay'] assert param_dict['lr_scale'] == gt_lst[i]['lr_scale'] @@ -211,7 +213,7 @@ def check_beit_adamw_optimizer(optimizer, gt_lst): def test_learning_rate_decay_optimizer_constructor(): # paramwise_cfg with ConvNeXtExampleModel - model = ConvNeXtExampleModel() + model = PseudoDataParallel1() optimizer_cfg = dict( type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05) stagewise_paramwise_cfg = dict( @@ -230,7 +232,7 @@ def test_learning_rate_decay_optimizer_constructor(): layerwise_paramwise_cfg = dict( decay_rate=decay_rate, decay_type='layer_wise', num_layers=3) - model = BEiTExampleModel(depth=3) + model = PseudoDataParallel2(depth=3) optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, layerwise_paramwise_cfg) optimizer = optim_constructor(model) @@ -252,7 +254,7 @@ def test_learning_rate_decay_optimizer_constructor(): def test_beit_layer_decay_optimizer_constructor(): # paramwise_cfg with BEiTExampleModel - model = BEiTExampleModel(depth=3) + model = PseudoDataParallel2(depth=3) optimizer_cfg = dict( type='AdamW', lr=1, betas=(0.9, 0.999), weight_decay=0.05) paramwise_cfg = dict(layer_decay_rate=2, num_layers=3) From 6eb5bc0128a0beb85d9a1f935db95b34ee8996d7 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Mon, 25 Apr 2022 19:03:27 +0800 Subject: [PATCH 23/32] fix --- tests/test_core/test_layer_decay_optimizer_constructor.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 43bd2b69b9..b229ea8cf5 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -116,7 +116,6 @@ class ConvNeXtExampleModel(nn.Module): def __init__(self): super().__init__() - # self.backbone = nn.ModuleList() self.stages = nn.ModuleList() for i in range(4): stage = nn.Sequential(ConvModule(3, 4, kernel_size=1, bias=True)) @@ -155,7 +154,6 @@ class BEiTExampleModel(nn.Module): def __init__(self, depth): super().__init__() - self.backbone = nn.ModuleList() # add some variables to meet unit test coverate rate self.cls_token = nn.Parameter(torch.ones(1)) self.patch_embed = nn.Parameter(torch.ones(1)) @@ -163,7 +161,6 @@ def __init__(self, depth): for _ in range(depth): layer = nn.Conv2d(3, 3, 1) self.layers.append(layer) - # self.backbone.BEiT = nn.Conv2d(3, 3, 1) class PseudoDataParallel2(nn.Module): From f99eb9af0bf2368bef6030f10f9a2a6db05dde89 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Tue, 26 Apr 2022 12:20:14 +0800 Subject: [PATCH 24/32] fix --- tests/test_core/test_layer_decay_optimizer_constructor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index b229ea8cf5..21fd60e309 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -199,7 +199,7 @@ def check_beit_adamw_optimizer(optimizer, gt_lst): assert optimizer.defaults['lr'] == 1 assert optimizer.defaults['weight_decay'] == 0.05 param_groups = optimizer.param_groups - # 1 layer (cls_token and patch_embed) + 3 layers * 2 (w, b) = 9 layers + # 1 layer (cls_token and patch_embed) + 3 layers * 2 (w, b) = 7layers assert len(param_groups) == 7 for i, param_dict in enumerate(param_groups): assert param_dict['weight_decay'] == gt_lst[i]['weight_decay'] From 97fb9997d336c700c5148cc8facbf709159ad88c Mon Sep 17 00:00:00 2001 From: MeowZheng Date: Tue, 26 Apr 2022 13:51:39 +0800 Subject: [PATCH 25/32] fix ut --- .../test_layer_decay_optimizer_constructor.py | 108 ++++++++---------- 1 file changed, 50 insertions(+), 58 deletions(-) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index b229ea8cf5..958f924e8c 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -12,7 +12,7 @@ base_wd = 0.05 weight_decay = 0.05 -stage_wise_gt_lst = [{ +stage_wise_gt_lr_wd_convnext = [{ 'weight_decay': 0.0, 'lr_scale': 128 }, { @@ -50,7 +50,7 @@ 'lr_scale': 1 }] -layer_wise_gt_lst = [{ +layer_wise_gt_lr_wd_convnext = [{ 'weight_decay': 0.0, 'lr_scale': 128 }, { @@ -88,7 +88,8 @@ 'lr_scale': 1 }] -layer_wise_wd_lr = [{ +# 1 layer (cls_token and patch_embed) + 3 layers * 2 (w, b) = 9 layers +layer_wise_gt_wd_lr_beit = [{ 'weight_decay': 0.0, 'lr_scale': 16 }, { @@ -109,10 +110,16 @@ }, { 'weight_decay': 0.0, 'lr_scale': 2 +}, { + 'weight_decay': 0.05, + 'lr_scale': 1 +}, { + 'weight_decay': 0.0, + 'lr_scale': 1 }] -class ConvNeXtExampleModel(nn.Module): +class ToyConvNeXt(nn.Module): def __init__(self): super().__init__() @@ -133,116 +140,100 @@ def __init__(self): self.lin = nn.Parameter(torch.ones(1)) self.lin.requires_grad = False self.downsample_layers = nn.ModuleList() - for i in range(4): + for _ in range(4): stage = nn.Sequential(nn.Conv2d(3, 4, kernel_size=1, bias=True)) self.downsample_layers.append(stage) - self.decode_head = nn.Conv2d(2, 2, kernel_size=1, groups=2) - -class PseudoDataParallel1(nn.Module): +class ToyBEiT(nn.Module): def __init__(self): - super().__init__() - self.backbone = ConvNeXtExampleModel() - - def forward(self, x): - return x - - -class BEiTExampleModel(nn.Module): - - def __init__(self, depth): super().__init__() # add some variables to meet unit test coverate rate self.cls_token = nn.Parameter(torch.ones(1)) self.patch_embed = nn.Parameter(torch.ones(1)) self.layers = nn.ModuleList() - for _ in range(depth): + for _ in range(3): layer = nn.Conv2d(3, 3, 1) self.layers.append(layer) -class PseudoDataParallel2(nn.Module): +class ToySegmentor(nn.Module): - def __init__(self, depth): + def __init__(self, backbone): super().__init__() - self.backbone = BEiTExampleModel(depth) + self.backbone = backbone + self.decode_head = nn.Conv2d(2, 2, kernel_size=1, groups=2) + - def forward(self, x): - return x +class PseudoDataParallel(nn.Module): + + def __init__(self, model): + super().__init__() + self.module = model -class ViTExampleModel(nn.Module): +class ToyViT(nn.Module): def __init__(self): super().__init__() - self.backbone = nn.ModuleList() - self.backbone.cls_token = nn.Parameter(torch.ones(1)) - self.backbone.patch_embed = nn.Parameter(torch.ones(1)) -def check_convnext_adamw_optimizer(optimizer, gt_lst): +def check_optimizer_lr_wd(optimizer, gt_lr_wd): assert isinstance(optimizer, torch.optim.AdamW) assert optimizer.defaults['lr'] == base_lr assert optimizer.defaults['weight_decay'] == base_wd param_groups = optimizer.param_groups - assert len(param_groups) == 12 + print(param_groups) + assert len(param_groups) == len(gt_lr_wd) for i, param_dict in enumerate(param_groups): - assert param_dict['weight_decay'] == gt_lst[i]['weight_decay'] - assert param_dict['lr_scale'] == gt_lst[i]['lr_scale'] + assert param_dict['weight_decay'] == gt_lr_wd[i]['weight_decay'] + assert param_dict['lr_scale'] == gt_lr_wd[i]['lr_scale'] assert param_dict['lr_scale'] == param_dict['lr'] -def check_beit_adamw_optimizer(optimizer, gt_lst): - assert isinstance(optimizer, torch.optim.AdamW) - assert optimizer.defaults['lr'] == 1 - assert optimizer.defaults['weight_decay'] == 0.05 - param_groups = optimizer.param_groups - # 1 layer (cls_token and patch_embed) + 3 layers * 2 (w, b) = 9 layers - assert len(param_groups) == 7 - for i, param_dict in enumerate(param_groups): - assert param_dict['weight_decay'] == gt_lst[i]['weight_decay'] - assert param_dict['lr_scale'] == gt_lst[i]['lr_scale'] - assert param_dict['lr_scale'] == param_dict['lr'] - - -def test_learning_rate_decay_optimizer_constructor(): +@pytest.mark.parametrize('backbone', (ToyBEiT(), )) +def test_learning_rate_decay_optimizer_constructor(backbone): - # paramwise_cfg with ConvNeXtExampleModel - model = PseudoDataParallel1() + # Test lr wd for ConvNeXT + backbone = ToyConvNeXt() + model = PseudoDataParallel(ToySegmentor(backbone)) optimizer_cfg = dict( type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05) + # stagewise decay stagewise_paramwise_cfg = dict( decay_rate=decay_rate, decay_type='stage_wise', num_layers=6) optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, stagewise_paramwise_cfg) optimizer = optim_constructor(model) - check_convnext_adamw_optimizer(optimizer, stage_wise_gt_lst) - + check_optimizer_lr_wd(optimizer, stage_wise_gt_lr_wd_convnext) + # layerwise decay layerwise_paramwise_cfg = dict( decay_rate=decay_rate, decay_type='layer_wise', num_layers=6) optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, layerwise_paramwise_cfg) optimizer = optim_constructor(model) - check_convnext_adamw_optimizer(optimizer, layer_wise_gt_lst) + check_optimizer_lr_wd(optimizer, layer_wise_gt_lr_wd_convnext) + + # Test lr wd for BEiT + backbone = ToyBEiT() + model = PseudoDataParallel(ToySegmentor(backbone)) layerwise_paramwise_cfg = dict( decay_rate=decay_rate, decay_type='layer_wise', num_layers=3) - model = PseudoDataParallel2(depth=3) optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, layerwise_paramwise_cfg) optimizer = optim_constructor(model) - check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) + check_optimizer_lr_wd(optimizer, layer_wise_gt_wd_lr_beit) + # Test invalidation of lr wd for Vit + backbone = ToyViT() + model = PseudoDataParallel(ToySegmentor(backbone)) with pytest.raises(NotImplementedError): - model = ViTExampleModel() optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, layerwise_paramwise_cfg) optimizer = optim_constructor(model) - with pytest.raises(NotImplementedError): - model = ViTExampleModel() optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, stagewise_paramwise_cfg) optimizer = optim_constructor(model) @@ -251,11 +242,12 @@ def test_learning_rate_decay_optimizer_constructor(): def test_beit_layer_decay_optimizer_constructor(): # paramwise_cfg with BEiTExampleModel - model = PseudoDataParallel2(depth=3) + backbone = ToyBEiT() + model = PseudoDataParallel(ToySegmentor(backbone)) optimizer_cfg = dict( type='AdamW', lr=1, betas=(0.9, 0.999), weight_decay=0.05) paramwise_cfg = dict(layer_decay_rate=2, num_layers=3) optim_constructor = LayerDecayOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) - check_beit_adamw_optimizer(optimizer, layer_wise_wd_lr) + check_optimizer_lr_wd(optimizer, layer_wise_gt_wd_lr_beit) From 2acb909866426616d3a99110cc514af671641ff4 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Tue, 26 Apr 2022 15:21:15 +0800 Subject: [PATCH 26/32] fix --- .../optimizers/layer_decay_optimizer_constructor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index 990153d8ec..ca8cafe92e 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -8,13 +8,13 @@ from ..builder import OPTIMIZER_BUILDERS -def get_layer_id_for_convnext(var_name, max_layer_id=12): +def get_layer_id_for_convnext(var_name, max_layer_id): """Get the layer id to set the different learning rates in ``layer_wise`` decay_type. Args: var_name (str): The key of the model. - num_max_layer (int): Maximum number of backbone layers. + max_layer_id (int): Maximum number of backbone layers. Returns: int: The id number corresponding to different learning rate in @@ -52,12 +52,12 @@ def get_layer_id_for_convnext(var_name, max_layer_id=12): def get_stage_id_for_convnext(var_name, max_stage_id): - """Get the layer id to set the different learning rates in ``stage_wise`` + """Get the stage id to set the different learning rates in ``stage_wise`` decay_type. Args: var_name (str): The key of the model. - num_max_layer (int): Maximum number of backbone layers. + max_stage_id (int): Maximum number of backbone layers. Returns: int: The id number corresponding to different learning rate in ``LearningRateDecayOptimizerConstructor``. @@ -103,7 +103,7 @@ class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): # Note: Currently, this optimizer constructor is built for ConvNeXt # and BEiT. - def add_params(self, params, module): + def add_params(self, params, module, **kwargs): """Add all parameters of module to the params list. The parameters of the given module will be added to the list of param From 494c8ac1d651f4b3478951b2b1956d81c7437cf0 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Tue, 26 Apr 2022 15:32:55 +0800 Subject: [PATCH 27/32] fix --- mmseg/core/optimizers/layer_decay_optimizer_constructor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index ca8cafe92e..bbd2e2fa76 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -82,7 +82,7 @@ def get_layer_id_for_vit(var_name, max_layer_id): var_name (str): The key of the model. num_max_layer (int): Maximum number of backbone layers. Returns: - layer id (int): Returns the layer id of the key. + int: Returns the layer id of the key. """ if var_name in ('backbone.cls_token', 'backbone.mask_token', From 50d56c667b89b4f85d4edad721eb24312de22b61 Mon Sep 17 00:00:00 2001 From: Miao Zheng <76149310+MeowZheng@users.noreply.github.com> Date: Tue, 26 Apr 2022 15:42:16 +0800 Subject: [PATCH 28/32] Update tests/test_core/test_layer_decay_optimizer_constructor.py --- tests/test_core/test_layer_decay_optimizer_constructor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 8b61cdaede..6d867e9e7c 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -191,8 +191,7 @@ def check_optimizer_lr_wd(optimizer, gt_lr_wd): assert param_dict['lr_scale'] == param_dict['lr'] -@pytest.mark.parametrize('backbone', (ToyBEiT(), )) -def test_learning_rate_decay_optimizer_constructor(backbone): +def test_learning_rate_decay_optimizer_constructor(): # Test lr wd for ConvNeXT backbone = ToyConvNeXt() From 4d13e41c8385712263feb1a4577703cc257a4f03 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Tue, 26 Apr 2022 15:49:54 +0800 Subject: [PATCH 29/32] fix --- .../test_layer_decay_optimizer_constructor.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 6d867e9e7c..530e31612b 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -12,7 +12,7 @@ base_wd = 0.05 weight_decay = 0.05 -stage_wise_gt_lr_wd_convnext = [{ +expected_stage_wise_gt_lr_wd_convnext = [{ 'weight_decay': 0.0, 'lr_scale': 128 }, { @@ -50,7 +50,7 @@ 'lr_scale': 1 }] -layer_wise_gt_lr_wd_convnext = [{ +expected_layer_wise_gt_lr_wd_convnext = [{ 'weight_decay': 0.0, 'lr_scale': 128 }, { @@ -88,7 +88,7 @@ 'lr_scale': 1 }] -layer_wise_gt_wd_lr_beit = [{ +expected_layer_wise_gt_wd_lr_beit = [{ 'weight_decay': 0.0, 'lr_scale': 16 }, { @@ -204,14 +204,14 @@ def test_learning_rate_decay_optimizer_constructor(): optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, stagewise_paramwise_cfg) optimizer = optim_constructor(model) - check_optimizer_lr_wd(optimizer, stage_wise_gt_lr_wd_convnext) + check_optimizer_lr_wd(optimizer, expected_stage_wise_gt_lr_wd_convnext) # layerwise decay layerwise_paramwise_cfg = dict( decay_rate=decay_rate, decay_type='layer_wise', num_layers=6) optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, layerwise_paramwise_cfg) optimizer = optim_constructor(model) - check_optimizer_lr_wd(optimizer, layer_wise_gt_lr_wd_convnext) + check_optimizer_lr_wd(optimizer, expected_layer_wise_gt_lr_wd_convnext) # Test lr wd for BEiT backbone = ToyBEiT() @@ -222,7 +222,7 @@ def test_learning_rate_decay_optimizer_constructor(): optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, layerwise_paramwise_cfg) optimizer = optim_constructor(model) - check_optimizer_lr_wd(optimizer, layer_wise_gt_wd_lr_beit) + check_optimizer_lr_wd(optimizer, expected_layer_wise_gt_wd_lr_beit) # Test invalidation of lr wd for Vit backbone = ToyViT() @@ -248,4 +248,4 @@ def test_beit_layer_decay_optimizer_constructor(): optim_constructor = LayerDecayOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) - check_optimizer_lr_wd(optimizer, layer_wise_gt_wd_lr_beit) + check_optimizer_lr_wd(optimizer, expected_layer_wise_gt_wd_lr_beit) From 6285ae8297e7d52bb97087c413e5ae2c447595ca Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Tue, 26 Apr 2022 16:02:50 +0800 Subject: [PATCH 30/32] fix --- .../layer_decay_optimizer_constructor.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index bbd2e2fa76..3227df894b 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -99,9 +99,11 @@ def get_layer_id_for_vit(var_name, max_layer_id): @OPTIMIZER_BUILDERS.register_module() class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): - # Different learning rates are set for different layers of backbone. - # Note: Currently, this optimizer constructor is built for ConvNeXt - # and BEiT. + """Different learning rates are set for different layers of backbone. + + Note: Currently, this optimizer constructor is built for ConvNeXt + and BEiT. + """ def add_params(self, params, module, **kwargs): """Add all parameters of module to the params list. @@ -182,10 +184,12 @@ def add_params(self, params, module, **kwargs): @OPTIMIZER_BUILDERS.register_module() class LayerDecayOptimizerConstructor(LearningRateDecayOptimizerConstructor): - # Different learning rates are set for different layers of backbone. - # Note: Currently, this optimizer constructor is built for BEiT, - # and it will be deprecated. - # Please use ``LearningRateDecayOptimizerConstructor`` instead. + """Different learning rates are set for different layers of backbone. + + Note: Currently, this optimizer constructor is built for BEiT, + and it will be deprecated. + Please use ``LearningRateDecayOptimizerConstructor`` instead. + """ def __init__(self, optimizer_cfg, paramwise_cfg): warnings.warn('DeprecationWarning: Original ' From 8e11179bd1c755740a4fd1ae86be6857a748019d Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Tue, 26 Apr 2022 18:37:46 +0800 Subject: [PATCH 31/32] fix --- mmseg/core/optimizers/layer_decay_optimizer_constructor.py | 2 ++ tests/test_core/test_layer_decay_optimizer_constructor.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py index 3227df894b..ce376760bd 100644 --- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py +++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -58,6 +58,7 @@ def get_stage_id_for_convnext(var_name, max_stage_id): Args: var_name (str): The key of the model. max_stage_id (int): Maximum number of backbone layers. + Returns: int: The id number corresponding to different learning rate in ``LearningRateDecayOptimizerConstructor``. @@ -81,6 +82,7 @@ def get_layer_id_for_vit(var_name, max_layer_id): Args: var_name (str): The key of the model. num_max_layer (int): Maximum number of backbone layers. + Returns: int: Returns the layer id of the key. """ diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 530e31612b..45591d22b4 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -12,7 +12,7 @@ base_wd = 0.05 weight_decay = 0.05 -expected_stage_wise_gt_lr_wd_convnext = [{ +expected_stage_wise_lr_wd_convnext = [{ 'weight_decay': 0.0, 'lr_scale': 128 }, { @@ -204,7 +204,7 @@ def test_learning_rate_decay_optimizer_constructor(): optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, stagewise_paramwise_cfg) optimizer = optim_constructor(model) - check_optimizer_lr_wd(optimizer, expected_stage_wise_gt_lr_wd_convnext) + check_optimizer_lr_wd(optimizer, expected_stage_wise_lr_wd_convnext) # layerwise decay layerwise_paramwise_cfg = dict( decay_rate=decay_rate, decay_type='layer_wise', num_layers=6) From 23b920ddf3e5cd07ac9c3969a9bb5756d6b5e2f9 Mon Sep 17 00:00:00 2001 From: linfangjian01 Date: Tue, 26 Apr 2022 20:46:37 +0800 Subject: [PATCH 32/32] fix --- .../test_layer_decay_optimizer_constructor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py index 45591d22b4..268a9a1489 100644 --- a/tests/test_core/test_layer_decay_optimizer_constructor.py +++ b/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -50,7 +50,7 @@ 'lr_scale': 1 }] -expected_layer_wise_gt_lr_wd_convnext = [{ +expected_layer_wise_lr_wd_convnext = [{ 'weight_decay': 0.0, 'lr_scale': 128 }, { @@ -88,7 +88,7 @@ 'lr_scale': 1 }] -expected_layer_wise_gt_wd_lr_beit = [{ +expected_layer_wise_wd_lr_beit = [{ 'weight_decay': 0.0, 'lr_scale': 16 }, { @@ -211,7 +211,7 @@ def test_learning_rate_decay_optimizer_constructor(): optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, layerwise_paramwise_cfg) optimizer = optim_constructor(model) - check_optimizer_lr_wd(optimizer, expected_layer_wise_gt_lr_wd_convnext) + check_optimizer_lr_wd(optimizer, expected_layer_wise_lr_wd_convnext) # Test lr wd for BEiT backbone = ToyBEiT() @@ -222,7 +222,7 @@ def test_learning_rate_decay_optimizer_constructor(): optim_constructor = LearningRateDecayOptimizerConstructor( optimizer_cfg, layerwise_paramwise_cfg) optimizer = optim_constructor(model) - check_optimizer_lr_wd(optimizer, expected_layer_wise_gt_wd_lr_beit) + check_optimizer_lr_wd(optimizer, expected_layer_wise_wd_lr_beit) # Test invalidation of lr wd for Vit backbone = ToyViT() @@ -248,4 +248,4 @@ def test_beit_layer_decay_optimizer_constructor(): optim_constructor = LayerDecayOptimizerConstructor(optimizer_cfg, paramwise_cfg) optimizer = optim_constructor(model) - check_optimizer_lr_wd(optimizer, expected_layer_wise_gt_wd_lr_beit) + check_optimizer_lr_wd(optimizer, expected_layer_wise_wd_lr_beit)