Allow setting per-layer learning rates #1612

Merged (20 commits), Nov 13, 2023
Commits (20)
678276b
updated schedulers, warmup and logging
shaydeci Nov 5, 2023
012fb72
updated recipes
shaydeci Nov 5, 2023
c56f85a
updated tests
shaydeci Nov 5, 2023
b16a76a
updated some typing and docs
shaydeci Nov 6, 2023
df5d481
updated some typing and docs
shaydeci Nov 6, 2023
8ba7c44
Merge branch 'master' into feature/SG-1209_introduce_optimizer_initia…
shaydeci Nov 6, 2023
595706a
removed update_param_groups test
shaydeci Nov 6, 2023
aaf56c1
Merge remote-tracking branch 'origin/feature/SG-1209_introduce_optimi…
shaydeci Nov 6, 2023
ed47193
handled training with instantiated optimizer
shaydeci Nov 7, 2023
30170ae
updated instantiated optimizer tests
shaydeci Nov 7, 2023
f5e3c06
Merge branch 'master' into feature/SG-1209_introduce_optimizer_initia…
shaydeci Nov 7, 2023
0c08687
fixed erased initial_lr in unit test
shaydeci Nov 7, 2023
c4f9f54
Merge remote-tracking branch 'origin/feature/SG-1209_introduce_optimi…
shaydeci Nov 7, 2023
46a23a8
Merge branch 'master' into feature/SG-1209_introduce_optimizer_initia…
shaydeci Nov 7, 2023
c1d3c79
updated message and tests
shaydeci Nov 9, 2023
966180d
Merge remote-tracking branch 'origin/feature/SG-1209_introduce_optimi…
shaydeci Nov 9, 2023
8b61ce3
Merge branch 'master' into feature/SG-1209_introduce_optimizer_initia…
BloodAxe Nov 9, 2023
bb6bc7a
added train test
shaydeci Nov 12, 2023
c122c0e
Merge remote-tracking branch 'origin/feature/SG-1209_introduce_optimi…
shaydeci Nov 12, 2023
8aed103
Merge branch 'master' into feature/SG-1209_introduce_optimizer_initia…
BloodAxe Nov 13, 2023
12 changes: 11 additions & 1 deletion src/super_gradients/recipes/cityscapes_ddrnet.yaml
@@ -55,7 +55,17 @@ architecture: ddrnet_23

 training_hyperparams:
   max_epochs: 500
-  initial_lr: 0.0075 # batch size 24
+  initial_lr: # batch size 24
+    default: 0.075
+    # backbone layers
+    _backbone: 0.0075
+    compression3: 0.0075
+    compression4: 0.0075
+    down3: 0.0075
+    down4: 0.0075
+    layer3_skip: 0.0075
+    layer4_skip: 0.0075
+    layer5_skip: 0.0075
   loss:
     DiceCEEdgeLoss:
       num_classes: 19
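The recipe changes in this PR all follow the same pattern: initial_lr becomes a mapping with a mandatory "default" entry plus optional entries keyed by prefixes of the model's parameter names. A minimal sketch of that mapping written as a plain Python dict, mirroring the DDRNet recipe above:

# Per-layer learning rates: "default" applies to every parameter whose name
# does not start with one of the other keys; a value of 0 would freeze that part.
initial_lr = {
    "default": 0.075,
    "_backbone": 0.0075,    # any parameter whose name starts with "_backbone"
    "compression3": 0.0075,
}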
14 changes: 13 additions & 1 deletion src/super_gradients/recipes/cityscapes_kd_base.yaml
@@ -50,7 +50,19 @@ resume: False
 training_hyperparams:
   sync_bn: True
   max_epochs: 500
-  initial_lr: 0.0075 # batch size 24
+
+  initial_lr: # batch size 24
+    default: 0.075
+    # backbone layers
+    _backbone: 0.0075
+    compression3: 0.0075
+    compression4: 0.0075
+    down3: 0.0075
+    down4: 0.0075
+    layer3_skip: 0.0075
+    layer4_skip: 0.0075
+    layer5_skip: 0.0075
+
   resume: ${resume}
   loss:
     _target_: super_gradients.training.losses.seg_kd_loss.SegKDLoss
3 changes: 3 additions & 0 deletions src/super_gradients/recipes/cityscapes_pplite_seg50.yaml
@@ -67,6 +67,9 @@ checkpoint_params:

 training_hyperparams:
   sync_bn: True
+  initial_lr:
+    "encoder.backbone": 0.01
+    default: 0.1
   loss:
     DiceCEEdgeLoss:
       num_classes: 19
4 changes: 4 additions & 0 deletions src/super_gradients/recipes/cityscapes_pplite_seg75.yaml
@@ -62,6 +62,10 @@ checkpoint_params:

 training_hyperparams:
   sync_bn: True
+  initial_lr:
+    "encoder.backbone": 0.01
+    default: 0.1
+
   loss:
     DiceCEEdgeLoss:
       num_classes: 19
3 changes: 3 additions & 0 deletions src/super_gradients/recipes/cityscapes_stdc_seg50.yaml
@@ -60,6 +60,9 @@ checkpoint_params:
   strict_load: no_key_matching

 training_hyperparams:
+  initial_lr:
+    cp: 0.01
+    default: 0.1
   sync_bn: True
   loss:
     DiceCEEdgeLoss:
5 changes: 4 additions & 1 deletion src/super_gradients/recipes/cityscapes_stdc_seg75.yaml
@@ -64,7 +64,10 @@ checkpoint_params:
   strict_load: no_key_matching

 training_hyperparams:
-  initial_lr: 0.005
+  initial_lr:
+    cp: 0.005
+    default: 0.05
+
   sync_bn: True

   loss:
@@ -8,7 +8,9 @@ warmup_initial_lr: 1e-6
 lr_warmup_steps: 1000
 lr_warmup_epochs: 0

-initial_lr: 2e-4
+initial_lr: 2e-4
+
+
 lr_mode: CosineLRScheduler
 cosine_final_lr_ratio: 0.1

@@ -3,7 +3,7 @@
 import numpy as np
 import torch
 from torch import Tensor
-from torch.utils.data import default_collate
+from torch.utils.data.dataloader import default_collate

 from super_gradients.common.registry.registry import register_collate_function
 from super_gradients.training.samples import PoseEstimationSample
@@ -458,7 +458,7 @@ def _separate_lr_multiply_params(self):
         backbone_names = [n for n, p in self.backbone.named_parameters()]
         multiply_lr_params, no_multiply_params = {}, {}
         for name, param in self.named_parameters():
-            if name in backbone_names:
+            if any([backbone_name in name for backbone_name in backbone_names]):
                 no_multiply_params[name] = param
             else:
                 multiply_lr_params[name] = param
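The one-line change above replaces an exact-name lookup with a substring test when splitting parameters into the multiply / no-multiply groups. A self-contained sketch of the behavioural difference, using made-up parameter names rather than anything from this diff:

# When backbone is a submodule attribute, backbone.named_parameters() yields
# unprefixed names, while self.named_parameters() prefixes them with "backbone.".
backbone_names = ["layer1.conv.weight", "layer1.conv.bias"]
model_names = ["backbone.layer1.conv.weight", "head.cls.weight"]

exact = [n for n in model_names if n in backbone_names]
fuzzy = [n for n in model_names if any(b in n for b in backbone_names)]

print(exact)  # [] -- exact membership never matches the prefixed names
print(fuzzy)  # ['backbone.layer1.conv.weight'] -- substring matching still assigns it to the backbone group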
8 changes: 7 additions & 1 deletion src/super_gradients/training/params.py
@@ -51,6 +51,7 @@
     "warmup_mode": "LinearEpochLRWarmup",
     "step_lr_update_freq": None,
     "lr_updates": [],
+    "initial_lr": None,
     "clip_grad_norm": None,
     "pre_prediction_callback": None,
     "ckpt_best_name": "ckpt_best.pth",
@@ -98,7 +99,12 @@
         # "lr_updates": {"type": "array", "minItems": 1},
         "lr_decay_factor": {"type": "number", "minimum": 0, "maximum": 1},
         "lr_warmup_epochs": {"type": "number", "minimum": 0, "maximum": 10},
-        "initial_lr": {"type": "number", "exclusiveMinimum": 0, "maximum": 10},
+        "initial_lr": {
+            "anyOf": [
+                {"type": ["number", "string", "boolean", "null"]},
+                {"type": "object", "patternProperties": {"^[a-zA-Z0-9_.]+$": {"type": "number"}}, "additionalProperties": False},
+            ]
+        },
     },
     "if": {"properties": {"lr_mode": {"const": "StepLRScheduler"}}},
     "then": {"required": ["lr_updates", "lr_decay_factor"]},
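Under the new schema, initial_lr may be a scalar (number, string, boolean, or null) or an object whose keys match ^[a-zA-Z0-9_.]+$ and whose values are numbers. A quick way to see which values the fragment accepts, using the standalone jsonschema package (an external check for illustration only, not necessarily how super-gradients performs its validation):

from jsonschema import ValidationError, validate

initial_lr_schema = {
    "anyOf": [
        {"type": ["number", "string", "boolean", "null"]},
        {"type": "object", "patternProperties": {"^[a-zA-Z0-9_.]+$": {"type": "number"}}, "additionalProperties": False},
    ]
}

for candidate in [0.01, {"default": 0.1, "encoder.backbone": 0.01}, {"head": "fast"}]:
    try:
        validate(instance=candidate, schema=initial_lr_schema)
        print(candidate, "-> accepted")
    except ValidationError:
        print(candidate, "-> rejected")  # {"head": "fast"} is rejected: mapped values must be numbers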
44 changes: 31 additions & 13 deletions src/super_gradients/training/sg_trainer/sg_trainer.py
@@ -76,7 +76,7 @@
     broadcast_from_master,
 )
 from super_gradients.training.utils.ema import ModelEMA
-from super_gradients.training.utils.optimizer_utils import build_optimizer
+from super_gradients.training.utils.optimizer_utils import build_optimizer, get_initial_lr_from_optimizer
 from super_gradients.training.utils.sg_trainer_utils import MonitoredValue, log_main_training_params
 from super_gradients.training.utils.utils import fuzzy_idx_in_list, unwrap_model
 from super_gradients.training.utils.weight_averaging_utils import ModelWeightAveraging
@@ -906,9 +906,16 @@ def train(
                 Final learning rate ratio (only relevant when `lr_mode`='CosineLRScheduler'). The cosine starts from initial_lr and reaches
                 initial_lr * cosine_final_lr_ratio in last epoch

-            - `inital_lr` : float
+            - `initial_lr` : Union[float, Dict[str, float]]

-                Initial learning rate.
+                Initial learning rate, as either:
+                    a float - a single learning rate used for all parameters, or
+                    a dictionary whose keys are group names and whose values are learning rates,
+                    for example {"default": 0.01, "head": 0.1}.
+
+                    - Keys in such a mapping are prefixes of the model's named parameters.
+                    - The "default" key is mandatory; its value is used for any parameter group not covered by the other keys.
+                    - It is also possible to freeze parts of the model by assigning them a learning rate of 0.

             - `loss` : Union[nn.module, str]

@@ -1310,6 +1317,20 @@ def forward(self, inputs, targets):
             else:
                 raise RuntimeError("warmup_mode has to be either a name of a mode (str) or a subclass of PhaseCallback")

+            if isinstance(self.training_params.optimizer, str) or (
+                inspect.isclass(self.training_params.optimizer) and issubclass(self.training_params.optimizer, torch.optim.Optimizer)
+            ):
+                self.optimizer = build_optimizer(net=unwrap_model(self.net), lr=self.training_params.initial_lr, training_params=self.training_params)
+            elif isinstance(self.training_params.optimizer, torch.optim.Optimizer):
+                if self.training_params.initial_lr is not None:
+                    raise RuntimeError("An instantiated optimizer cannot be passed along initial_lr != None")
+                self.optimizer = self.training_params.optimizer
+
+                # Extract initial_lr from the optimizer's param groups so schedulers and logging can use it.
+                self.training_params.initial_lr = get_initial_lr_from_optimizer(self.optimizer)
+            else:
+                raise UnsupportedOptimizerFormat()
+
             if warmup_callback_cls is not None:
                 self.phase_callbacks.append(
                     warmup_callback_cls(
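get_initial_lr_from_optimizer is imported and called above, but its body is not part of the hunks shown here. Purely as an assumption about its intent (recovering per-group learning rates from an already-instantiated optimizer so that schedulers, warmup and logging can still work), a rough sketch might look like this:

from typing import Dict, Union

import torch


def get_initial_lr_from_optimizer_sketch(optimizer: torch.optim.Optimizer) -> Union[float, Dict[str, float]]:
    """Hypothetical stand-in for the real helper in optimizer_utils; its actual implementation is not shown in this diff."""
    if len(optimizer.param_groups) == 1:
        return optimizer.param_groups[0]["lr"]
    # Fall back to the group's index when a param group was created without a "name" entry.
    return {str(group.get("name", i)): group["lr"] for i, group in enumerate(optimizer.param_groups)}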
@@ -1343,15 +1364,6 @@ def forward(self, inputs, targets):
             self._reset_best_metric()
             load_opt_params = False

-            if isinstance(self.training_params.optimizer, str) or (
-                inspect.isclass(self.training_params.optimizer) and issubclass(self.training_params.optimizer, torch.optim.Optimizer)
-            ):
-                self.optimizer = build_optimizer(net=unwrap_model(self.net), lr=self.training_params.initial_lr, training_params=self.training_params)
-            elif isinstance(self.training_params.optimizer, torch.optim.Optimizer):
-                self.optimizer = self.training_params.optimizer
-            else:
-                raise UnsupportedOptimizerFormat()
-
         if self.lr_mode is not None:
             lr_scheduler_callback = create_lr_scheduler_callback(
                 lr_mode=self.lr_mode,
@@ -1448,6 +1460,8 @@ def forward(self, inputs, targets):
             train_dataset_length=len(self.train_loader.dataset),
             train_dataloader_len=len(self.train_loader),
             max_train_batches=self.max_train_batches,
+            model=unwrap_model(self.net),
+            param_groups=self.optimizer.param_groups,
         )

         self._maybe_set_preprocessing_params_for_model_from_dataset()
@@ -1992,7 +2006,11 @@ def _get_epoch_start_logging_values(self) -> dict:
         """Get all the values that should be logged at the start of each epoch.
         This is useful for values like Learning Rate that can change over an epoch."""
         lrs = [self.optimizer.param_groups[i]["lr"] for i in range(len(self.optimizer.param_groups))]
-        lr_titles = ["LR/Param_group_" + str(i) for i in range(len(self.optimizer.param_groups))] if len(self.optimizer.param_groups) > 1 else ["LR"]
+        lr_titles = (
+            ["LR/" + self.optimizer.param_groups[i].get("name", str(i)) for i in range(len(self.optimizer.param_groups))]
+            if len(self.optimizer.param_groups) > 1
+            else ["LR"]
+        )
         lr_dict = {lr_titles[i]: lrs[i] for i in range(len(lrs))}
         return lr_dict

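Taken together, these changes let training_params carry a per-layer mapping instead of a single float. A hedged end-to-end sketch of how that might be used from Python (the model name, metric names and group prefixes below are placeholders; pick prefixes that actually appear in your model's named_parameters()):

import torch

from super_gradients import Trainer
from super_gradients.training import models

trainer = Trainer(experiment_name="per_layer_lr_demo")
model = models.get("resnet18", num_classes=10)

training_params = {
    "max_epochs": 10,
    "optimizer": "SGD",
    "loss": torch.nn.CrossEntropyLoss(),
    "lr_mode": "CosineLRScheduler",
    "cosine_final_lr_ratio": 0.1,
    "metric_to_watch": "Accuracy",
    "train_metrics_list": ["Accuracy"],
    "valid_metrics_list": ["Accuracy"],
    # "default" is mandatory; other keys are prefixes of model.named_parameters().
    # Assigning 0 to a prefix would freeze those parameters.
    "initial_lr": {"default": 0.1, "layer4": 0.01, "linear": 0.1},
}

# train_loader / valid_loader stand in for real dataloaders:
# trainer.train(model=model, training_params=training_params,
#               train_loader=train_loader, valid_loader=valid_loader)

With more than one parameter group, the epoch-start logging changed above would then report one value per group (for example LR/default and LR/layer4), assuming build_optimizer stores the prefix under the group's "name" key.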