From c26527171a30a31b9b3966f9d2f3cb4d28fe3c55 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 5 Oct 2023 17:37:47 +0300 Subject: [PATCH 1/9] added deprecation decorator and removed some refs --- documentation/source/Checkpoints.md | 2 +- .../source/Example_Classification.md | 1 - documentation/source/LRScheduling.md | 6 +-- documentation/source/Losses.md | 18 ++++----- documentation/source/PhaseCallbacks.md | 2 +- documentation/source/configuration_files.md | 1 - src/super_gradients/common/deprecate.py | 40 ++++++++++++++++++- .../cifar10_training_torch_objects_example.py | 1 - .../deci_lab_export_example.py | 1 - .../examples/early_stop/early_stop_example.py | 1 - .../recipes/cityscapes_regseg48.yaml | 6 +-- .../coco2017_ssd_lite_mobilenet_v2.yaml | 8 ++-- .../recipes/roboflow_ppyoloe.yaml | 2 - .../coco2017_dekr_pose_train_params.yaml | 11 +++-- .../coco2017_rescoring_train_params.yaml | 2 - ...17_ssd_lite_mobilenet_v2_train_params.yaml | 9 ++--- .../coco2017_yolox_train_params.yaml | 9 ++--- .../default_train_params.yaml | 2 +- .../imagenet_efficientnet_train_params.yaml | 6 +-- .../imagenet_mobilenetv3_train_params.yaml | 6 +-- .../imagenet_regnetY_train_params.yaml | 6 +-- src/super_gradients/training/params.py | 5 ++- .../training/sg_trainer/sg_trainer.py | 6 +-- .../conversion_callback_test.py | 1 - .../integration_tests/deci_lab_export_test.py | 1 - .../ema_train_integration_test.py | 1 - .../pretrained_models_test.py | 6 +-- .../coded_qat_launch_test.py | 2 - tests/unit_tests/dataset_statistics_test.py | 3 +- tests/unit_tests/detection_dataset_test.py | 3 +- tests/unit_tests/double_training_test.py | 1 - tests/unit_tests/early_stop_test.py | 1 - tests/unit_tests/factories_test.py | 2 - tests/unit_tests/forward_pass_prep_fn_test.py | 1 - tests/unit_tests/kd_ema_test.py | 1 - tests/unit_tests/kd_trainer_test.py | 1 - tests/unit_tests/load_ema_ckpt_test.py | 1 - .../local_ckpt_head_replacement_test.py | 1 - tests/unit_tests/loss_loggings_test.py | 3 -- 
tests/unit_tests/lr_cooldown_test.py | 1 - tests/unit_tests/lr_warmup_test.py | 5 --- .../unit_tests/max_batches_loop_break_test.py | 2 - .../optimizer_params_override_test.py | 2 - tests/unit_tests/phase_context_test.py | 1 - tests/unit_tests/preprocessing_unit_test.py | 6 +-- tests/unit_tests/resume_training_test.py | 4 -- tests/unit_tests/save_ckpt_test.py | 1 - .../test_train_with_torch_scheduler.py | 1 - tests/unit_tests/train_after_test_test.py | 1 - tests/unit_tests/train_logging_test.py | 1 - .../train_with_intialized_param_args_test.py | 7 ---- .../unit_tests/train_with_precise_bn_test.py | 2 - .../update_param_groups_unit_test.py | 1 - tests/unit_tests/vit_unit_test.py | 1 - 54 files changed, 93 insertions(+), 123 deletions(-) diff --git a/documentation/source/Checkpoints.md b/documentation/source/Checkpoints.md index 48847ca871..4d1eb149e8 100644 --- a/documentation/source/Checkpoints.md +++ b/documentation/source/Checkpoints.md @@ -80,7 +80,7 @@ model = models.get(model_name=Models.RESNET18, num_classes=10) train_params = { ... "loss": "LabelSmoothingCrossEntropyLoss", - "criterion_params": {}, + "save_ckpt_epoch_list": [10,15] ... 
} diff --git a/documentation/source/Example_Classification.md b/documentation/source/Example_Classification.md index f6d9d6606b..3224bdc172 100644 --- a/documentation/source/Example_Classification.md +++ b/documentation/source/Example_Classification.md @@ -308,7 +308,6 @@ Output (Training parameters): 'ckpt_name': 'ckpt_latest.pth', 'clip_grad_norm': None, 'cosine_final_lr_ratio': 0.01, - 'criterion_params': {}, 'dataset_statistics': False, 'ema': False, 'ema_params': {'decay': 0.9999, 'decay_type': 'exp', 'beta': 15}, diff --git a/documentation/source/LRScheduling.md b/documentation/source/LRScheduling.md index 04cfa238bf..4816074fed 100644 --- a/documentation/source/LRScheduling.md +++ b/documentation/source/LRScheduling.md @@ -299,7 +299,7 @@ train_params = { "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), "optimizer": "SGD", - "criterion_params": {}, + "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -327,7 +327,6 @@ training_hyperparams: initial_lr: 0.1 loss: CrossEntropyLoss optimizer: SGD - criterion_params: {} optimizer_params: weight_decay: 1e-4 momentum: 0.9 @@ -366,7 +365,7 @@ train_params = { "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), "optimizer": "SGD", - "criterion_params": {}, + "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -398,7 +397,6 @@ training_hyperparams: initial_lr: 0.1 loss: CrossEntropyLoss optimizer: SGD - criterion_params: {} optimizer_params: weight_decay: 1e-4 momentum: 0.9 diff --git a/documentation/source/Losses.md b/documentation/source/Losses.md index e7fd837566..88c1094c5e 100644 --- a/documentation/source/Losses.md +++ b/documentation/source/Losses.md @@ -32,7 +32,6 @@ model = ... train_params = { ... "loss": "LabelSmoothingCrossEntropyLoss", - "criterion_params": {} ... 
} trainer.train(model=model, training_params=train_params, train_loader=train_dataloader, valid_loader=valid_dataloader) @@ -54,15 +53,12 @@ When doing so, in your `my_training_hyperparams.yaml` file: ```yaml ... -loss: YoloXDetectionLoss - -criterion_params: - strides: [8, 16, 32] # output strides of all yolo outputs - num_classes: 80 +loss: + YoloXDetectionLoss: + strides: [8, 16, 32] # output strides of all yolo outputs + num_classes: 80 ``` -Note that two `training_params` parameters define the loss function: `loss` which defines the type of the loss, and`criterion_params` dictionary which will be unpacked to the underlying `YoloXDetectionLoss` class constructor. - ## Passing Instantiated nn.Module Objects as Loss Functions SuperGradients also supports passing instantiated nn.Module Objects as demonstrated below: @@ -201,9 +197,11 @@ Then, in your `my_training_hyperparams.yaml`, use `"my_loss"` in the same way as ```yaml ... -loss: my_loss +loss: + my_loss: + my_loss_arg1: ... + my_loss_arg2: ... -criterion_params: ... ``` diff --git a/documentation/source/PhaseCallbacks.md b/documentation/source/PhaseCallbacks.md index cc7480aa06..790de0860d 100644 --- a/documentation/source/PhaseCallbacks.md +++ b/documentation/source/PhaseCallbacks.md @@ -238,7 +238,7 @@ model = ... train_params = { "loss": "LabelSmoothingCrossEntropyLoss", - "criterion_params": {}, + "phase_callbacks": [SaveFirstBatchCallback()], ... 
} diff --git a/documentation/source/configuration_files.md b/documentation/source/configuration_files.md index bb253bd8c3..5203b04b29 100644 --- a/documentation/source/configuration_files.md +++ b/documentation/source/configuration_files.md @@ -30,7 +30,6 @@ lr_warmup_epochs: 0 initial_lr: 0.1 loss: LabelSmoothingCrossEntropyLoss optimizer: SGD -criterion_params: {} optimizer_params: weight_decay: 1e-4 diff --git a/src/super_gradients/common/deprecate.py b/src/super_gradients/common/deprecate.py index 516ec85ff4..3fbb3f2285 100644 --- a/src/super_gradients/common/deprecate.py +++ b/src/super_gradients/common/deprecate.py @@ -1,6 +1,6 @@ import warnings from functools import wraps -from typing import Optional +from typing import Optional, Callable from pkg_resources import parse_version @@ -76,3 +76,41 @@ def wrapper(*args, **kwargs): return wrapper return decorator + + +def deprecated_training_param(deprecated_tparam_name: str, deprecated_since: str, removed_from: str, new_arg_assigner: Callable, message: str = ""): + def decorator(func): + @wraps(func) + def wrapper(*args, **training_params): + if deprecated_tparam_name in training_params: + import super_gradients + is_still_supported = parse_version(super_gradients.__version__) < parse_version(removed_from) + if is_still_supported: + message_prefix = ( + f"Training hyperparameter `{deprecated_tparam_name}` is deprecated since version `{deprecated_since}` " + f"and will be removed in version `{removed_from}`.\n" + ) + warnings.warn(message_prefix + message, DeprecationWarning) + training_params = new_arg_assigner(**training_params) + else: + message_prefix = ( + f"Training hyperparameter `{deprecated_tparam_name}` was deprecated since version `{deprecated_since}` " + f"and was removed in version `{removed_from}`.\n" + ) + raise RuntimeError(message_prefix + message) + + return func(*args, **training_params) + + return wrapper + + return decorator + + +def get_deprecated_nested_params_to_factory_format_assigner(param_name: str, 
nested_params_name: str) -> Callable: + def deprecated_nested_params_to_factory_format_assigner(**params): + param_val = params.get(param_name) + if isinstance(param_val, str): # only a plain name needs wrapping; mappings / instantiated objects are already final + params[param_name] = {param_val: params.get(nested_params_name) or {}} + return params + + return deprecated_nested_params_to_factory_format_assigner diff --git a/src/super_gradients/examples/cifar10_training_torch_objects/cifar10_training_torch_objects_example.py b/src/super_gradients/examples/cifar10_training_torch_objects/cifar10_training_torch_objects_example.py index e293106f1f..e845d74b80 100644 --- a/src/super_gradients/examples/cifar10_training_torch_objects/cifar10_training_torch_objects_example.py +++ b/src/super_gradients/examples/cifar10_training_torch_objects/cifar10_training_torch_objects_example.py @@ -55,7 +55,6 @@ "phase_callbacks": phase_callbacks, "initial_lr": lr, "loss": loss_fn, - "criterion_params": {}, "optimizer": optimizer, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py b/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py index 9f49c0130f..4e3b7f942d 100644 --- a/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py +++ b/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py @@ -61,7 +61,6 @@ def main(architecture_name: str): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/src/super_gradients/examples/early_stop/early_stop_example.py b/src/super_gradients/examples/early_stop/early_stop_example.py index 5cf1124b06..4575426d13 100644 --- a/src/super_gradients/examples/early_stop/early_stop_example.py +++ b/src/super_gradients/examples/early_stop/early_stop_example.py @@ -23,7 +23,6 @@ 
"initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/recipes/cityscapes_regseg48.yaml b/src/super_gradients/recipes/cityscapes_regseg48.yaml index 27c4cbd5ac..8b584c01b2 100644 --- a/src/super_gradients/recipes/cityscapes_regseg48.yaml +++ b/src/super_gradients/recipes/cityscapes_regseg48.yaml @@ -62,9 +62,9 @@ training_hyperparams: ema: True - loss: LabelSmoothingCrossEntropyLoss - criterion_params: - ignore_index: ${cityscapes_ignored_label} + loss: + LabelSmoothingCrossEntropyLoss: + ignore_index: ${cityscapes_ignored_label} train_metrics_list: - PixelAccuracy: diff --git a/src/super_gradients/recipes/coco2017_ssd_lite_mobilenet_v2.yaml b/src/super_gradients/recipes/coco2017_ssd_lite_mobilenet_v2.yaml index d5bbc1ab88..0e7d6fcbcc 100644 --- a/src/super_gradients/recipes/coco2017_ssd_lite_mobilenet_v2.yaml +++ b/src/super_gradients/recipes/coco2017_ssd_lite_mobilenet_v2.yaml @@ -50,9 +50,11 @@ arch_params: resume: False training_hyperparams: resume: ${resume} - criterion_params: - alpha: 1.0 - dboxes: ${dboxes} + loss: + SSDLoss: + alpha: 1.0 + dboxes: ${dboxes} # OVERRIDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN. 
+ multi_gpu: DDP num_gpus: 4 diff --git a/src/super_gradients/recipes/roboflow_ppyoloe.yaml b/src/super_gradients/recipes/roboflow_ppyoloe.yaml index c904cf96c5..2024ed8586 100644 --- a/src/super_gradients/recipes/roboflow_ppyoloe.yaml +++ b/src/super_gradients/recipes/roboflow_ppyoloe.yaml @@ -40,8 +40,6 @@ training_hyperparams: resume: ${resume} max_epochs: 100 mixed_precision: True - criterion_params: - num_classes: ${num_classes} phase_callbacks: - RoboflowResultCallback: dataset_name: ${dataset_name} diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml index 7ed162ad83..421e3de7ad 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml @@ -12,12 +12,11 @@ lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.1 batch_accumulate: 1 initial_lr: 1e-3 -loss: DEKRLoss - -criterion_params: - heatmap_loss: qfl - heatmap_loss_factor: 1.0 - offset_loss_factor: 0.1 +loss: + DEKRLoss: + heatmap_loss: qfl + heatmap_loss_factor: 1.0 + offset_loss_factor: 0.1 mixed_precision: True diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml index 62ce33e6f2..6b5e1d1d5a 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml @@ -13,8 +13,6 @@ cosine_final_lr_ratio: 0.1 batch_accumulate: 1 initial_lr: 0.001 loss: RescoringLoss -criterion_params: {} - mixed_precision: False optimizer: AdamW diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml 
b/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml index 65239ffa13..41df8e3b97 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml @@ -7,11 +7,10 @@ lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.01 batch_accumulate: 1 initial_lr: 0.01 -loss: SSDLoss - -criterion_params: - alpha: 1.0 - dboxes: # OVERRIDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN. +loss: + SSDLoss: + alpha: 1.0 + dboxes: # OVERRIDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN. optimizer: SGD optimizer_params: diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml index fcc3fa4ba1..9fb65aef55 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml @@ -12,11 +12,10 @@ batch_accumulate: 1 save_ckpt_epoch_list: [285] -loss: YoloXDetectionLoss - -criterion_params: - strides: [8, 16, 32] # output strides of all yolo outputs - num_classes: 80 +loss: + YoloXDetectionLoss: + strides: [8, 16, 32] # output strides of all yolo outputs + num_classes: 80 diff --git a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml index 0015f58e9d..d4e24d0659 100644 --- a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml @@ -38,7 +38,7 @@ zero_weight_decay_on_bias_and_bn: False # whether to apply weight decay on batch loss: # Loss function for training (str as one of SuperGradient's built in options, or torch.nn.module) -criterion_params: {} # when 
`loss` is one of SuperGradient's built in options, it will be initialized with criterion_params. +criterion_params: {} # (DEPRECATED) when `loss` is one of SuperGradient's built in options, it will be initialized with criterion_params. ema: False # whether to use Model Exponential Moving Average diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml index 766b968597..0f4b8783cb 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml @@ -20,9 +20,9 @@ ema_params: decay: 0.9999 decay_type: constant -loss: LabelSmoothingCrossEntropyLoss -criterion_params: - smooth_eps: 0.1 +loss: + LabelSmoothingCrossEntropyLoss: + smooth_eps: 0.1 metric_to_watch: Accuracy diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml index 1dddb79b14..05040f677b 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml @@ -10,9 +10,9 @@ optimizer_params: weight_decay: 0.00004 lr_warmup_epochs: 5 -loss: LabelSmoothingCrossEntropyLoss -criterion_params: - smooth_eps: 0.1 +loss: + LabelSmoothingCrossEntropyLoss: + smooth_eps: 0.1 zero_weight_decay_on_bias_and_bn: True ema: True diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml index b1b90729ea..3cc4c074c2 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml +++ 
b/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml @@ -20,9 +20,9 @@ ema_params: decay_type: constant decay: 0.9999 -loss: LabelSmoothingCrossEntropyLoss -criterion_params: - smooth_eps: 0.1 +loss: + LabelSmoothingCrossEntropyLoss: + smooth_eps: 0.1 metric_to_watch: Accuracy diff --git a/src/super_gradients/training/params.py b/src/super_gradients/training/params.py index 1388457841..981d807bb4 100755 --- a/src/super_gradients/training/params.py +++ b/src/super_gradients/training/params.py @@ -1,3 +1,4 @@ +from super_gradients.common.deprecate import deprecated_training_param, get_deprecated_nested_params_to_factory_format_assigner from super_gradients.training.utils import HpmStruct from copy import deepcopy @@ -9,7 +10,6 @@ "cosine_final_lr_ratio": 0.01, "optimizer": "SGD", "optimizer_params": {}, - "criterion_params": {}, "ema": False, "batch_accumulate": 1, # number of batches to accumulate before every backward pass "ema_params": {}, @@ -115,6 +115,9 @@ def __init__(self, **entries): if len(entries) > 0: self.override(**entries) + @deprecated_training_param( + "criterion_params", "3.2.1", "3.3.0", new_arg_assigner=get_deprecated_nested_params_to_factory_format_assigner("loss", "criterion_params") + ) def override(self, **entries): super().override(**entries) self.validate() diff --git a/src/super_gradients/training/sg_trainer/sg_trainer.py b/src/super_gradients/training/sg_trainer/sg_trainer.py index 97ecea6f9c..fb9e554827 100755 --- a/src/super_gradients/training/sg_trainer/sg_trainer.py +++ b/src/super_gradients/training/sg_trainer/sg_trainer.py @@ -1181,11 +1181,7 @@ def forward(self, inputs, targets): self.metric_to_watch = self.training_params.metric_to_watch self.greater_metric_to_watch_is_better = self.training_params.greater_metric_to_watch_is_better - # Allowing loading instantiated loss or string - if isinstance(self.training_params.loss, str): - self.criterion = LossesFactory().get({self.training_params.loss: 
self.training_params.criterion_params}) - - elif isinstance(self.training_params.loss, Mapping): + if isinstance(self.training_params.loss, Mapping) or isinstance(self.training_params.loss, str): self.criterion = LossesFactory().get(self.training_params.loss) elif isinstance(self.training_params.loss, nn.Module): diff --git a/tests/integration_tests/conversion_callback_test.py b/tests/integration_tests/conversion_callback_test.py index 22b01512cf..1f405f3c47 100644 --- a/tests/integration_tests/conversion_callback_test.py +++ b/tests/integration_tests/conversion_callback_test.py @@ -59,7 +59,6 @@ def test_classification_architectures(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/tests/integration_tests/deci_lab_export_test.py b/tests/integration_tests/deci_lab_export_test.py index 50e6132d2e..421510bb8c 100644 --- a/tests/integration_tests/deci_lab_export_test.py +++ b/tests/integration_tests/deci_lab_export_test.py @@ -49,7 +49,6 @@ def test_train_with_deci_lab_integration(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": self.optimizer, - "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/tests/integration_tests/ema_train_integration_test.py b/tests/integration_tests/ema_train_integration_test.py index 3bca4b3204..6d60bfdbf2 100644 --- a/tests/integration_tests/ema_train_integration_test.py +++ b/tests/integration_tests/ema_train_integration_test.py @@ -55,7 +55,6 @@ def _train(self, ema_params): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "ema": True, "ema_params": ema_params, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/integration_tests/pretrained_models_test.py 
b/tests/integration_tests/pretrained_models_test.py index 33e172762d..bad5949876 100644 --- a/tests/integration_tests/pretrained_models_test.py +++ b/tests/integration_tests/pretrained_models_test.py @@ -133,8 +133,7 @@ def setUp(self) -> None: "cosine_final_lr_ratio": 0.01, "lr_warmup_epochs": 3, "batch_accumulate": 1, - "loss": "SSDLoss", - "criterion_params": {"dboxes": ssd_dboxes}, + "loss": {"SSDLoss": {"dboxes": ssd_dboxes}}, "optimizer": "SGD", "warmup_momentum": 0.8, "optimizer_params": {"momentum": 0.937, "weight_decay": 0.0005, "nesterov": True}, @@ -150,8 +149,7 @@ def setUp(self) -> None: "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "YoloXDetectionLoss", - "criterion_params": {"strides": [8, 16, 32], "num_classes": 5}, # output strides of all yolo outputs + "loss": {"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 5}}, "train_metrics_list": [], "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=5)], "metric_to_watch": "mAP@0.50:0.95", diff --git a/tests/recipe_training_tests/coded_qat_launch_test.py b/tests/recipe_training_tests/coded_qat_launch_test.py index 243d78cd53..267f6a3a05 100644 --- a/tests/recipe_training_tests/coded_qat_launch_test.py +++ b/tests/recipe_training_tests/coded_qat_launch_test.py @@ -22,7 +22,6 @@ def test_qat_launch(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], @@ -83,7 +82,6 @@ def test_ptq_launch(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], diff --git a/tests/unit_tests/dataset_statistics_test.py 
b/tests/unit_tests/dataset_statistics_test.py index f68fbb562e..f0d0c7c93c 100644 --- a/tests/unit_tests/dataset_statistics_test.py +++ b/tests/unit_tests/dataset_statistics_test.py @@ -26,8 +26,7 @@ def test_dataset_statistics_tensorboard_logger(self): "max_epochs": 1, # we dont really need the actual training to run "lr_mode": "CosineLRScheduler", "initial_lr": 0.01, - "loss": "YoloXDetectionLoss", - "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, + "loss": {"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 80}}, "dataset_statistics": True, "launch_tensorboard": True, "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=80)], diff --git a/tests/unit_tests/detection_dataset_test.py b/tests/unit_tests/detection_dataset_test.py index 8bbde92259..1856cfabcb 100644 --- a/tests/unit_tests/detection_dataset_test.py +++ b/tests/unit_tests/detection_dataset_test.py @@ -173,9 +173,8 @@ def test_coco_detection_metrics_with_classwise_ap(self): "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "YoloXDetectionLoss", + "loss": {"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 80}}, "mixed_precision": False, - "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, # output strides of all yolo outputs "train_metrics_list": [], "valid_metrics_list": [ DetectionMetrics( diff --git a/tests/unit_tests/double_training_test.py b/tests/unit_tests/double_training_test.py index 4a9ab0b265..8cf74a3f3f 100644 --- a/tests/unit_tests/double_training_test.py +++ b/tests/unit_tests/double_training_test.py @@ -29,7 +29,6 @@ def test_call_train_twice(self): "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/early_stop_test.py 
b/tests/unit_tests/early_stop_test.py index 2082d2fd73..d4789b7d85 100644 --- a/tests/unit_tests/early_stop_test.py +++ b/tests/unit_tests/early_stop_test.py @@ -54,7 +54,6 @@ def setUp(self) -> None: "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Top5()], diff --git a/tests/unit_tests/factories_test.py b/tests/unit_tests/factories_test.py index e3b7babba0..7be16c729e 100644 --- a/tests/unit_tests/factories_test.py +++ b/tests/unit_tests/factories_test.py @@ -26,7 +26,6 @@ def test_training_with_factories(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "torch.optim.ASGD", # use an optimizer by factory - "criterion_params": {}, "optimizer_params": {"lambd": 0.0001, "alpha": 0.75}, "train_metrics_list": ["Accuracy", "Top5"], # use a metric by factory "valid_metrics_list": ["Accuracy", "Top5"], # use a metric by factory @@ -52,7 +51,6 @@ def test_training_with_factories_with_typos(self): "initial_lr": 0.1, "loss": "crossEnt_ropy", "optimizer": "AdAm_", # use an optimizer by factory - "criterion_params": {}, "train_metrics_list": ["accur_acy", "Top_5"], # use a metric by factory "valid_metrics_list": ["aCCuracy", "Top5"], # use a metric by factory "metric_to_watch": "Accurac_Y", diff --git a/tests/unit_tests/forward_pass_prep_fn_test.py b/tests/unit_tests/forward_pass_prep_fn_test.py index 57ccf27f69..8260fe0a41 100644 --- a/tests/unit_tests/forward_pass_prep_fn_test.py +++ b/tests/unit_tests/forward_pass_prep_fn_test.py @@ -44,7 +44,6 @@ def test_resizing_with_forward_pass_prep_fn(self): "initial_lr": 1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/kd_ema_test.py b/tests/unit_tests/kd_ema_test.py 
index bbdf9164bd..aaa682d076 100644 --- a/tests/unit_tests/kd_ema_test.py +++ b/tests/unit_tests/kd_ema_test.py @@ -25,7 +25,6 @@ def setUp(cls): "initial_lr": 0.1, "loss": KDLogitsLoss(torch.nn.CrossEntropyLoss()), "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/kd_trainer_test.py b/tests/unit_tests/kd_trainer_test.py index 98b3a37f3f..fc73fcd1d8 100644 --- a/tests/unit_tests/kd_trainer_test.py +++ b/tests/unit_tests/kd_trainer_test.py @@ -47,7 +47,6 @@ def setUp(cls): "initial_lr": 0.1, "loss": KDLogitsLoss(torch.nn.CrossEntropyLoss()), "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/load_ema_ckpt_test.py b/tests/unit_tests/load_ema_ckpt_test.py index c1d1fe1d98..0db84feb83 100644 --- a/tests/unit_tests/load_ema_ckpt_test.py +++ b/tests/unit_tests/load_ema_ckpt_test.py @@ -28,7 +28,6 @@ def setUp(self) -> None: "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], diff --git a/tests/unit_tests/local_ckpt_head_replacement_test.py b/tests/unit_tests/local_ckpt_head_replacement_test.py index 0d100e364a..04659f0fb4 100644 --- a/tests/unit_tests/local_ckpt_head_replacement_test.py +++ b/tests/unit_tests/local_ckpt_head_replacement_test.py @@ -19,7 +19,6 @@ def test_local_ckpt_head_replacement(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], diff --git 
a/tests/unit_tests/loss_loggings_test.py b/tests/unit_tests/loss_loggings_test.py index 5294885bd1..9c44fc2237 100644 --- a/tests/unit_tests/loss_loggings_test.py +++ b/tests/unit_tests/loss_loggings_test.py @@ -40,7 +40,6 @@ def test_single_item_logging(self): "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -64,7 +63,6 @@ def test_multiple_unnamed_components_loss_logging(self): "initial_lr": 0.1, "loss": CriterionWithUnnamedComponents(), "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -88,7 +86,6 @@ def test_multiple_named_components_loss_logging(self): "initial_lr": 0.1, "loss": CriterionWithNamedComponents(), "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/lr_cooldown_test.py b/tests/unit_tests/lr_cooldown_test.py index 668bc0c74f..362c2277aa 100644 --- a/tests/unit_tests/lr_cooldown_test.py +++ b/tests/unit_tests/lr_cooldown_test.py @@ -25,7 +25,6 @@ def test_lr_cooldown_with_lr_scheduling(self): "initial_lr": 1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/lr_warmup_test.py b/tests/unit_tests/lr_warmup_test.py index 2521090499..9c6167cd35 100644 --- a/tests/unit_tests/lr_warmup_test.py +++ b/tests/unit_tests/lr_warmup_test.py @@ -63,7 +63,6 @@ def test_lr_warmup(self): "initial_lr": 1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 
1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -99,7 +98,6 @@ def test_lr_warmup_with_lr_scheduling(self): "initial_lr": 1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -145,7 +143,6 @@ def test_warmup_linear_batch_step(self): "initial_lr": 1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -192,7 +189,6 @@ def test_warmup_linear_epoch_step(self): "warmup_initial_lr": 4.0, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -228,7 +224,6 @@ def test_custom_lr_warmup(self): "lr_warmup_epochs": 3, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/max_batches_loop_break_test.py b/tests/unit_tests/max_batches_loop_break_test.py index bbaa483e09..bb416e1b03 100644 --- a/tests/unit_tests/max_batches_loop_break_test.py +++ b/tests/unit_tests/max_batches_loop_break_test.py @@ -28,7 +28,6 @@ def test_max_train_batches_loop_break(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], @@ -63,7 +62,6 @@ def test_max_valid_batches_loop_break(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 
1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], diff --git a/tests/unit_tests/optimizer_params_override_test.py b/tests/unit_tests/optimizer_params_override_test.py index f0b250b160..97906964ee 100644 --- a/tests/unit_tests/optimizer_params_override_test.py +++ b/tests/unit_tests/optimizer_params_override_test.py @@ -21,7 +21,6 @@ def test_optimizer_params_partial_override(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"momentum": 0.9}, "zero_weight_decay_on_bias_and_bn": True, "train_metrics_list": [Accuracy(), Top5()], @@ -50,7 +49,6 @@ def test_optimizer_params_full_override(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "zero_weight_decay_on_bias_and_bn": True, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], diff --git a/tests/unit_tests/phase_context_test.py b/tests/unit_tests/phase_context_test.py index 5fb20101c4..a11b8819da 100644 --- a/tests/unit_tests/phase_context_test.py +++ b/tests/unit_tests/phase_context_test.py @@ -33,7 +33,6 @@ def context_information_in_train_test(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Top5()], diff --git a/tests/unit_tests/preprocessing_unit_test.py b/tests/unit_tests/preprocessing_unit_test.py index 8af6f1ced3..4c15444ef4 100644 --- a/tests/unit_tests/preprocessing_unit_test.py +++ b/tests/unit_tests/preprocessing_unit_test.py @@ -102,8 +102,7 @@ def test_setting_preprocessing_params_from_validation_set(self): "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "YoloXDetectionLoss", - "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, # output strides of all yolo outputs + "loss": 
{"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 80}}, "train_metrics_list": [], "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=80)], "metric_to_watch": "mAP@0.50:0.95", @@ -173,8 +172,7 @@ def test_setting_preprocessing_params_from_checkpoint(self): "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "YoloXDetectionLoss", - "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, # output strides of all yolo outputs + "loss": {"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 80}}, "train_metrics_list": [], "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=80)], "metric_to_watch": "mAP@0.50:0.95", diff --git a/tests/unit_tests/resume_training_test.py b/tests/unit_tests/resume_training_test.py index 6c8bc0b465..b08bfb0286 100644 --- a/tests/unit_tests/resume_training_test.py +++ b/tests/unit_tests/resume_training_test.py @@ -36,7 +36,6 @@ def test_resume_training(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], @@ -82,7 +81,6 @@ def test_resume_run_id_training(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], @@ -147,7 +145,6 @@ def test_resume_external_training(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], @@ -195,7 +192,6 @@ def 
test_resume_external_training_same_dir(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], diff --git a/tests/unit_tests/save_ckpt_test.py b/tests/unit_tests/save_ckpt_test.py index 11ae820467..3465ae7f73 100644 --- a/tests/unit_tests/save_ckpt_test.py +++ b/tests/unit_tests/save_ckpt_test.py @@ -18,7 +18,6 @@ def setUp(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "save_ckpt_epoch_list": [1, 3], "loss": "CrossEntropyLoss", diff --git a/tests/unit_tests/test_train_with_torch_scheduler.py b/tests/unit_tests/test_train_with_torch_scheduler.py index a561667782..155b861d45 100644 --- a/tests/unit_tests/test_train_with_torch_scheduler.py +++ b/tests/unit_tests/test_train_with_torch_scheduler.py @@ -29,7 +29,6 @@ def _run_scheduler_test(self, scheduler_name, scheduler_params, expected_lr, epo "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [DummyMetric()], "valid_metrics_list": [DummyMetric()], diff --git a/tests/unit_tests/train_after_test_test.py b/tests/unit_tests/train_after_test_test.py index d0a7ec085e..3239b1ea76 100644 --- a/tests/unit_tests/train_after_test_test.py +++ b/tests/unit_tests/train_after_test_test.py @@ -25,7 +25,6 @@ def setUp(self) -> None: "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/train_logging_test.py b/tests/unit_tests/train_logging_test.py index 5fbb16a539..ae1c178d2e 100644 --- 
a/tests/unit_tests/train_logging_test.py +++ b/tests/unit_tests/train_logging_test.py @@ -24,7 +24,6 @@ def test_train_logging(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], diff --git a/tests/unit_tests/train_with_intialized_param_args_test.py b/tests/unit_tests/train_with_intialized_param_args_test.py index d1dcefbd22..be3030a0ea 100644 --- a/tests/unit_tests/train_with_intialized_param_args_test.py +++ b/tests/unit_tests/train_with_intialized_param_args_test.py @@ -33,7 +33,6 @@ def test_train_with_external_criterion(self): "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -57,7 +56,6 @@ def test_train_with_external_optimizer(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": optimizer, - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], @@ -83,7 +81,6 @@ def test_train_with_external_scheduler(self): "initial_lr": lr, "loss": "CrossEntropyLoss", "optimizer": optimizer, - "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", @@ -105,7 +102,6 @@ def test_train_with_external_scheduler_class(self): "initial_lr": 0.3, "loss": "CrossEntropyLoss", "optimizer": optimizer, - "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", @@ -130,7 +126,6 @@ def test_train_with_reduce_on_plateau(self): "initial_lr": lr, "loss": "CrossEntropyLoss", "optimizer": optimizer, - 
"criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5(), ToyTestClassificationMetric()], "metric_to_watch": "Accuracy", @@ -153,7 +148,6 @@ def test_train_with_external_metric(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [F1Score()], "valid_metrics_list": [F1Score()], @@ -183,7 +177,6 @@ def test_train_with_external_dataloaders(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [F1Score()], "valid_metrics_list": [F1Score()], diff --git a/tests/unit_tests/train_with_precise_bn_test.py b/tests/unit_tests/train_with_precise_bn_test.py index a67d87bb40..e07375d73e 100644 --- a/tests/unit_tests/train_with_precise_bn_test.py +++ b/tests/unit_tests/train_with_precise_bn_test.py @@ -23,7 +23,6 @@ def test_train_with_precise_bn_explicit_size(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], @@ -52,7 +51,6 @@ def test_train_with_precise_bn_implicit_size(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], diff --git a/tests/unit_tests/update_param_groups_unit_test.py b/tests/unit_tests/update_param_groups_unit_test.py index e4edd4ca02..b4a24a9a80 100644 --- a/tests/unit_tests/update_param_groups_unit_test.py +++ b/tests/unit_tests/update_param_groups_unit_test.py @@ -40,7 +40,6 @@ def test_lr_scheduling_with_update_param_groups(self): "lr_decay_factor": 1, "loss": 
"CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/vit_unit_test.py b/tests/unit_tests/vit_unit_test.py index b9a3527761..a1f2ccf18a 100644 --- a/tests/unit_tests/vit_unit_test.py +++ b/tests/unit_tests/vit_unit_test.py @@ -20,7 +20,6 @@ def setUp(self): "initial_lr": 0.1, "loss": "CrossEntropyLoss", "optimizer": "SGD", - "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], From b66f46628a785551220691d415375c3c586503c6 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Tue, 10 Oct 2023 17:36:44 +0300 Subject: [PATCH 2/9] all refs removed --- src/super_gradients/recipes/cityscapes_segformer.yaml | 6 +++--- src/super_gradients/recipes/imagenet_resnet50_kd.yaml | 10 +++++----- src/super_gradients/recipes/roboflow_yolo_nas_m.yaml | 3 --- src/super_gradients/recipes/roboflow_yolo_nas_s.yaml | 2 -- .../cifar10_resnet_train_params.yaml | 1 - 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/super_gradients/recipes/cityscapes_segformer.yaml b/src/super_gradients/recipes/cityscapes_segformer.yaml index fb4d8bb227..7f3bd9b849 100644 --- a/src/super_gradients/recipes/cityscapes_segformer.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer.yaml @@ -95,9 +95,9 @@ training_hyperparams: sync_bn: True - loss: LabelSmoothingCrossEntropyLoss - criterion_params: - ignore_index: ${cityscapes_ignored_label} + loss: + LabelSmoothingCrossEntropyLoss: + ignore_index: ${cityscapes_ignored_label} phase_callbacks: - SlidingWindowValidationCallback: diff --git a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml index 2bc9109f46..5ebbd6a200 100644 --- a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml +++ 
b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml @@ -25,11 +25,11 @@ val_dataloader: imagenet_val resume: False training_hyperparams: resume: ${resume} - loss: KDLogitsLoss - criterion_params: - distillation_loss_coeff: 0.8 - task_loss_fn: - _target_: super_gradients.training.losses.label_smoothing_cross_entropy_loss.LabelSmoothingCrossEntropyLoss + loss: + KDLogitsLoss: + distillation_loss_coeff: 0.8 + task_loss_fn: + _target_: super_gradients.training.losses.label_smoothing_cross_entropy_loss.LabelSmoothingCrossEntropyLoss arch_params: teacher_input_adapter: diff --git a/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml b/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml index 2d6641e801..f0350e2ce8 100644 --- a/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml +++ b/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml @@ -60,9 +60,6 @@ training_hyperparams: max_epochs: 100 mixed_precision: True - criterion_params: - num_classes: ${num_classes} - phase_callbacks: [] loss: diff --git a/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml b/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml index 8fb2baf901..ce9dbf4332 100644 --- a/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml +++ b/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml @@ -60,8 +60,6 @@ training_hyperparams: max_epochs: 100 mixed_precision: True - criterion_params: - num_classes: ${num_classes} phase_callbacks: [] diff --git a/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml index 0905ba57ff..ba83f94d04 100644 --- a/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml @@ -15,7 +15,6 @@ lr_warmup_epochs: 0 initial_lr: 0.1 loss: LabelSmoothingCrossEntropyLoss optimizer: SGD -criterion_params: {} optimizer_params: weight_decay: 1e-4 From 
5192ddda65bb5e75e5dc817eea8d290b43184532 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Wed, 11 Oct 2023 10:39:43 +0300 Subject: [PATCH 3/9] yolox ref removed --- src/super_gradients/recipes/roboflow_yolox.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/super_gradients/recipes/roboflow_yolox.yaml b/src/super_gradients/recipes/roboflow_yolox.yaml index 38ad33fa43..c26f4d8c2a 100644 --- a/src/super_gradients/recipes/roboflow_yolox.yaml +++ b/src/super_gradients/recipes/roboflow_yolox.yaml @@ -39,8 +39,11 @@ resume: False training_hyperparams: max_epochs: 100 resume: ${resume} - criterion_params: - num_classes: ${num_classes} + loss: + YoloXDetectionLoss: + strides: [ 8, 16, 32 ] # output strides of all yolo outputs + num_classes: ${num_classes} + train_metrics_list: - DetectionMetrics: normalize_targets: True From 9c24d6c857b310d8d385eb7a76978d782cc23a0f Mon Sep 17 00:00:00 2001 From: shayaharon Date: Wed, 11 Oct 2023 14:10:25 +0300 Subject: [PATCH 4/9] added tests --- tests/unit_tests/test_deprecations.py | 60 ++++++++++++++++++- .../unit_tests/train_with_precise_bn_test.py | 1 + 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/tests/unit_tests/test_deprecations.py b/tests/unit_tests/test_deprecations.py index 23b0ddb9f9..dc0c25c59e 100644 --- a/tests/unit_tests/test_deprecations.py +++ b/tests/unit_tests/test_deprecations.py @@ -1,12 +1,17 @@ import unittest +import warnings from typing import Union from omegaconf import DictConfig from torch import nn +from super_gradients import setup_device, Trainer from super_gradients.common.registry import register_model from super_gradients.training import models -from super_gradients.training.models import CustomizableDetector, get_arch_params +from super_gradients.training.dataloaders.dataloaders import classification_test_dataloader +from super_gradients.training.metrics import Accuracy, Top5 +from super_gradients.training.models import CustomizableDetector, get_arch_params, 
ResNet18 +from super_gradients.training.params import TrainingParams from super_gradients.training.utils import HpmStruct from super_gradients.training.utils.utils import arch_params_deprecated from super_gradients.training.transforms.transforms import DetectionTargetsFormatTransform, DetectionHorizontalFlip, DetectionPaddedRescale @@ -98,6 +103,59 @@ def test_deprecated_HpmStruct_import(self): except ImportError: self.fail("ImportError raised unexpectedly for HpmStruct") + def test_deprecated_criterion_params(self): + with self.assertWarns(DeprecationWarning): + warnings.simplefilter("always") + train_params = { + "max_epochs": 4, + "lr_decay_factor": 0.1, + "lr_updates": [4], + "lr_mode": "StepLRScheduler", + "lr_warmup_epochs": 0, + "initial_lr": 0.1, + "loss": "CrossEntropyLoss", + "optimizer": "SGD", + "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, + "loss": "CrossEntropyLoss", + "train_metrics_list": [], + "valid_metrics_list": [], + "metric_to_watch": "Accuracy", + "greater_metric_to_watch_is_better": True, + } + train_params = TrainingParams(**train_params) + train_params.override(criterion_params={"ignore_index": 0}) + + def test_train_with_deprecated_criterion_params(self): + setup_device(device="cpu") + trainer = Trainer("test_train_with_precise_bn_explicit_size") + net = ResNet18(num_classes=5, arch_params={}) + train_params = { + "max_epochs": 2, + "lr_updates": [1], + "lr_decay_factor": 0.1, + "lr_mode": "StepLRScheduler", + "lr_warmup_epochs": 0, + "initial_lr": 0.1, + "loss": "CrossEntropyLoss", + "criterion_params": {"ignore_index": -300}, + "optimizer": "SGD", + "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, + "train_metrics_list": [Accuracy(), Top5()], + "valid_metrics_list": [Accuracy(), Top5()], + "metric_to_watch": "Accuracy", + "greater_metric_to_watch_is_better": True, + "precise_bn": True, + "precise_bn_batch_size": 100, + } + trainer.train( + model=net, + training_params=train_params, + 
train_loader=classification_test_dataloader(batch_size=10), + valid_loader=classification_test_dataloader(batch_size=10), + ) + + self.assertEqual(trainer.criterion.ignore_index, -300) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit_tests/train_with_precise_bn_test.py b/tests/unit_tests/train_with_precise_bn_test.py index e07375d73e..7313aa4d74 100644 --- a/tests/unit_tests/train_with_precise_bn_test.py +++ b/tests/unit_tests/train_with_precise_bn_test.py @@ -22,6 +22,7 @@ def test_train_with_precise_bn_explicit_size(self): "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": "CrossEntropyLoss", + "criterion_params": {"ignore_index": 0}, "optimizer": "SGD", "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], From f85d1fbf2c31a611c847d71768ae49986128b195 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Wed, 11 Oct 2023 15:34:37 +0300 Subject: [PATCH 5/9] added docs --- src/super_gradients/common/deprecate.py | 63 +++++++++++++++++++++++++ tests/unit_tests/test_deprecations.py | 4 +- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/src/super_gradients/common/deprecate.py b/src/super_gradients/common/deprecate.py index 3fbb3f2285..0f77138e47 100644 --- a/src/super_gradients/common/deprecate.py +++ b/src/super_gradients/common/deprecate.py @@ -79,6 +79,37 @@ def wrapper(*args, **kwargs): def deprecated_training_param(deprecated_tparam_name: str, deprecated_since: str, removed_from: str, new_arg_assigner: Callable, message: str = ""): + """ + Decorator for deprecating training hyperparameters. 
+ + Recommended to be used as a decorator on top of super_gradients.training.params.TrainingParams's override method: + + class TrainingParams(HpmStruct): + def __init__(self, **entries): + # WE initialize by the default training params, overridden by the provided params + default_training_params = deepcopy(DEFAULT_TRAINING_PARAMS) + super().__init__(**default_training_params) + self.set_schema(TRAINING_PARAM_SCHEMA) + if len(entries) > 0: + self.override(**entries) + + @deprecated_training_param( + "criterion_params", "3.2.1", "3.3.0", new_arg_assigner=get_deprecated_nested_params_to_factory_format_assigner("loss", "criterion_params") + ) + def override(self, **entries): + super().override(**entries) + self.validate() + + + :param deprecated_tparam_name: str, the name of the deprecated hyperparameter. + :param deprecated_since: str, SG version of deprecation. + :param removed_from: str, SG version of removal. + :param new_arg_assigner: Callable, a handler to assign the deprecated parameter value to the updated + hyperparameter entry. + :param message: str, message to append to the deprecation warning (default="") + :return: + """ + def decorator(func): def wrapper(*args, **training_params): if deprecated_tparam_name in training_params: @@ -107,6 +138,38 @@ def wrapper(*args, **training_params): def get_deprecated_nested_params_to_factory_format_assigner(param_name: str, nested_params_name: str) -> Callable: + """ + Returns an assigner to be used by deprecated_training_param decorator. + + The assigner takes a deprecated parameter name, and its __init__ arguments that previously were passed + through nested_params_name entry in training_params and manipulates the training_params so they are in 'Factory' format. 
+ For example: + + class TrainingParams(HpmStruct): + def __init__(self, **entries): + # WE initialize by the default training params, overridden by the provided params + default_training_params = deepcopy(DEFAULT_TRAINING_PARAMS) + super().__init__(**default_training_params) + self.set_schema(TRAINING_PARAM_SCHEMA) + if len(entries) > 0: + self.override(**entries) + + @deprecated_training_param( + "criterion_params", "3.2.1", "3.3.0", new_arg_assigner=get_deprecated_nested_params_to_factory_format_assigner("loss", "criterion_params") + ) + def override(self, **entries): + super().override(**entries) + self.validate() + + + then under the hood, training_params.loss will be set to + {training_params.loss: training_params.criterion_params} + + :param param_name: str, parameter name (for example, 'loss'). + :param nested_params_name: str, nested_params_name (for example, 'criterion_params') + :return: Callable as described above. + """ + def deprecated_nested_params_to_factory_format_assigner(**params): nested_params = params.get(nested_params_name) param_val = params.get(param_name) diff --git a/tests/unit_tests/test_deprecations.py b/tests/unit_tests/test_deprecations.py index dc0c25c59e..7096b1c800 100644 --- a/tests/unit_tests/test_deprecations.py +++ b/tests/unit_tests/test_deprecations.py @@ -5,7 +5,7 @@ from omegaconf import DictConfig from torch import nn -from super_gradients import setup_device, Trainer +from super_gradients import Trainer from super_gradients.common.registry import register_model from super_gradients.training import models from super_gradients.training.dataloaders.dataloaders import classification_test_dataloader @@ -126,7 +126,6 @@ def test_deprecated_criterion_params(self): train_params.override(criterion_params={"ignore_index": 0}) def test_train_with_deprecated_criterion_params(self): - setup_device(device="cpu") trainer = Trainer("test_train_with_precise_bn_explicit_size") net = ResNet18(num_classes=5, arch_params={}) train_params = { 
@@ -145,7 +144,6 @@ def test_train_with_deprecated_criterion_params(self): "metric_to_watch": "Accuracy", "greater_metric_to_watch_is_better": True, "precise_bn": True, - "precise_bn_batch_size": 100, } trainer.train( model=net, From 56512cb36d62d73f7ef492b939c97d6ac14cf295 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Wed, 11 Oct 2023 19:23:28 +0300 Subject: [PATCH 6/9] fixed test and updated factory for kdloss param --- src/super_gradients/training/losses/kd_losses.py | 3 +++ tests/unit_tests/test_deprecations.py | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/super_gradients/training/losses/kd_losses.py b/src/super_gradients/training/losses/kd_losses.py index a42ee2c448..ee4983f4d6 100644 --- a/src/super_gradients/training/losses/kd_losses.py +++ b/src/super_gradients/training/losses/kd_losses.py @@ -1,6 +1,8 @@ from torch.nn.modules.loss import _Loss, KLDivLoss import torch +from super_gradients.common.decorators.factory_decorator import resolve_param +from super_gradients.common.factories.losses_factory import LossesFactory from super_gradients.common.object_names import Losses from super_gradients.common.registry.registry import register_loss @@ -19,6 +21,7 @@ def forward(self, student_output, teacher_output): class KDLogitsLoss(_Loss): """Knowledge distillation loss, wraps the task loss and distillation loss""" + @resolve_param("task_loss_fn", LossesFactory()) def __init__(self, task_loss_fn: _Loss, distillation_loss_fn: _Loss = KDklDivLoss(), distillation_loss_coeff: float = 0.5): """ :param task_loss_fn: task loss. 
E.g., CrossEntropyLoss diff --git a/tests/unit_tests/test_deprecations.py b/tests/unit_tests/test_deprecations.py index 7096b1c800..cf3d7c9a76 100644 --- a/tests/unit_tests/test_deprecations.py +++ b/tests/unit_tests/test_deprecations.py @@ -143,7 +143,6 @@ def test_train_with_deprecated_criterion_params(self): "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", "greater_metric_to_watch_is_better": True, - "precise_bn": True, } trainer.train( model=net, From e2e1566159629957718d0108570dcba40acd604b Mon Sep 17 00:00:00 2001 From: shayaharon Date: Wed, 11 Oct 2023 19:26:08 +0300 Subject: [PATCH 7/9] fixed yaml celoss ref --- src/super_gradients/recipes/imagenet_resnet50_kd.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml index 5ebbd6a200..bd6077335a 100644 --- a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml +++ b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml @@ -28,8 +28,7 @@ training_hyperparams: loss: KDLogitsLoss: distillation_loss_coeff: 0.8 - task_loss_fn: - _target_: super_gradients.training.losses.label_smoothing_cross_entropy_loss.LabelSmoothingCrossEntropyLoss + task_loss_fn: CrossEntropyLoss arch_params: teacher_input_adapter: From 5b8f8030c6fc0b98cc2eea7e48f1a4361f8b3aa0 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Wed, 11 Oct 2023 21:02:39 +0300 Subject: [PATCH 8/9] fixed unittest --- tests/unit_tests/training_params_factory_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/training_params_factory_test.py b/tests/unit_tests/training_params_factory_test.py index 5e30984841..02ed0e57b7 100644 --- a/tests/unit_tests/training_params_factory_test.py +++ b/tests/unit_tests/training_params_factory_test.py @@ -5,7 +5,7 @@ class TrainingParamsTest(unittest.TestCase): def test_get_train_params(self): train_params = 
training_hyperparams.coco2017_yolox_train_params() - self.assertTrue(train_params["loss"] == "YoloXDetectionLoss") + self.assertTrue(list(train_params["loss"].keys())[0] == "YoloXDetectionLoss") self.assertTrue(train_params["max_epochs"] == 300) def test_get_train_params_with_overrides(self): From b25434de92b0539771d17464fac34cade59337df Mon Sep 17 00:00:00 2001 From: shayaharon Date: Thu, 12 Oct 2023 11:52:52 +0300 Subject: [PATCH 9/9] fixed last unit test --- tests/unit_tests/training_params_factory_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/training_params_factory_test.py b/tests/unit_tests/training_params_factory_test.py index 02ed0e57b7..77f46e8f75 100644 --- a/tests/unit_tests/training_params_factory_test.py +++ b/tests/unit_tests/training_params_factory_test.py @@ -10,7 +10,7 @@ def test_get_train_params(self): def test_get_train_params_with_overrides(self): train_params = training_hyperparams.coco2017_yolox_train_params(overriding_params={"max_epochs": 5}) - self.assertTrue(train_params["loss"] == "YoloXDetectionLoss") + self.assertTrue(list(train_params["loss"].keys())[0] == "YoloXDetectionLoss") self.assertTrue(train_params["max_epochs"] == 5)