From c26527171a30a31b9b3966f9d2f3cb4d28fe3c55 Mon Sep 17 00:00:00 2001
From: shayaharon <shay.aharon@deci.ai>
Date: Thu, 5 Oct 2023 17:37:47 +0300
Subject: [PATCH 1/9] added deprecation decorator and removed some refs

---
 documentation/source/Checkpoints.md           |  2 +-
 .../source/Example_Classification.md          |  1 -
 documentation/source/LRScheduling.md          |  6 +--
 documentation/source/Losses.md                | 18 ++++-----
 documentation/source/PhaseCallbacks.md        |  2 +-
 documentation/source/configuration_files.md   |  1 -
 src/super_gradients/common/deprecate.py       | 40 ++++++++++++++++++-
 .../cifar10_training_torch_objects_example.py |  1 -
 .../deci_lab_export_example.py                |  1 -
 .../examples/early_stop/early_stop_example.py |  1 -
 .../recipes/cityscapes_regseg48.yaml          |  6 +--
 .../coco2017_ssd_lite_mobilenet_v2.yaml       |  8 ++--
 .../recipes/roboflow_ppyoloe.yaml             |  2 -
 .../coco2017_dekr_pose_train_params.yaml      | 11 +++--
 .../coco2017_rescoring_train_params.yaml      |  2 -
 ...17_ssd_lite_mobilenet_v2_train_params.yaml |  9 ++---
 .../coco2017_yolox_train_params.yaml          |  9 ++---
 .../default_train_params.yaml                 |  2 +-
 .../imagenet_efficientnet_train_params.yaml   |  6 +--
 .../imagenet_mobilenetv3_train_params.yaml    |  6 +--
 .../imagenet_regnetY_train_params.yaml        |  6 +--
 src/super_gradients/training/params.py        |  5 ++-
 .../training/sg_trainer/sg_trainer.py         |  6 +--
 .../conversion_callback_test.py               |  1 -
 .../integration_tests/deci_lab_export_test.py |  1 -
 .../ema_train_integration_test.py             |  1 -
 .../pretrained_models_test.py                 |  6 +--
 .../coded_qat_launch_test.py                  |  2 -
 tests/unit_tests/dataset_statistics_test.py   |  3 +-
 tests/unit_tests/detection_dataset_test.py    |  3 +-
 tests/unit_tests/double_training_test.py      |  1 -
 tests/unit_tests/early_stop_test.py           |  1 -
 tests/unit_tests/factories_test.py            |  2 -
 tests/unit_tests/forward_pass_prep_fn_test.py |  1 -
 tests/unit_tests/kd_ema_test.py               |  1 -
 tests/unit_tests/kd_trainer_test.py           |  1 -
 tests/unit_tests/load_ema_ckpt_test.py        |  1 -
 .../local_ckpt_head_replacement_test.py       |  1 -
 tests/unit_tests/loss_loggings_test.py        |  3 --
 tests/unit_tests/lr_cooldown_test.py          |  1 -
 tests/unit_tests/lr_warmup_test.py            |  5 ---
 .../unit_tests/max_batches_loop_break_test.py |  2 -
 .../optimizer_params_override_test.py         |  2 -
 tests/unit_tests/phase_context_test.py        |  1 -
 tests/unit_tests/preprocessing_unit_test.py   |  6 +--
 tests/unit_tests/resume_training_test.py      |  4 --
 tests/unit_tests/save_ckpt_test.py            |  1 -
 .../test_train_with_torch_scheduler.py        |  1 -
 tests/unit_tests/train_after_test_test.py     |  1 -
 tests/unit_tests/train_logging_test.py        |  1 -
 .../train_with_intialized_param_args_test.py  |  7 ----
 .../unit_tests/train_with_precise_bn_test.py  |  2 -
 .../update_param_groups_unit_test.py          |  1 -
 tests/unit_tests/vit_unit_test.py             |  1 -
 54 files changed, 93 insertions(+), 123 deletions(-)

diff --git a/documentation/source/Checkpoints.md b/documentation/source/Checkpoints.md
index 48847ca871..4d1eb149e8 100644
--- a/documentation/source/Checkpoints.md
+++ b/documentation/source/Checkpoints.md
@@ -80,7 +80,7 @@ model = models.get(model_name=Models.RESNET18, num_classes=10)
 train_params = {
     ...
     "loss": "LabelSmoothingCrossEntropyLoss",
-    "criterion_params": {},
+    
     "save_ckpt_epoch_list": [10,15]
     ...
 }
diff --git a/documentation/source/Example_Classification.md b/documentation/source/Example_Classification.md
index f6d9d6606b..3224bdc172 100644
--- a/documentation/source/Example_Classification.md
+++ b/documentation/source/Example_Classification.md
@@ -308,7 +308,6 @@ Output (Training parameters):
     'ckpt_name': 'ckpt_latest.pth',
     'clip_grad_norm': None,
     'cosine_final_lr_ratio': 0.01,
-    'criterion_params': {},
     'dataset_statistics': False,
     'ema': False,
     'ema_params': {'decay': 0.9999, 'decay_type': 'exp', 'beta': 15},
diff --git a/documentation/source/LRScheduling.md b/documentation/source/LRScheduling.md
index 04cfa238bf..4816074fed 100644
--- a/documentation/source/LRScheduling.md
+++ b/documentation/source/LRScheduling.md
@@ -299,7 +299,7 @@ train_params = {
     "initial_lr": 0.1,
     "loss": torch.nn.CrossEntropyLoss(),
     "optimizer": "SGD",
-    "criterion_params": {},
+    
     "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
     "train_metrics_list": [Accuracy()],
     "valid_metrics_list": [Accuracy()],
@@ -327,7 +327,6 @@ training_hyperparams:
     initial_lr: 0.1
     loss: CrossEntropyLoss
     optimizer: SGD
-    criterion_params: {}
     optimizer_params:
       weight_decay: 1e-4
       momentum: 0.9
@@ -366,7 +365,7 @@ train_params = {
     "initial_lr": 0.1,
     "loss": torch.nn.CrossEntropyLoss(),
     "optimizer": "SGD",
-    "criterion_params": {},
+    
     "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
     "train_metrics_list": [Accuracy()],
     "valid_metrics_list": [Accuracy()],
@@ -398,7 +397,6 @@ training_hyperparams:
     initial_lr: 0.1
     loss: CrossEntropyLoss
     optimizer: SGD
-    criterion_params: {}
     optimizer_params:
       weight_decay: 1e-4
       momentum: 0.9
diff --git a/documentation/source/Losses.md b/documentation/source/Losses.md
index e7fd837566..88c1094c5e 100644
--- a/documentation/source/Losses.md
+++ b/documentation/source/Losses.md
@@ -32,7 +32,6 @@ model = ...
 train_params = {
    ...
    "loss": "LabelSmoothingCrossEntropyLoss",
-   "criterion_params": {}
    ...
 }
 trainer.train(model=model, training_params=train_params, train_loader=train_dataloader, valid_loader=valid_dataloader)
@@ -54,15 +53,12 @@ When doing so, in your `my_training_hyperparams.yaml` file:
 ```yaml
 ...
 
-loss: YoloXDetectionLoss
-
-criterion_params:
-   strides: [8, 16, 32]  # output strides of all yolo outputs
-   num_classes: 80
+loss: 
+  YoloXDetectionLoss:
+    strides: [8, 16, 32]  # output strides of all yolo outputs
+    num_classes: 80
 ```
 
-Note that two `training_params` parameters define the loss function:  `loss` which defines the type of the loss, and`criterion_params` dictionary which will be unpacked to the underlying `YoloXDetectionLoss` class constructor.
-
 ## Passing Instantiated nn.Module Objects as Loss Functions
 
 SuperGradients also supports passing instantiated nn.Module Objects as demonstrated below:
@@ -201,9 +197,11 @@ Then, in your `my_training_hyperparams.yaml`, use `"my_loss"` in the same way as
 ```yaml
 ...
 
-loss: my_loss
+loss:
+  my_loss:
+    my_loss_arg1: ...
+    my_loss_arg2: ...
 
-criterion_params:
   ...
 ```
 
diff --git a/documentation/source/PhaseCallbacks.md b/documentation/source/PhaseCallbacks.md
index cc7480aa06..790de0860d 100644
--- a/documentation/source/PhaseCallbacks.md
+++ b/documentation/source/PhaseCallbacks.md
@@ -238,7 +238,7 @@ model = ...
 
 train_params = {
     "loss": "LabelSmoothingCrossEntropyLoss",
-    "criterion_params": {},
+    
     "phase_callbacks": [SaveFirstBatchCallback()],
     ...
 }
diff --git a/documentation/source/configuration_files.md b/documentation/source/configuration_files.md
index bb253bd8c3..5203b04b29 100644
--- a/documentation/source/configuration_files.md
+++ b/documentation/source/configuration_files.md
@@ -30,7 +30,6 @@ lr_warmup_epochs: 0
 initial_lr: 0.1
 loss: LabelSmoothingCrossEntropyLoss
 optimizer: SGD
-criterion_params: {}
 
 optimizer_params:
   weight_decay: 1e-4
diff --git a/src/super_gradients/common/deprecate.py b/src/super_gradients/common/deprecate.py
index 516ec85ff4..3fbb3f2285 100644
--- a/src/super_gradients/common/deprecate.py
+++ b/src/super_gradients/common/deprecate.py
@@ -1,6 +1,6 @@
 import warnings
 from functools import wraps
-from typing import Optional
+from typing import Optional, Callable
 from pkg_resources import parse_version
 
 
@@ -76,3 +76,74 @@ def wrapper(*args, **kwargs):
         return wrapper
 
     return decorator
+
+
+def deprecated_training_param(deprecated_tparam_name: str, deprecated_since: str, removed_from: str, new_arg_assigner: Callable, message: str = ""):
+    """
+    Decorator that deprecates a training hyperparameter of a function receiving its hyperparameters as keyword arguments.
+
+    While the installed `super_gradients` version is lower than `removed_from`, passing the deprecated hyperparameter emits a
+    `DeprecationWarning` and the keyword arguments are rewritten by `new_arg_assigner` before `func` is called. From version
+    `removed_from` onwards, passing it raises a `RuntimeError`. Calls that do not pass it are forwarded unchanged.
+
+    :param deprecated_tparam_name:  Name of the deprecated training hyperparameter.
+    :param deprecated_since:        Version in which the hyperparameter was deprecated (only used in the messages).
+    :param removed_from:            Version from which passing the hyperparameter raises an error.
+    :param new_arg_assigner:        Callable invoked as `new_arg_assigner(**training_params)`; returns the updated keyword arguments.
+    :param message:                 Optional text appended to the warning / error message.
+    :return:                        Decorator to apply to the function that receives the training hyperparameters.
+    """
+
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **training_params):
+            if deprecated_tparam_name in training_params:
+                # Imported lazily - presumably to avoid a circular import at module load time; confirm before hoisting.
+                import super_gradients
+
+                is_still_supported = parse_version(super_gradients.__version__) < parse_version(removed_from)
+                if is_still_supported:
+                    message_prefix = (
+                        f"Training hyperparameter `{deprecated_tparam_name}` is deprecated since version `{deprecated_since}` "
+                        f"and will be removed in version `{removed_from}`.\n"
+                    )
+                    # stacklevel=2 attributes the warning to the caller of the decorated function, not to this wrapper.
+                    warnings.warn(message_prefix + message, DeprecationWarning, stacklevel=2)
+                    training_params = new_arg_assigner(**training_params)
+                else:
+                    message_prefix = (
+                        f"Training hyperparameter `{deprecated_tparam_name}` was deprecated since version `{deprecated_since}` "
+                        f"and was removed in version `{removed_from}`.\n"
+                    )
+                    raise RuntimeError(message_prefix + message)
+
+            return func(*args, **training_params)
+
+        return wrapper
+
+    return decorator
+
+
+def get_deprecated_nested_params_to_factory_format_assigner(param_name: str, nested_params_name: str) -> Callable:
+    """
+    Build an assigner that folds a deprecated "nested params" entry into the factory format `{type_name: nested_params}`.
+
+    The returned callable takes keyword arguments and, when `param_name` holds a type name (str), replaces it with
+    `{type_name: params[nested_params_name]}` (an empty dict when the nested params are missing or None). Values that are
+    not strings (e.g. an instantiated object, or a mapping that already uses the factory format) are left unchanged.
+
+    :param param_name:          Name of the parameter holding the type name (e.g. "loss").
+    :param nested_params_name:  Name of the deprecated parameter holding the constructor arguments (e.g. "criterion_params").
+    :return:                    Callable receiving `**params` and returning the updated params dict.
+    """
+
+    def deprecated_nested_params_to_factory_format_assigner(**params):
+        nested_params = params.get(nested_params_name)
+        param_val = params.get(param_name)
+        # Only type names are converted: a dict cannot be used as a dict key (unhashable), and an instantiated object
+        # used as a key is presumably not something the factory can resolve.
+        if isinstance(param_val, str):
+            params[param_name] = {param_val: {} if nested_params is None else nested_params}
+        return params
+
+    return deprecated_nested_params_to_factory_format_assigner
diff --git a/src/super_gradients/examples/cifar10_training_torch_objects/cifar10_training_torch_objects_example.py b/src/super_gradients/examples/cifar10_training_torch_objects/cifar10_training_torch_objects_example.py
index e293106f1f..e845d74b80 100644
--- a/src/super_gradients/examples/cifar10_training_torch_objects/cifar10_training_torch_objects_example.py
+++ b/src/super_gradients/examples/cifar10_training_torch_objects/cifar10_training_torch_objects_example.py
@@ -55,7 +55,6 @@
     "phase_callbacks": phase_callbacks,
     "initial_lr": lr,
     "loss": loss_fn,
-    "criterion_params": {},
     "optimizer": optimizer,
     "train_metrics_list": [Accuracy(), Top5()],
     "valid_metrics_list": [Accuracy(), Top5()],
diff --git a/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py b/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py
index 9f49c0130f..4e3b7f942d 100644
--- a/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py
+++ b/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py
@@ -61,7 +61,6 @@ def main(architecture_name: str):
         "initial_lr": 0.1,
         "loss": "CrossEntropyLoss",
         "optimizer": "SGD",
-        "criterion_params": {},
         "train_metrics_list": [Accuracy(), Top5()],
         "valid_metrics_list": [Accuracy(), Top5()],
         "metric_to_watch": "Accuracy",
diff --git a/src/super_gradients/examples/early_stop/early_stop_example.py b/src/super_gradients/examples/early_stop/early_stop_example.py
index 5cf1124b06..4575426d13 100644
--- a/src/super_gradients/examples/early_stop/early_stop_example.py
+++ b/src/super_gradients/examples/early_stop/early_stop_example.py
@@ -23,7 +23,6 @@
     "initial_lr": 0.1,
     "loss": "CrossEntropyLoss",
     "optimizer": "SGD",
-    "criterion_params": {},
     "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
     "train_metrics_list": [Accuracy(), Top5()],
     "valid_metrics_list": [Accuracy(), Top5()],
diff --git a/src/super_gradients/recipes/cityscapes_regseg48.yaml b/src/super_gradients/recipes/cityscapes_regseg48.yaml
index 27c4cbd5ac..8b584c01b2 100644
--- a/src/super_gradients/recipes/cityscapes_regseg48.yaml
+++ b/src/super_gradients/recipes/cityscapes_regseg48.yaml
@@ -62,9 +62,9 @@ training_hyperparams:
 
   ema: True
 
-  loss: LabelSmoothingCrossEntropyLoss
-  criterion_params:
-    ignore_index: ${cityscapes_ignored_label}
+  loss:
+    LabelSmoothingCrossEntropyLoss:
+      ignore_index: ${cityscapes_ignored_label}
 
   train_metrics_list:
     - PixelAccuracy:
diff --git a/src/super_gradients/recipes/coco2017_ssd_lite_mobilenet_v2.yaml b/src/super_gradients/recipes/coco2017_ssd_lite_mobilenet_v2.yaml
index d5bbc1ab88..0e7d6fcbcc 100644
--- a/src/super_gradients/recipes/coco2017_ssd_lite_mobilenet_v2.yaml
+++ b/src/super_gradients/recipes/coco2017_ssd_lite_mobilenet_v2.yaml
@@ -50,9 +50,11 @@ arch_params:
 resume: False
 training_hyperparams:
   resume: ${resume}
-  criterion_params:
-    alpha: 1.0
-    dboxes: ${dboxes}
+  loss:
+    SSDLoss:
+      alpha: 1.0
+      dboxes: ${dboxes} # Overrides the empty `dboxes` placeholder of the training hyperparams YAML.
+
 
 multi_gpu: DDP
 num_gpus: 4
diff --git a/src/super_gradients/recipes/roboflow_ppyoloe.yaml b/src/super_gradients/recipes/roboflow_ppyoloe.yaml
index c904cf96c5..2024ed8586 100644
--- a/src/super_gradients/recipes/roboflow_ppyoloe.yaml
+++ b/src/super_gradients/recipes/roboflow_ppyoloe.yaml
@@ -40,8 +40,6 @@ training_hyperparams:
   resume: ${resume}
   max_epochs: 100
   mixed_precision: True
-  criterion_params:
-    num_classes: ${num_classes}
   phase_callbacks:
     - RoboflowResultCallback:
         dataset_name: ${dataset_name}
diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml
index 7ed162ad83..421e3de7ad 100644
--- a/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml
+++ b/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml
@@ -12,12 +12,11 @@ lr_mode: CosineLRScheduler
 cosine_final_lr_ratio: 0.1
 batch_accumulate: 1
 initial_lr: 1e-3
-loss: DEKRLoss
-
-criterion_params:
-  heatmap_loss: qfl
-  heatmap_loss_factor: 1.0
-  offset_loss_factor: 0.1
+loss:
+  DEKRLoss:
+    heatmap_loss: qfl
+    heatmap_loss_factor: 1.0
+    offset_loss_factor: 0.1
 
 mixed_precision: True
 
diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml
index 62ce33e6f2..6b5e1d1d5a 100644
--- a/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml
+++ b/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml
@@ -13,8 +13,6 @@ cosine_final_lr_ratio: 0.1
 batch_accumulate: 1
 initial_lr: 0.001
 loss: RescoringLoss
-criterion_params: {}
-
 mixed_precision: False
 
 optimizer: AdamW
diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml
index 65239ffa13..41df8e3b97 100644
--- a/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml
+++ b/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml
@@ -7,11 +7,10 @@ lr_mode: CosineLRScheduler
 cosine_final_lr_ratio: 0.01
 batch_accumulate: 1
 initial_lr: 0.01
-loss: SSDLoss
-
-criterion_params:
-  alpha: 1.0
-  dboxes: # OVERRIDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN.
+loss:
+  SSDLoss:
+    alpha: 1.0
+    dboxes: # OVERRIDDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN.
 
 optimizer: SGD
 optimizer_params:
diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml
index fcc3fa4ba1..9fb65aef55 100644
--- a/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml
+++ b/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml
@@ -12,11 +12,10 @@ batch_accumulate: 1
 
 save_ckpt_epoch_list: [285]
 
-loss: YoloXDetectionLoss
-
-criterion_params:
-  strides: [8, 16, 32]  # output strides of all yolo outputs
-  num_classes: 80
+loss:
+  YoloXDetectionLoss:
+    strides: [8, 16, 32]  # output strides of all yolo outputs
+    num_classes: 80
 
 
 
diff --git a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml
index 0015f58e9d..d4e24d0659 100644
--- a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml
+++ b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml
@@ -38,7 +38,7 @@ zero_weight_decay_on_bias_and_bn: False # whether to apply weight decay on batch
 
 
 loss: # Loss function for training (str as one of SuperGradient's built in options, or torch.nn.module)
-criterion_params: {} # when `loss` is one of SuperGradient's built in options, it will be initialized with criterion_params.
+criterion_params: {} # (DEPRECATED - pass the constructor arguments under `loss` instead, e.g. `loss: {LossName: {arg: value}}`) when `loss` is one of SuperGradient's built in options, it will be initialized with criterion_params.
 
 
 ema: False # whether to use Model Exponential Moving Average
diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml
index 766b968597..0f4b8783cb 100644
--- a/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml
+++ b/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml
@@ -20,9 +20,9 @@ ema_params:
   decay: 0.9999
   decay_type: constant
 
-loss: LabelSmoothingCrossEntropyLoss
-criterion_params:
-  smooth_eps: 0.1
+loss:
+  LabelSmoothingCrossEntropyLoss:
+    smooth_eps: 0.1
 
 
 metric_to_watch: Accuracy
diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml
index 1dddb79b14..05040f677b 100644
--- a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml
+++ b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml
@@ -10,9 +10,9 @@ optimizer_params:
   weight_decay: 0.00004
 
 lr_warmup_epochs: 5
-loss: LabelSmoothingCrossEntropyLoss
-criterion_params:
-  smooth_eps: 0.1
+loss:
+  LabelSmoothingCrossEntropyLoss:
+    smooth_eps: 0.1
 
 zero_weight_decay_on_bias_and_bn: True
 ema: True
diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml
index b1b90729ea..3cc4c074c2 100644
--- a/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml
+++ b/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml
@@ -20,9 +20,9 @@ ema_params:
   decay_type: constant
   decay: 0.9999
 
-loss: LabelSmoothingCrossEntropyLoss
-criterion_params:
-  smooth_eps: 0.1
+loss:
+  LabelSmoothingCrossEntropyLoss:
+    smooth_eps: 0.1
 
 
 metric_to_watch: Accuracy
diff --git a/src/super_gradients/training/params.py b/src/super_gradients/training/params.py
index 1388457841..981d807bb4 100755
--- a/src/super_gradients/training/params.py
+++ b/src/super_gradients/training/params.py
@@ -1,3 +1,4 @@
+from super_gradients.common.deprecate import deprecated_training_param, get_deprecated_nested_params_to_factory_format_assigner
 from super_gradients.training.utils import HpmStruct
 from copy import deepcopy
 
@@ -9,7 +10,6 @@
     "cosine_final_lr_ratio": 0.01,
     "optimizer": "SGD",
     "optimizer_params": {},
-    "criterion_params": {},
     "ema": False,
     "batch_accumulate": 1,  # number of batches to accumulate before every backward pass
     "ema_params": {},
@@ -115,6 +115,9 @@ def __init__(self, **entries):
         if len(entries) > 0:
             self.override(**entries)
 
+    @deprecated_training_param(
+        "criterion_params", "3.2.1", "3.3.0", new_arg_assigner=get_deprecated_nested_params_to_factory_format_assigner("loss", "criterion_params")
+    )
     def override(self, **entries):
         super().override(**entries)
         self.validate()
diff --git a/src/super_gradients/training/sg_trainer/sg_trainer.py b/src/super_gradients/training/sg_trainer/sg_trainer.py
index 97ecea6f9c..fb9e554827 100755
--- a/src/super_gradients/training/sg_trainer/sg_trainer.py
+++ b/src/super_gradients/training/sg_trainer/sg_trainer.py
@@ -1181,11 +1181,7 @@ def forward(self, inputs, targets):
         self.metric_to_watch = self.training_params.metric_to_watch
         self.greater_metric_to_watch_is_better = self.training_params.greater_metric_to_watch_is_better
 
-        # Allowing loading instantiated loss or string
-        if isinstance(self.training_params.loss, str):
-            self.criterion = LossesFactory().get({self.training_params.loss: self.training_params.criterion_params})
-
-        elif isinstance(self.training_params.loss, Mapping):
+        if isinstance(self.training_params.loss, Mapping) or isinstance(self.training_params.loss, str):
             self.criterion = LossesFactory().get(self.training_params.loss)
 
         elif isinstance(self.training_params.loss, nn.Module):
diff --git a/tests/integration_tests/conversion_callback_test.py b/tests/integration_tests/conversion_callback_test.py
index 22b01512cf..1f405f3c47 100644
--- a/tests/integration_tests/conversion_callback_test.py
+++ b/tests/integration_tests/conversion_callback_test.py
@@ -59,7 +59,6 @@ def test_classification_architectures(self):
                 "initial_lr": 0.1,
                 "loss": "CrossEntropyLoss",
                 "optimizer": "SGD",
-                "criterion_params": {},
                 "train_metrics_list": [Accuracy(), Top5()],
                 "valid_metrics_list": [Accuracy(), Top5()],
                 "metric_to_watch": "Accuracy",
diff --git a/tests/integration_tests/deci_lab_export_test.py b/tests/integration_tests/deci_lab_export_test.py
index 50e6132d2e..421510bb8c 100644
--- a/tests/integration_tests/deci_lab_export_test.py
+++ b/tests/integration_tests/deci_lab_export_test.py
@@ -49,7 +49,6 @@ def test_train_with_deci_lab_integration(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": self.optimizer,
-            "criterion_params": {},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
             "metric_to_watch": "Accuracy",
diff --git a/tests/integration_tests/ema_train_integration_test.py b/tests/integration_tests/ema_train_integration_test.py
index 3bca4b3204..6d60bfdbf2 100644
--- a/tests/integration_tests/ema_train_integration_test.py
+++ b/tests/integration_tests/ema_train_integration_test.py
@@ -55,7 +55,6 @@ def _train(self, ema_params):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "ema": True,
             "ema_params": ema_params,
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
diff --git a/tests/integration_tests/pretrained_models_test.py b/tests/integration_tests/pretrained_models_test.py
index 33e172762d..bad5949876 100644
--- a/tests/integration_tests/pretrained_models_test.py
+++ b/tests/integration_tests/pretrained_models_test.py
@@ -133,8 +133,7 @@ def setUp(self) -> None:
             "cosine_final_lr_ratio": 0.01,
             "lr_warmup_epochs": 3,
             "batch_accumulate": 1,
-            "loss": "SSDLoss",
-            "criterion_params": {"dboxes": ssd_dboxes},
+            "loss": {"SSDLoss": {"dboxes": ssd_dboxes}},
             "optimizer": "SGD",
             "warmup_momentum": 0.8,
             "optimizer_params": {"momentum": 0.937, "weight_decay": 0.0005, "nesterov": True},
@@ -150,8 +149,7 @@ def setUp(self) -> None:
             "warmup_bias_lr": 0.0,
             "warmup_momentum": 0.9,
             "initial_lr": 0.02,
-            "loss": "YoloXDetectionLoss",
-            "criterion_params": {"strides": [8, 16, 32], "num_classes": 5},  # output strides of all yolo outputs
+            "loss": {"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 5}},
             "train_metrics_list": [],
             "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=5)],
             "metric_to_watch": "mAP@0.50:0.95",
diff --git a/tests/recipe_training_tests/coded_qat_launch_test.py b/tests/recipe_training_tests/coded_qat_launch_test.py
index 243d78cd53..267f6a3a05 100644
--- a/tests/recipe_training_tests/coded_qat_launch_test.py
+++ b/tests/recipe_training_tests/coded_qat_launch_test.py
@@ -22,7 +22,6 @@ def test_qat_launch(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
@@ -83,7 +82,6 @@ def test_ptq_launch(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
diff --git a/tests/unit_tests/dataset_statistics_test.py b/tests/unit_tests/dataset_statistics_test.py
index f68fbb562e..f0d0c7c93c 100644
--- a/tests/unit_tests/dataset_statistics_test.py
+++ b/tests/unit_tests/dataset_statistics_test.py
@@ -26,8 +26,7 @@ def test_dataset_statistics_tensorboard_logger(self):
             "max_epochs": 1,  # we dont really need the actual training to run
             "lr_mode": "CosineLRScheduler",
             "initial_lr": 0.01,
-            "loss": "YoloXDetectionLoss",
-            "criterion_params": {"strides": [8, 16, 32], "num_classes": 80},
+            "loss": {"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 80}},
             "dataset_statistics": True,
             "launch_tensorboard": True,
             "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=80)],
diff --git a/tests/unit_tests/detection_dataset_test.py b/tests/unit_tests/detection_dataset_test.py
index 8bbde92259..1856cfabcb 100644
--- a/tests/unit_tests/detection_dataset_test.py
+++ b/tests/unit_tests/detection_dataset_test.py
@@ -173,9 +173,8 @@ def test_coco_detection_metrics_with_classwise_ap(self):
             "warmup_bias_lr": 0.0,
             "warmup_momentum": 0.9,
             "initial_lr": 0.02,
-            "loss": "YoloXDetectionLoss",
+            "loss": {"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 80}},
             "mixed_precision": False,
-            "criterion_params": {"strides": [8, 16, 32], "num_classes": 80},  # output strides of all yolo outputs
             "train_metrics_list": [],
             "valid_metrics_list": [
                 DetectionMetrics(
diff --git a/tests/unit_tests/double_training_test.py b/tests/unit_tests/double_training_test.py
index 4a9ab0b265..8cf74a3f3f 100644
--- a/tests/unit_tests/double_training_test.py
+++ b/tests/unit_tests/double_training_test.py
@@ -29,7 +29,6 @@ def test_call_train_twice(self):
             "initial_lr": 0.1,
             "loss": torch.nn.CrossEntropyLoss(),
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
diff --git a/tests/unit_tests/early_stop_test.py b/tests/unit_tests/early_stop_test.py
index 2082d2fd73..d4789b7d85 100644
--- a/tests/unit_tests/early_stop_test.py
+++ b/tests/unit_tests/early_stop_test.py
@@ -54,7 +54,6 @@ def setUp(self) -> None:
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Top5()],
diff --git a/tests/unit_tests/factories_test.py b/tests/unit_tests/factories_test.py
index e3b7babba0..7be16c729e 100644
--- a/tests/unit_tests/factories_test.py
+++ b/tests/unit_tests/factories_test.py
@@ -26,7 +26,6 @@ def test_training_with_factories(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "torch.optim.ASGD",  # use an optimizer by factory
-            "criterion_params": {},
             "optimizer_params": {"lambd": 0.0001, "alpha": 0.75},
             "train_metrics_list": ["Accuracy", "Top5"],  # use a metric by factory
             "valid_metrics_list": ["Accuracy", "Top5"],  # use a metric by factory
@@ -52,7 +51,6 @@ def test_training_with_factories_with_typos(self):
             "initial_lr": 0.1,
             "loss": "crossEnt_ropy",
             "optimizer": "AdAm_",  # use an optimizer by factory
-            "criterion_params": {},
             "train_metrics_list": ["accur_acy", "Top_5"],  # use a metric by factory
             "valid_metrics_list": ["aCCuracy", "Top5"],  # use a metric by factory
             "metric_to_watch": "Accurac_Y",
diff --git a/tests/unit_tests/forward_pass_prep_fn_test.py b/tests/unit_tests/forward_pass_prep_fn_test.py
index 57ccf27f69..8260fe0a41 100644
--- a/tests/unit_tests/forward_pass_prep_fn_test.py
+++ b/tests/unit_tests/forward_pass_prep_fn_test.py
@@ -44,7 +44,6 @@ def test_resizing_with_forward_pass_prep_fn(self):
             "initial_lr": 1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
diff --git a/tests/unit_tests/kd_ema_test.py b/tests/unit_tests/kd_ema_test.py
index bbdf9164bd..aaa682d076 100644
--- a/tests/unit_tests/kd_ema_test.py
+++ b/tests/unit_tests/kd_ema_test.py
@@ -25,7 +25,6 @@ def setUp(cls):
             "initial_lr": 0.1,
             "loss": KDLogitsLoss(torch.nn.CrossEntropyLoss()),
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
diff --git a/tests/unit_tests/kd_trainer_test.py b/tests/unit_tests/kd_trainer_test.py
index 98b3a37f3f..fc73fcd1d8 100644
--- a/tests/unit_tests/kd_trainer_test.py
+++ b/tests/unit_tests/kd_trainer_test.py
@@ -47,7 +47,6 @@ def setUp(cls):
             "initial_lr": 0.1,
             "loss": KDLogitsLoss(torch.nn.CrossEntropyLoss()),
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
diff --git a/tests/unit_tests/load_ema_ckpt_test.py b/tests/unit_tests/load_ema_ckpt_test.py
index c1d1fe1d98..0db84feb83 100644
--- a/tests/unit_tests/load_ema_ckpt_test.py
+++ b/tests/unit_tests/load_ema_ckpt_test.py
@@ -28,7 +28,6 @@ def setUp(self) -> None:
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
diff --git a/tests/unit_tests/local_ckpt_head_replacement_test.py b/tests/unit_tests/local_ckpt_head_replacement_test.py
index 0d100e364a..04659f0fb4 100644
--- a/tests/unit_tests/local_ckpt_head_replacement_test.py
+++ b/tests/unit_tests/local_ckpt_head_replacement_test.py
@@ -19,7 +19,6 @@ def test_local_ckpt_head_replacement(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
diff --git a/tests/unit_tests/loss_loggings_test.py b/tests/unit_tests/loss_loggings_test.py
index 5294885bd1..9c44fc2237 100644
--- a/tests/unit_tests/loss_loggings_test.py
+++ b/tests/unit_tests/loss_loggings_test.py
@@ -40,7 +40,6 @@ def test_single_item_logging(self):
             "initial_lr": 0.1,
             "loss": torch.nn.CrossEntropyLoss(),
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
@@ -64,7 +63,6 @@ def test_multiple_unnamed_components_loss_logging(self):
             "initial_lr": 0.1,
             "loss": CriterionWithUnnamedComponents(),
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
@@ -88,7 +86,6 @@ def test_multiple_named_components_loss_logging(self):
             "initial_lr": 0.1,
             "loss": CriterionWithNamedComponents(),
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
diff --git a/tests/unit_tests/lr_cooldown_test.py b/tests/unit_tests/lr_cooldown_test.py
index 668bc0c74f..362c2277aa 100644
--- a/tests/unit_tests/lr_cooldown_test.py
+++ b/tests/unit_tests/lr_cooldown_test.py
@@ -25,7 +25,6 @@ def test_lr_cooldown_with_lr_scheduling(self):
             "initial_lr": 1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
diff --git a/tests/unit_tests/lr_warmup_test.py b/tests/unit_tests/lr_warmup_test.py
index 2521090499..9c6167cd35 100644
--- a/tests/unit_tests/lr_warmup_test.py
+++ b/tests/unit_tests/lr_warmup_test.py
@@ -63,7 +63,6 @@ def test_lr_warmup(self):
             "initial_lr": 1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
@@ -99,7 +98,6 @@ def test_lr_warmup_with_lr_scheduling(self):
             "initial_lr": 1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
@@ -145,7 +143,6 @@ def test_warmup_linear_batch_step(self):
             "initial_lr": 1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
@@ -192,7 +189,6 @@ def test_warmup_linear_epoch_step(self):
             "warmup_initial_lr": 4.0,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
@@ -228,7 +224,6 @@ def test_custom_lr_warmup(self):
             "lr_warmup_epochs": 3,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
diff --git a/tests/unit_tests/max_batches_loop_break_test.py b/tests/unit_tests/max_batches_loop_break_test.py
index bbaa483e09..bb416e1b03 100644
--- a/tests/unit_tests/max_batches_loop_break_test.py
+++ b/tests/unit_tests/max_batches_loop_break_test.py
@@ -28,7 +28,6 @@ def test_max_train_batches_loop_break(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
@@ -63,7 +62,6 @@ def test_max_valid_batches_loop_break(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
diff --git a/tests/unit_tests/optimizer_params_override_test.py b/tests/unit_tests/optimizer_params_override_test.py
index f0b250b160..97906964ee 100644
--- a/tests/unit_tests/optimizer_params_override_test.py
+++ b/tests/unit_tests/optimizer_params_override_test.py
@@ -21,7 +21,6 @@ def test_optimizer_params_partial_override(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"momentum": 0.9},
             "zero_weight_decay_on_bias_and_bn": True,
             "train_metrics_list": [Accuracy(), Top5()],
@@ -50,7 +49,6 @@ def test_optimizer_params_full_override(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "zero_weight_decay_on_bias_and_bn": True,
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
diff --git a/tests/unit_tests/phase_context_test.py b/tests/unit_tests/phase_context_test.py
index 5fb20101c4..a11b8819da 100644
--- a/tests/unit_tests/phase_context_test.py
+++ b/tests/unit_tests/phase_context_test.py
@@ -33,7 +33,6 @@ def context_information_in_train_test(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Top5()],
diff --git a/tests/unit_tests/preprocessing_unit_test.py b/tests/unit_tests/preprocessing_unit_test.py
index 8af6f1ced3..4c15444ef4 100644
--- a/tests/unit_tests/preprocessing_unit_test.py
+++ b/tests/unit_tests/preprocessing_unit_test.py
@@ -102,8 +102,7 @@ def test_setting_preprocessing_params_from_validation_set(self):
             "warmup_bias_lr": 0.0,
             "warmup_momentum": 0.9,
             "initial_lr": 0.02,
-            "loss": "YoloXDetectionLoss",
-            "criterion_params": {"strides": [8, 16, 32], "num_classes": 80},  # output strides of all yolo outputs
+            "loss": {"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 80}},
             "train_metrics_list": [],
             "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=80)],
             "metric_to_watch": "mAP@0.50:0.95",
@@ -173,8 +172,7 @@ def test_setting_preprocessing_params_from_checkpoint(self):
             "warmup_bias_lr": 0.0,
             "warmup_momentum": 0.9,
             "initial_lr": 0.02,
-            "loss": "YoloXDetectionLoss",
-            "criterion_params": {"strides": [8, 16, 32], "num_classes": 80},  # output strides of all yolo outputs
+            "loss": {"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 80}},
             "train_metrics_list": [],
             "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=80)],
             "metric_to_watch": "mAP@0.50:0.95",
diff --git a/tests/unit_tests/resume_training_test.py b/tests/unit_tests/resume_training_test.py
index 6c8bc0b465..b08bfb0286 100644
--- a/tests/unit_tests/resume_training_test.py
+++ b/tests/unit_tests/resume_training_test.py
@@ -36,7 +36,6 @@ def test_resume_training(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
@@ -82,7 +81,6 @@ def test_resume_run_id_training(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
@@ -147,7 +145,6 @@ def test_resume_external_training(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
@@ -195,7 +192,6 @@ def test_resume_external_training_same_dir(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
diff --git a/tests/unit_tests/save_ckpt_test.py b/tests/unit_tests/save_ckpt_test.py
index 11ae820467..3465ae7f73 100644
--- a/tests/unit_tests/save_ckpt_test.py
+++ b/tests/unit_tests/save_ckpt_test.py
@@ -18,7 +18,6 @@ def setUp(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "save_ckpt_epoch_list": [1, 3],
             "loss": "CrossEntropyLoss",
diff --git a/tests/unit_tests/test_train_with_torch_scheduler.py b/tests/unit_tests/test_train_with_torch_scheduler.py
index a561667782..155b861d45 100644
--- a/tests/unit_tests/test_train_with_torch_scheduler.py
+++ b/tests/unit_tests/test_train_with_torch_scheduler.py
@@ -29,7 +29,6 @@ def _run_scheduler_test(self, scheduler_name, scheduler_params, expected_lr, epo
             "initial_lr": 0.1,
             "loss": torch.nn.CrossEntropyLoss(),
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [DummyMetric()],
             "valid_metrics_list": [DummyMetric()],
diff --git a/tests/unit_tests/train_after_test_test.py b/tests/unit_tests/train_after_test_test.py
index d0a7ec085e..3239b1ea76 100644
--- a/tests/unit_tests/train_after_test_test.py
+++ b/tests/unit_tests/train_after_test_test.py
@@ -25,7 +25,6 @@ def setUp(self) -> None:
             "initial_lr": 0.1,
             "loss": torch.nn.CrossEntropyLoss(),
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
diff --git a/tests/unit_tests/train_logging_test.py b/tests/unit_tests/train_logging_test.py
index 5fbb16a539..ae1c178d2e 100644
--- a/tests/unit_tests/train_logging_test.py
+++ b/tests/unit_tests/train_logging_test.py
@@ -24,7 +24,6 @@ def test_train_logging(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
diff --git a/tests/unit_tests/train_with_intialized_param_args_test.py b/tests/unit_tests/train_with_intialized_param_args_test.py
index d1dcefbd22..be3030a0ea 100644
--- a/tests/unit_tests/train_with_intialized_param_args_test.py
+++ b/tests/unit_tests/train_with_intialized_param_args_test.py
@@ -33,7 +33,6 @@ def test_train_with_external_criterion(self):
             "initial_lr": 0.1,
             "loss": torch.nn.CrossEntropyLoss(),
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
@@ -57,7 +56,6 @@ def test_train_with_external_optimizer(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": optimizer,
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
@@ -83,7 +81,6 @@ def test_train_with_external_scheduler(self):
             "initial_lr": lr,
             "loss": "CrossEntropyLoss",
             "optimizer": optimizer,
-            "criterion_params": {},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
             "metric_to_watch": "Accuracy",
@@ -105,7 +102,6 @@ def test_train_with_external_scheduler_class(self):
             "initial_lr": 0.3,
             "loss": "CrossEntropyLoss",
             "optimizer": optimizer,
-            "criterion_params": {},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
             "metric_to_watch": "Accuracy",
@@ -130,7 +126,6 @@ def test_train_with_reduce_on_plateau(self):
             "initial_lr": lr,
             "loss": "CrossEntropyLoss",
             "optimizer": optimizer,
-            "criterion_params": {},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5(), ToyTestClassificationMetric()],
             "metric_to_watch": "Accuracy",
@@ -153,7 +148,6 @@ def test_train_with_external_metric(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [F1Score()],
             "valid_metrics_list": [F1Score()],
@@ -183,7 +177,6 @@ def test_train_with_external_dataloaders(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [F1Score()],
             "valid_metrics_list": [F1Score()],
diff --git a/tests/unit_tests/train_with_precise_bn_test.py b/tests/unit_tests/train_with_precise_bn_test.py
index a67d87bb40..e07375d73e 100644
--- a/tests/unit_tests/train_with_precise_bn_test.py
+++ b/tests/unit_tests/train_with_precise_bn_test.py
@@ -23,7 +23,6 @@ def test_train_with_precise_bn_explicit_size(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
@@ -52,7 +51,6 @@ def test_train_with_precise_bn_implicit_size(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],
diff --git a/tests/unit_tests/update_param_groups_unit_test.py b/tests/unit_tests/update_param_groups_unit_test.py
index e4edd4ca02..b4a24a9a80 100644
--- a/tests/unit_tests/update_param_groups_unit_test.py
+++ b/tests/unit_tests/update_param_groups_unit_test.py
@@ -40,7 +40,6 @@ def test_lr_scheduling_with_update_param_groups(self):
             "lr_decay_factor": 1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy()],
             "valid_metrics_list": [Accuracy()],
diff --git a/tests/unit_tests/vit_unit_test.py b/tests/unit_tests/vit_unit_test.py
index b9a3527761..a1f2ccf18a 100644
--- a/tests/unit_tests/vit_unit_test.py
+++ b/tests/unit_tests/vit_unit_test.py
@@ -20,7 +20,6 @@ def setUp(self):
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
             "optimizer": "SGD",
-            "criterion_params": {},
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],
             "valid_metrics_list": [Accuracy(), Top5()],

From b66f46628a785551220691d415375c3c586503c6 Mon Sep 17 00:00:00 2001
From: shayaharon <shay.aharon@deci.ai>
Date: Tue, 10 Oct 2023 17:36:44 +0300
Subject: [PATCH 2/9] all refs removed

---
 src/super_gradients/recipes/cityscapes_segformer.yaml  |  6 +++---
 src/super_gradients/recipes/imagenet_resnet50_kd.yaml  | 10 +++++-----
 src/super_gradients/recipes/roboflow_yolo_nas_m.yaml   |  3 ---
 src/super_gradients/recipes/roboflow_yolo_nas_s.yaml   |  2 --
 .../cifar10_resnet_train_params.yaml                   |  1 -
 5 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/src/super_gradients/recipes/cityscapes_segformer.yaml b/src/super_gradients/recipes/cityscapes_segformer.yaml
index fb4d8bb227..7f3bd9b849 100644
--- a/src/super_gradients/recipes/cityscapes_segformer.yaml
+++ b/src/super_gradients/recipes/cityscapes_segformer.yaml
@@ -95,9 +95,9 @@ training_hyperparams:
 
   sync_bn: True
 
-  loss: LabelSmoothingCrossEntropyLoss
-  criterion_params:
-    ignore_index: ${cityscapes_ignored_label}
+  loss:
+    LabelSmoothingCrossEntropyLoss:
+      ignore_index: ${cityscapes_ignored_label}
 
   phase_callbacks:
     - SlidingWindowValidationCallback:
diff --git a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml
index 2bc9109f46..5ebbd6a200 100644
--- a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml
+++ b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml
@@ -25,11 +25,11 @@ val_dataloader: imagenet_val
 resume: False
 training_hyperparams:
   resume: ${resume}
-  loss: KDLogitsLoss
-  criterion_params:
-    distillation_loss_coeff: 0.8
-    task_loss_fn:
-      _target_: super_gradients.training.losses.label_smoothing_cross_entropy_loss.LabelSmoothingCrossEntropyLoss
+  loss:
+    KDLogitsLoss:
+      distillation_loss_coeff: 0.8
+      task_loss_fn:
+        _target_: super_gradients.training.losses.label_smoothing_cross_entropy_loss.LabelSmoothingCrossEntropyLoss
 
 arch_params:
   teacher_input_adapter:
diff --git a/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml b/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml
index 2d6641e801..f0350e2ce8 100644
--- a/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml
+++ b/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml
@@ -60,9 +60,6 @@ training_hyperparams:
 
   max_epochs: 100
   mixed_precision: True
-  criterion_params:
-    num_classes: ${num_classes}
-
 
   phase_callbacks: []
   loss:
diff --git a/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml b/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml
index 8fb2baf901..ce9dbf4332 100644
--- a/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml
+++ b/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml
@@ -60,8 +60,6 @@ training_hyperparams:
 
   max_epochs: 100
   mixed_precision: True
-  criterion_params:
-    num_classes: ${num_classes}
 
 
   phase_callbacks: []
diff --git a/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml
index 0905ba57ff..ba83f94d04 100644
--- a/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml
+++ b/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml
@@ -15,7 +15,6 @@ lr_warmup_epochs: 0
 initial_lr: 0.1
 loss: LabelSmoothingCrossEntropyLoss
 optimizer: SGD
-criterion_params: {}
 
 optimizer_params:
   weight_decay: 1e-4

From 5192ddda65bb5e75e5dc817eea8d290b43184532 Mon Sep 17 00:00:00 2001
From: shayaharon <shay.aharon@deci.ai>
Date: Wed, 11 Oct 2023 10:39:43 +0300
Subject: [PATCH 3/9] yolox ref removed

---
 src/super_gradients/recipes/roboflow_yolox.yaml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/super_gradients/recipes/roboflow_yolox.yaml b/src/super_gradients/recipes/roboflow_yolox.yaml
index 38ad33fa43..c26f4d8c2a 100644
--- a/src/super_gradients/recipes/roboflow_yolox.yaml
+++ b/src/super_gradients/recipes/roboflow_yolox.yaml
@@ -39,8 +39,11 @@ resume: False
 training_hyperparams:
   max_epochs: 100
   resume: ${resume}
-  criterion_params:
-    num_classes: ${num_classes}
+  loss:
+    YoloXDetectionLoss:
+      strides: [ 8, 16, 32 ]  # output strides of all yolo outputs
+      num_classes: ${num_classes}
+
   train_metrics_list:
     - DetectionMetrics:
         normalize_targets: True

From 9c24d6c857b310d8d385eb7a76978d782cc23a0f Mon Sep 17 00:00:00 2001
From: shayaharon <shay.aharon@deci.ai>
Date: Wed, 11 Oct 2023 14:10:25 +0300
Subject: [PATCH 4/9] added tests

---
 tests/unit_tests/test_deprecations.py         | 60 ++++++++++++++++++-
 .../unit_tests/train_with_precise_bn_test.py  |  1 +
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/tests/unit_tests/test_deprecations.py b/tests/unit_tests/test_deprecations.py
index 23b0ddb9f9..dc0c25c59e 100644
--- a/tests/unit_tests/test_deprecations.py
+++ b/tests/unit_tests/test_deprecations.py
@@ -1,12 +1,17 @@
 import unittest
+import warnings
 from typing import Union
 
 from omegaconf import DictConfig
 from torch import nn
 
+from super_gradients import setup_device, Trainer
 from super_gradients.common.registry import register_model
 from super_gradients.training import models
-from super_gradients.training.models import CustomizableDetector, get_arch_params
+from super_gradients.training.dataloaders.dataloaders import classification_test_dataloader
+from super_gradients.training.metrics import Accuracy, Top5
+from super_gradients.training.models import CustomizableDetector, get_arch_params, ResNet18
+from super_gradients.training.params import TrainingParams
 from super_gradients.training.utils import HpmStruct
 from super_gradients.training.utils.utils import arch_params_deprecated
 from super_gradients.training.transforms.transforms import DetectionTargetsFormatTransform, DetectionHorizontalFlip, DetectionPaddedRescale
@@ -98,6 +103,59 @@ def test_deprecated_HpmStruct_import(self):
         except ImportError:
             self.fail("ImportError raised unexpectedly for HpmStruct")
 
+    def test_deprecated_criterion_params(self):
+        with self.assertWarns(DeprecationWarning):
+            warnings.simplefilter("always")
+            train_params = {
+                "max_epochs": 4,
+                "lr_decay_factor": 0.1,
+                "lr_updates": [4],
+                "lr_mode": "StepLRScheduler",
+                "lr_warmup_epochs": 0,
+                "initial_lr": 0.1,
+                "loss": "CrossEntropyLoss",
+                "optimizer": "SGD",
+                "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
+                "loss": "CrossEntropyLoss",
+                "train_metrics_list": [],
+                "valid_metrics_list": [],
+                "metric_to_watch": "Accuracy",
+                "greater_metric_to_watch_is_better": True,
+            }
+            train_params = TrainingParams(**train_params)
+            train_params.override(criterion_params={"ignore_index": 0})
+
+    def test_train_with_deprecated_criterion_params(self):
+        setup_device(device="cpu")
+        trainer = Trainer("test_train_with_precise_bn_explicit_size")
+        net = ResNet18(num_classes=5, arch_params={})
+        train_params = {
+            "max_epochs": 2,
+            "lr_updates": [1],
+            "lr_decay_factor": 0.1,
+            "lr_mode": "StepLRScheduler",
+            "lr_warmup_epochs": 0,
+            "initial_lr": 0.1,
+            "loss": "CrossEntropyLoss",
+            "criterion_params": {"ignore_index": -300},
+            "optimizer": "SGD",
+            "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
+            "train_metrics_list": [Accuracy(), Top5()],
+            "valid_metrics_list": [Accuracy(), Top5()],
+            "metric_to_watch": "Accuracy",
+            "greater_metric_to_watch_is_better": True,
+            "precise_bn": True,
+            "precise_bn_batch_size": 100,
+        }
+        trainer.train(
+            model=net,
+            training_params=train_params,
+            train_loader=classification_test_dataloader(batch_size=10),
+            valid_loader=classification_test_dataloader(batch_size=10),
+        )
+
+        self.assertEqual(trainer.criterion.ignore_index, -300)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/unit_tests/train_with_precise_bn_test.py b/tests/unit_tests/train_with_precise_bn_test.py
index e07375d73e..7313aa4d74 100644
--- a/tests/unit_tests/train_with_precise_bn_test.py
+++ b/tests/unit_tests/train_with_precise_bn_test.py
@@ -22,6 +22,7 @@ def test_train_with_precise_bn_explicit_size(self):
             "lr_warmup_epochs": 0,
             "initial_lr": 0.1,
             "loss": "CrossEntropyLoss",
+            "criterion_params": {"ignore_index": 0},
             "optimizer": "SGD",
             "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
             "train_metrics_list": [Accuracy(), Top5()],

From f85d1fbf2c31a611c847d71768ae49986128b195 Mon Sep 17 00:00:00 2001
From: shayaharon <shay.aharon@deci.ai>
Date: Wed, 11 Oct 2023 15:34:37 +0300
Subject: [PATCH 5/9] added docs

---
 src/super_gradients/common/deprecate.py | 63 +++++++++++++++++++++++++
 tests/unit_tests/test_deprecations.py   |  4 +-
 2 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/src/super_gradients/common/deprecate.py b/src/super_gradients/common/deprecate.py
index 3fbb3f2285..0f77138e47 100644
--- a/src/super_gradients/common/deprecate.py
+++ b/src/super_gradients/common/deprecate.py
@@ -79,6 +79,37 @@ def wrapper(*args, **kwargs):
 
 
 def deprecated_training_param(deprecated_tparam_name: str, deprecated_since: str, removed_from: str, new_arg_assigner: Callable, message: str = ""):
+    """
+    Decorator for deprecating training hyperparameters.
+
+    Recommended to be used as a decorator on top of super_gradients.training.params.TrainingParams's override method:
+
+        class TrainingParams(HpmStruct):
+            def __init__(self, **entries):
+                # We initialize with the default training params, overridden by the provided params
+                default_training_params = deepcopy(DEFAULT_TRAINING_PARAMS)
+                super().__init__(**default_training_params)
+                self.set_schema(TRAINING_PARAM_SCHEMA)
+                if len(entries) > 0:
+                    self.override(**entries)
+
+            @deprecated_training_param(
+                "criterion_params", "3.2.1", "3.3.0", new_arg_assigner=get_deprecated_nested_params_to_factory_format_assigner("loss", "criterion_params")
+            )
+            def override(self, **entries):
+                super().override(**entries)
+                self.validate()
+
+
+    :param deprecated_tparam_name: str, the name of the deprecated hyperparameter.
+    :param deprecated_since: str, SG version of deprecation.
+    :param removed_from: str, SG version of removal.
+    :param new_arg_assigner: Callable, a handler to assign the deprecated parameter value to the updated
+     hyperparameter entry.
+    :param message: str, message to append to the deprecation warning (default="")
+    :return:
+    """
+
     def decorator(func):
         def wrapper(*args, **training_params):
             if deprecated_tparam_name in training_params:
@@ -107,6 +138,38 @@ def wrapper(*args, **training_params):
 
 
 def get_deprecated_nested_params_to_factory_format_assigner(param_name: str, nested_params_name: str) -> Callable:
+    """
+    Returns an assigner to be used by deprecated_training_param decorator.
+
+    The assigner takes a deprecated parameter name, and its __init__ arguments that previously were passed
+     through nested_params_name entry in training_params and manipulates the training_params so they are in 'Factory' format.
+     For example:
+
+    class TrainingParams(HpmStruct):
+        def __init__(self, **entries):
+            # We initialize with the default training params, overridden by the provided params
+            default_training_params = deepcopy(DEFAULT_TRAINING_PARAMS)
+            super().__init__(**default_training_params)
+            self.set_schema(TRAINING_PARAM_SCHEMA)
+            if len(entries) > 0:
+                self.override(**entries)
+
+        @deprecated_training_param(
+            "criterion_params", "3.2.1", "3.3.0", new_arg_assigner=get_deprecated_nested_params_to_factory_format_assigner("loss", "criterion_params")
+        )
+        def override(self, **entries):
+            super().override(**entries)
+            self.validate()
+
+
+    then under the hood, training_params.loss will be set to
+     {training_params.loss: training_params.criterion_params}
+
+    :param param_name: str, parameter name (for example, 'loss').
+    :param nested_params_name: str, nested_params_name (for example, 'criterion_params')
+    :return: Callable as described above.
+    """
+
     def deprecated_nested_params_to_factory_format_assigner(**params):
         nested_params = params.get(nested_params_name)
         param_val = params.get(param_name)
diff --git a/tests/unit_tests/test_deprecations.py b/tests/unit_tests/test_deprecations.py
index dc0c25c59e..7096b1c800 100644
--- a/tests/unit_tests/test_deprecations.py
+++ b/tests/unit_tests/test_deprecations.py
@@ -5,7 +5,7 @@
 from omegaconf import DictConfig
 from torch import nn
 
-from super_gradients import setup_device, Trainer
+from super_gradients import Trainer
 from super_gradients.common.registry import register_model
 from super_gradients.training import models
 from super_gradients.training.dataloaders.dataloaders import classification_test_dataloader
@@ -126,7 +126,6 @@ def test_deprecated_criterion_params(self):
             train_params.override(criterion_params={"ignore_index": 0})
 
     def test_train_with_deprecated_criterion_params(self):
-        setup_device(device="cpu")
         trainer = Trainer("test_train_with_precise_bn_explicit_size")
         net = ResNet18(num_classes=5, arch_params={})
         train_params = {
@@ -145,7 +144,6 @@ def test_train_with_deprecated_criterion_params(self):
             "metric_to_watch": "Accuracy",
             "greater_metric_to_watch_is_better": True,
             "precise_bn": True,
-            "precise_bn_batch_size": 100,
         }
         trainer.train(
             model=net,

From 56512cb36d62d73f7ef492b939c97d6ac14cf295 Mon Sep 17 00:00:00 2001
From: shayaharon <shay.aharon@deci.ai>
Date: Wed, 11 Oct 2023 19:23:28 +0300
Subject: [PATCH 6/9] fixed test and updated factory for kdloss param

---
 src/super_gradients/training/losses/kd_losses.py | 3 +++
 tests/unit_tests/test_deprecations.py            | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/super_gradients/training/losses/kd_losses.py b/src/super_gradients/training/losses/kd_losses.py
index a42ee2c448..ee4983f4d6 100644
--- a/src/super_gradients/training/losses/kd_losses.py
+++ b/src/super_gradients/training/losses/kd_losses.py
@@ -1,6 +1,8 @@
 from torch.nn.modules.loss import _Loss, KLDivLoss
 import torch
 
+from super_gradients.common.decorators.factory_decorator import resolve_param
+from super_gradients.common.factories.losses_factory import LossesFactory
 from super_gradients.common.object_names import Losses
 from super_gradients.common.registry.registry import register_loss
 
@@ -19,6 +21,7 @@ def forward(self, student_output, teacher_output):
 class KDLogitsLoss(_Loss):
     """Knowledge distillation loss, wraps the task loss and distillation loss"""
 
+    @resolve_param("task_loss_fn", LossesFactory())
     def __init__(self, task_loss_fn: _Loss, distillation_loss_fn: _Loss = KDklDivLoss(), distillation_loss_coeff: float = 0.5):
         """
         :param task_loss_fn: task loss. E.g., CrossEntropyLoss
diff --git a/tests/unit_tests/test_deprecations.py b/tests/unit_tests/test_deprecations.py
index 7096b1c800..cf3d7c9a76 100644
--- a/tests/unit_tests/test_deprecations.py
+++ b/tests/unit_tests/test_deprecations.py
@@ -143,7 +143,6 @@ def test_train_with_deprecated_criterion_params(self):
             "valid_metrics_list": [Accuracy(), Top5()],
             "metric_to_watch": "Accuracy",
             "greater_metric_to_watch_is_better": True,
-            "precise_bn": True,
         }
         trainer.train(
             model=net,

From e2e1566159629957718d0108570dcba40acd604b Mon Sep 17 00:00:00 2001
From: shayaharon <shay.aharon@deci.ai>
Date: Wed, 11 Oct 2023 19:26:08 +0300
Subject: [PATCH 7/9] fixed yaml celoss ref

---
 src/super_gradients/recipes/imagenet_resnet50_kd.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml
index 5ebbd6a200..bd6077335a 100644
--- a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml
+++ b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml
@@ -28,8 +28,7 @@ training_hyperparams:
   loss:
     KDLogitsLoss:
       distillation_loss_coeff: 0.8
-      task_loss_fn:
-        _target_: super_gradients.training.losses.label_smoothing_cross_entropy_loss.LabelSmoothingCrossEntropyLoss
+      task_loss_fn: CrossEntropyLoss
 
 arch_params:
   teacher_input_adapter:

From 5b8f8030c6fc0b98cc2eea7e48f1a4361f8b3aa0 Mon Sep 17 00:00:00 2001
From: shayaharon <shay.aharon@deci.ai>
Date: Wed, 11 Oct 2023 21:02:39 +0300
Subject: [PATCH 8/9] fixed unittest

---
 tests/unit_tests/training_params_factory_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit_tests/training_params_factory_test.py b/tests/unit_tests/training_params_factory_test.py
index 5e30984841..02ed0e57b7 100644
--- a/tests/unit_tests/training_params_factory_test.py
+++ b/tests/unit_tests/training_params_factory_test.py
@@ -5,7 +5,7 @@
 class TrainingParamsTest(unittest.TestCase):
     def test_get_train_params(self):
         train_params = training_hyperparams.coco2017_yolox_train_params()
-        self.assertTrue(train_params["loss"] == "YoloXDetectionLoss")
+        self.assertTrue(list(train_params["loss"].keys())[0] == "YoloXDetectionLoss")
         self.assertTrue(train_params["max_epochs"] == 300)
 
     def test_get_train_params_with_overrides(self):

From b25434de92b0539771d17464fac34cade59337df Mon Sep 17 00:00:00 2001
From: shayaharon <shay.aharon@deci.ai>
Date: Thu, 12 Oct 2023 11:52:52 +0300
Subject: [PATCH 9/9] fixed last unit test

---
 tests/unit_tests/training_params_factory_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit_tests/training_params_factory_test.py b/tests/unit_tests/training_params_factory_test.py
index 02ed0e57b7..77f46e8f75 100644
--- a/tests/unit_tests/training_params_factory_test.py
+++ b/tests/unit_tests/training_params_factory_test.py
@@ -10,7 +10,7 @@ def test_get_train_params(self):
 
     def test_get_train_params_with_overrides(self):
         train_params = training_hyperparams.coco2017_yolox_train_params(overriding_params={"max_epochs": 5})
-        self.assertTrue(train_params["loss"] == "YoloXDetectionLoss")
+        self.assertTrue(list(train_params["loss"].keys())[0] == "YoloXDetectionLoss")
         self.assertTrue(train_params["max_epochs"] == 5)