Skip to content

Commit

Permalink
Feature/sg 128 kd recipe resnet50 (#213)
Browse files Browse the repository at this point in the history
* add kd train with resnet50

* add kd train with resnet50

* wip

* update acc and s3 path

* wip

* split train_from_recipe

* change import

* load new resnet50 weights

* wip

* changes
  • Loading branch information
shanibenbaruch authored Jun 8, 2022
1 parent bd56ade commit adf5dca
Show file tree
Hide file tree
Showing 14 changed files with 192 additions and 11 deletions.
7 changes: 4 additions & 3 deletions src/super_gradients/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from super_gradients.training import ARCHITECTURES, losses, utils, datasets_utils, DataAugmentation, \
TestDatasetInterface, SegmentationTestDatasetInterface, DetectionTestDatasetInterface, ClassificationTestDatasetInterface, SgModel
TestDatasetInterface, SegmentationTestDatasetInterface, DetectionTestDatasetInterface, ClassificationTestDatasetInterface, SgModel, KDModel
from super_gradients.common import init_trainer, is_distributed
from super_gradients.examples.train_from_recipe_example import train_from_recipe
from super_gradients.examples.train_from_kd_recipe_example import train_from_kd_recipe

__all__ = ['ARCHITECTURES', 'losses', 'utils', 'datasets_utils', 'DataAugmentation',
'TestDatasetInterface', 'SgModel', 'SegmentationTestDatasetInterface', 'DetectionTestDatasetInterface',
'ClassificationTestDatasetInterface', 'init_trainer', 'is_distributed', 'train_from_recipe']
'TestDatasetInterface', 'SgModel', 'KDModel', 'SegmentationTestDatasetInterface', 'DetectionTestDatasetInterface',
'ClassificationTestDatasetInterface', 'init_trainer', 'is_distributed', 'train_from_recipe', 'train_from_kd_recipe']
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
Example code for running SuperGradients' knowledge distillation (KD) recipes.
General use: python train_from_kd_recipe.py --config-name="DESIRED_RECIPE".
For recipe's specific instructions and details refer to the recipe's configuration file in the recipes directory.
"""

import super_gradients
from omegaconf import DictConfig
import hydra
import pkg_resources
from super_gradients.training.kd_trainer import KDTrainer


# Hydra entry point for KD recipes.
# config_path is the directory of the installed super_gradients.recipes package, so the recipe given with
# --config-name is looked up there; the resulting config is passed to KDTrainer.train.
@hydra.main(config_path=pkg_resources.resource_filename("super_gradients.recipes", ""))
def main(cfg: DictConfig) -> None:
KDTrainer.train(cfg)


# Script entry: runs only when this file is executed directly, not when it is imported.
# init_trainer() is called before main() - presumably it prepares the runtime environment; confirm in
# super_gradients.common before reordering.
if __name__ == "__main__":
super_gradients.init_trainer()
main()
88 changes: 88 additions & 0 deletions src/super_gradients/recipes/imagenet_resnet50_kd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# ResNet50 Imagenet classification training:
# This example trains with batch_size = 192 * 8 GPUs, total 1536.
# Training time on 8 x GeForce RTX A5000 is 9min / epoch.
# Reach => 81.91 Top1 accuracy.
#
# Log and tensorboard at s3://deci-pretrained-models/KD_ResNet50_Beit_Base_ImageNet/average_model.pth

# Instructions:
# When running from the command line, first set the PYTHONPATH environment variable (replace "YOUR_LOCAL_PATH" with the path to the downloaded repo):
# export PYTHONPATH="YOUR_LOCAL_PATH"/super_gradients/:"YOUR_LOCAL_PATH"/super_gradients/src/
# Then:
# python train_from_kd_recipe_example/train_from_kd_recipe.py --config-name=imagenet_resnet50_kd

defaults:
- training_hyperparams: imagenet_resnet50_kd_train_params
- dataset_params: imagenet_dataset_params
- arch_params: default_arch_params
- checkpoint_params: default_checkpoint_params

training_hyperparams:
loss: kd_loss
criterion_params:
distillation_loss_coeff: 0.8
task_loss_fn:
_target_: super_gradients.training.losses.label_smoothing_cross_entropy_loss.LabelSmoothingCrossEntropyLoss

arch_params:
teacher_input_adapter:
_target_: super_gradients.training.utils.kd_model_utils.NormalizationAdapter
mean_original: [0.485, 0.456, 0.406]
std_original: [0.229, 0.224, 0.225]
mean_required: [0.5, 0.5, 0.5]
std_required: [0.5, 0.5, 0.5]

student_arch_params:
num_classes: 1000

teacher_arch_params:
num_classes: 1000
image_size: [224, 224]
patch_size: [16, 16]

dataset_params:
batch_size: 192
val_batch_size: 256
random_erase_prob: 0
random_erase_value: random
train_interpolation: random
rand_augment_config_string: rand-m7-mstd0.5
cutmix: True
cutmix_params:
mixup_alpha: 0.2
cutmix_alpha: 1.0
label_smoothing: 0.1
aug_repeat_count: 3

dataset_interface:
imagenet:
dataset_params: ${dataset_params}

data_loader_num_workers: 8

model_checkpoints_location: local
load_checkpoint: False
checkpoint_params:
load_checkpoint: ${load_checkpoint}
teacher_pretrained_weights: imagenet

run_teacher_on_eval: True

experiment_name: resnet50_imagenet_KD_Model

ckpt_root_dir:

multi_gpu:
_target_: super_gradients.training.sg_model.MultiGPUMode
value: DDP

sg_model:
_target_: super_gradients.KDModel
experiment_name: ${experiment_name}
model_checkpoints_location: ${model_checkpoints_location}
ckpt_root_dir: ${ckpt_root_dir}
multi_gpu: ${multi_gpu}

architecture: kd_module
student_architecture: resnet50
teacher_architecture: beit_base_patch16_224
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
defaults:
- default_train_params

max_epochs: 610
initial_lr: 5e-3
lr_mode: cosine
lr_warmup_epochs: 5
lr_cooldown_epochs: 10
ema: True
mixed_precision: True
zero_weight_decay_on_bias_and_bn: True
optimizer: Lamb
optimizer_params:
weight_decay: 0.02
loss: cross_entropy
train_metrics_list: # metrics computed during training
- _target_: super_gradients.training.metrics.Accuracy
- _target_: super_gradients.training.metrics.Top5
valid_metrics_list: # metrics for evaluation
- _target_: super_gradients.training.metrics.Accuracy
- _target_: super_gradients.training.metrics.Top5
loss_logging_items_names: ["Loss", "Task Loss", "Distillation Loss"]

_convert_: all
3 changes: 2 additions & 1 deletion src/super_gradients/training/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from super_gradients.training.models import ARCHITECTURES
from super_gradients.training.sg_model import SgModel, \
MultiGPUMode, StrictLoad
from super_gradients.training.kd_model import KDModel

__all__ = ['distributed_training_utils', 'datasets_utils', 'DataAugmentation', 'DetectionDataSet', 'TestDatasetInterface',
'ARCHITECTURES', 'SgModel', 'MultiGPUMode', 'TestDatasetInterface', 'SegmentationTestDatasetInterface', 'DetectionTestDatasetInterface', 'ClassificationTestDatasetInterface', 'StrictLoad']
'ARCHITECTURES', 'SgModel', 'KDModel', 'MultiGPUMode', 'TestDatasetInterface', 'SegmentationTestDatasetInterface', 'DetectionTestDatasetInterface', 'ClassificationTestDatasetInterface', 'StrictLoad']
5 changes: 5 additions & 0 deletions src/super_gradients/training/kd_model/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# PACKAGE IMPORTS FOR EXTERNAL USAGE

from super_gradients.training.kd_model.kd_model import KDModel

__all__ = ['KDModel']
16 changes: 16 additions & 0 deletions src/super_gradients/training/kd_trainer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from super_gradients.training.trainer import Trainer


class KDTrainer(Trainer):
    """
    Recipe runner for knowledge distillation (KD) models.

    Only build_model is overridden: the network is built from the recipe's student/teacher entries.
    See the train_from_kd_recipe example in the examples directory for its usage.
    """

    @classmethod
    def build_model(cls, cfg):
        """
        Builds the KD network by delegating to cfg.sg_model.build_model.

        :param cfg: recipe configuration. The entries read are sg_model, student_architecture,
            teacher_architecture, arch_params, student_arch_params, teacher_arch_params,
            checkpoint_params and run_teacher_on_eval.
        """
        # Resolve the callee first so that configuration lookups happen in the same order as a direct call.
        build = cfg.sg_model.build_model
        kd_build_kwargs = {
            "student_architecture": cfg.student_architecture,
            "teacher_architecture": cfg.teacher_architecture,
            "arch_params": cfg.arch_params,
            "student_arch_params": cfg.student_arch_params,
            "teacher_arch_params": cfg.teacher_arch_params,
            "checkpoint_params": cfg.checkpoint_params,
            "run_teacher_on_eval": cfg.run_teacher_on_eval,
        }
        build(**kd_build_kwargs)
3 changes: 2 additions & 1 deletion src/super_gradients/training/losses/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from super_gradients.training.losses.focal_loss import FocalLoss
from super_gradients.training.losses.kd_losses import KDLogitsLoss
from super_gradients.training.losses.label_smoothing_cross_entropy_loss import LabelSmoothingCrossEntropyLoss
from super_gradients.training.losses.r_squared_loss import RSquaredLoss
from super_gradients.training.losses.shelfnet_ohem_loss import ShelfNetOHEMLoss
Expand All @@ -10,4 +11,4 @@
from super_gradients.training.losses.all_losses import LOSSES

__all__ = ['FocalLoss', 'LabelSmoothingCrossEntropyLoss', 'ShelfNetOHEMLoss', 'ShelfNetSemanticEncodingLoss',
'YoLoV3DetectionLoss', 'YoLoV5DetectionLoss', 'RSquaredLoss', 'SSDLoss', 'LOSSES', 'BCEDiceLoss']
'YoLoV3DetectionLoss', 'YoLoV5DetectionLoss', 'RSquaredLoss', 'SSDLoss', 'LOSSES', 'BCEDiceLoss', 'KDLogitsLoss']
4 changes: 3 additions & 1 deletion src/super_gradients/training/losses/all_losses.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from super_gradients.training.losses import LabelSmoothingCrossEntropyLoss, YoLoV3DetectionLoss, ShelfNetOHEMLoss, \
ShelfNetSemanticEncodingLoss, RSquaredLoss, YoLoV5DetectionLoss, SSDLoss, BCEDiceLoss
from super_gradients.training.losses.kd_losses import KDLogitsLoss
from super_gradients.training.losses.stdc_loss import STDCLoss

LOSSES = {"cross_entropy": LabelSmoothingCrossEntropyLoss,
Expand All @@ -13,5 +14,6 @@
"yolo_v5_loss": YoLoV5DetectionLoss,
"ssd_loss": SSDLoss,
"stdc_loss": STDCLoss,
"bce_dice_loss": BCEDiceLoss
"bce_dice_loss": BCEDiceLoss,
"kd_loss": KDLogitsLoss
}
2 changes: 1 addition & 1 deletion src/super_gradients/training/pretrained_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"regnetY400_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/RegnetY400/average_model_regnety400.pth",
"regnetY200_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/RegnetY200/average_model_regnety200.pth",

"resnet50_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/ResNet50_ImageNet/average_model.pth",
"resnet50_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/KD_ResNet50_Beit_Base_ImageNet/resnet.pth",
"resnet34_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/resent_34/average_model.pth",
"resnet18_imagenet": "https://deci-pretrained-models.s3.amazonaws.com/resnet18/average_model.pth",

Expand Down
10 changes: 7 additions & 3 deletions src/super_gradients/training/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ def scale_params_for_yolov5(cfg):
logger.info(log_msg)
return cfg

@staticmethod
def train(cfg: DictConfig) -> None:
@classmethod
def train(cls, cfg: DictConfig) -> None:
"""
Trains according to cfg recipe configuration.
Expand All @@ -99,11 +99,15 @@ def train(cfg: DictConfig) -> None:
cfg.sg_model.connect_dataset_interface(cfg.dataset_interface, data_loader_num_workers=cfg.data_loader_num_workers)

# BUILD NETWORK
cfg.sg_model.build_model(cfg.architecture, arch_params=cfg.arch_params, checkpoint_params=cfg.checkpoint_params)
cls.build_model(cfg)

# FIXME: REMOVE PARAMETER MANIPULATION SPECIFIC FOR YOLO
if str(cfg.architecture).startswith("yolo_v5"):
cfg = Trainer.scale_params_for_yolov5(cfg)

# TRAIN
cfg.sg_model.train(training_params=cfg.training_hyperparams)

# Builds the network by calling cfg.sg_model.build_model with the recipe's architecture, arch_params and
# checkpoint_params. train() invokes this through cls, so a subclass can supply its own build_model.
@classmethod
def build_model(cls, cfg):
cfg.sg_model.build_model(cfg.architecture, arch_params=cfg.arch_params, checkpoint_params=cfg.checkpoint_params)
17 changes: 17 additions & 0 deletions src/super_gradients/training/utils/kd_model_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import torch


class NormalizationAdapter(torch.nn.Module):
    """
    Converts an input normalized with (mean_original, std_original) into one normalized with
    (mean_required, std_required).

    For x = (raw - mean_original) / std_original, forward() returns (x + additive) * multiplier, where
        additive   = (mean_original - mean_required) / std_original
        multiplier = std_original / std_required
    which is algebraically equal to (raw - mean_required) / std_required.

    Every statistic gets two trailing singleton dimensions, so a list of length C becomes a (C, 1, 1) tensor
    that broadcasts over the last two axes of the input (presumably N x C x H x W images - confirm with callers).
    """

    def __init__(self, mean_original, std_original, mean_required, std_required):
        """
        :param mean_original: mean the incoming data was normalized with.
        :param std_original: std the incoming data was normalized with.
        :param mean_required: mean of the target normalization.
        :param std_required: std of the target normalization.
        """
        super().__init__()

        def _with_trailing_dims(values):
            # Append two singleton axes, e.g. (C,) -> (C, 1, 1).
            return torch.tensor(values)[..., None, None]

        src_mean = _with_trailing_dims(mean_original)
        src_std = _with_trailing_dims(std_original)
        dst_mean = _with_trailing_dims(mean_required)
        dst_std = _with_trailing_dims(std_required)

        # Stored as parameters so they are part of the module's state and follow it across devices.
        self.additive = torch.nn.Parameter((src_mean - dst_mean) / src_std)
        self.multiplier = torch.nn.Parameter(src_std / dst_std)

    def forward(self, x):
        """Shifts x by `additive`, then scales the result by `multiplier`."""
        shifted = x + self.additive
        return shifted * self.multiplier
2 changes: 1 addition & 1 deletion tests/integration_tests/pretrained_models_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def setUp(self) -> None:

self.imagenet21k_pretrained_ckpt_params = {"pretrained_weights": "imagenet21k"}

self.imagenet_pretrained_accuracies = {"resnet50": 0.7947,
self.imagenet_pretrained_accuracies = {"resnet50": 0.8191,
"resnet34": 0.7413,
"resnet18": 0.706,
"repvgg_a0": 0.7205,
Expand Down

0 comments on commit adf5dca

Please sign in to comment.