Merge branch 'master' into hotfix/SG-000-add_images_for_examples
Louis-Dupont committed Oct 30, 2023
2 parents a4174d6 + 40b1f2c commit b8cdf74
Showing 11 changed files with 1,361 additions and 11 deletions.
1 change: 1 addition & 0 deletions .circleci/config.yml
@@ -641,6 +641,7 @@ jobs:
python3.8 src/super_gradients/train_from_recipe.py --config-name=imagenet_resnet50 batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 dataset_params.train_dataset_params.root=/data/Imagenet/train dataset_params.val_dataset_params.root=/data/Imagenet/val
python3.8 src/super_gradients/train_from_recipe.py --config-name=imagenet_vit_base batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 dataset_params.train_dataset_params.root=/data/Imagenet/train dataset_params.val_dataset_params.root=/data/Imagenet/val
python3.8 src/super_gradients/train_from_kd_recipe.py --config-name=imagenet_resnet50_kd batch_size=8 val_batch_size=8 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 dataset_params.train_dataset_params.root=/data/Imagenet/train dataset_params.val_dataset_params.root=/data/Imagenet/val
+ python3.8 src/super_gradients/convert_recipe_to_code.py cifar10_resnet.yaml train_cifar10_resnet.py && python3.8 train_cifar10_resnet.py
- run:
name: Remove new environment when failed
2 changes: 1 addition & 1 deletion Makefile
@@ -32,7 +32,7 @@ sweeper_test:

# Here you define a list of notebooks we want to execute and convert to markdown files
# NOTEBOOKS = hellomake.ipynb hellofunc.ipynb helloclass.ipynb
- NOTEBOOKS = src/super_gradients/examples/model_export/models_export.ipynb
+ NOTEBOOKS = src/super_gradients/examples/model_export/models_export.ipynb notebooks/what_are_recipes_and_how_to_use.ipynb

# This Makefile target runs notebooks listed below and converts them to markdown files in documentation/source/
run_and_convert_notebooks_to_docs: $(NOTEBOOKS)
4 changes: 3 additions & 1 deletion README.md
@@ -325,7 +325,9 @@ Recipes support out of the box every model, metric or loss that is implemented i

<table class="tfo-notebook-buttons" align="left">
<td width="500">
-     <a target="_blank" href="https://bit.ly/3UiY5ab"><img src="./documentation/assets/SG_img/colab_logo.png" /> How to Use Recipes</a>
+     <a target="_blank" href="https://colab.research.google.com/github/Deci-AI/super-gradients/blob/master/notebooks/what_are_recipes_and_how_to_use.ipynb">
+       <img src="./documentation/assets/SG_img/colab_logo.png" /> How to Use Recipes
+     </a>
</td>
</table>
</br></br>
955 changes: 955 additions & 0 deletions notebooks/what_are_recipes_and_how_to_use.ipynb

Large diffs are not rendered by default.

239 changes: 239 additions & 0 deletions src/super_gradients/convert_recipe_to_code.py
@@ -0,0 +1,239 @@
"""
Entry point for converting recipe file to self-contained train.py file.
Convert a recipe YAML file to a self-contained <train.py> file that can be run with python <train.py>.
Generated file will contain all training hyperparameters from input recipe file but will be self-contained (no dependencies on original recipe).
Limitations: Converting a recipe with command-line overrides of some parameters in this recipe is not supported.
General use: python -m super_gradients.convert_recipe_to_code DESIRED_RECIPE OUTPUT_SCRIPT
Example: python -m super_gradients.convert_recipe_to_code coco2017_yolo_nas_s train_coco2017_yolo_nas_s.py
For recipe's specific instructions and details refer to the recipe's configuration file in the recipes' directory.
"""
import argparse
import collections
import os.path
import pathlib
from typing import Tuple, Mapping, Dict, Union, Optional

import hydra
import pkg_resources
from hydra.core.global_hydra import GlobalHydra
from omegaconf import DictConfig, OmegaConf, ListConfig

from super_gradients import Trainer
from super_gradients.common import MultiGPUMode
from super_gradients.common.abstractions.abstract_logger import get_logger
from super_gradients.common.environment.omegaconf_utils import register_hydra_resolvers
from super_gradients.common.environment.path_utils import normalize_path
from super_gradients.training.utils import get_param

logger = get_logger(__name__)


def try_import_black():
    """
    Attempts to import the black code formatter.
    If black is not installed, attempts to install it with pip.
    If installation fails, returns None.
    """
    try:
        import black

        return black
    except ImportError:
        logger.info("Trying to install black using pip to enable formatting of the generated script.")
        try:
            import pip

            pip.main(["install", "black==22.10.0"])
            import black

            logger.info("Black installed via pip.")
            return black
        except Exception:
            logger.info("Black installation failed. Formatting of the generated script will be disabled.")
            return None


def recursively_walk_and_extract_hydra_targets(
    cfg: DictConfig, objects: Optional[Mapping] = None, prefix: Optional[str] = None
) -> Tuple[DictConfig, Dict[str, Mapping]]:
    """
    Iterates over the input config, extracts all hydra targets present in it and replaces them with variable references.
    Extracted hydra targets are stored in the objects dictionary (used to generate instantiations of the objects in the generated script).

    :param cfg:     Input config
    :param objects: Dictionary of extracted hydra targets
    :param prefix:  A prefix used to track the path to the current config (gives the extracted variables meaningful names)
    :return:        A new config and the dictionary of objects that must be created in the generated script
    """
    if objects is None:
        objects = collections.OrderedDict()
    if prefix is None:
        prefix = ""

    if isinstance(cfg, DictConfig):
        for key, value in cfg.items():
            value, objects = recursively_walk_and_extract_hydra_targets(value, objects, prefix=f"{prefix}_{key}")
            cfg[key] = value

        if "_target_" in cfg:
            target_class = cfg["_target_"]
            target_params = {k: v for k, v in cfg.items() if k != "_target_"}
            object_name = prefix.replace(".", "_").lower()
            objects[object_name] = (target_class, target_params)
            cfg = object_name

    elif isinstance(cfg, ListConfig):
        for index, item in enumerate(cfg):
            item, objects = recursively_walk_and_extract_hydra_targets(item, objects, prefix=f"{prefix}_{index}")
            cfg[index] = item

    return cfg, objects
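# Illustrative sketch (not part of the module): walking a config such as
#   {"loss": {"_target_": "torch.nn.CrossEntropyLoss", "label_smoothing": 0.1}}
# with prefix="training_hyperparams" yields
#   cfg     == {"loss": "training_hyperparams_loss"}
#   objects == {"training_hyperparams_loss": ("torch.nn.CrossEntropyLoss", {"label_smoothing": 0.1})}
# so the generated script can instantiate the object once and reference it by variable name.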


def convert_recipe_to_code(config_name: Union[str, pathlib.Path], config_dir: Union[str, pathlib.Path], output_script_path: Union[str, pathlib.Path]) -> None:
    """
    Convert a recipe YAML file to a self-contained <train.py> file that can be run with `python <train.py>`.
    The generated file will contain all training hyperparameters from the input recipe but will be self-contained (no dependencies on the original recipe).
    Limitations: converting a recipe together with command-line overrides of its parameters is not supported.

    :param config_name:        Name of the recipe file (with or without the .yaml extension)
    :param config_dir:         Directory where the recipe file is located
    :param output_script_path: Path to the output .py file
    :return:                   None
    """
    config_name = str(config_name)
    config_dir = str(config_dir)
    output_script_path = str(output_script_path)

    register_hydra_resolvers()
    GlobalHydra.instance().clear()
    with hydra.initialize_config_dir(config_dir=normalize_path(config_dir), version_base="1.2"):
        cfg = hydra.compose(config_name=config_name)

    cfg = Trainer._trigger_cfg_modifying_callbacks(cfg)
    OmegaConf.resolve(cfg)

    device = get_param(cfg, "device")
    multi_gpu = get_param(cfg, "multi_gpu")

    if multi_gpu is False:
        multi_gpu = MultiGPUMode.OFF
    num_gpus = get_param(cfg, "num_gpus")

    train_dataloader = get_param(cfg, "train_dataloader")
    train_dataset_params = OmegaConf.to_container(cfg.dataset_params.train_dataset_params, resolve=True)
    train_dataloader_params = OmegaConf.to_container(cfg.dataset_params.train_dataloader_params, resolve=True)

    val_dataloader = get_param(cfg, "val_dataloader")
    val_dataset_params = OmegaConf.to_container(cfg.dataset_params.val_dataset_params, resolve=True)
    val_dataloader_params = OmegaConf.to_container(cfg.dataset_params.val_dataloader_params, resolve=True)

    num_classes = cfg.arch_params.num_classes
    arch_params = OmegaConf.to_container(cfg.arch_params, resolve=True)

    strict_load = cfg.checkpoint_params.strict_load
    if isinstance(strict_load, Mapping) and "_target_" in strict_load:
        strict_load = hydra.utils.instantiate(strict_load)

    training_hyperparams, hydra_instantiated_objects = recursively_walk_and_extract_hydra_targets(cfg.training_hyperparams)

    checkpoint_num_classes = get_param(cfg.checkpoint_params, "checkpoint_num_classes")
    content = f"""
import super_gradients
from super_gradients import init_trainer, Trainer
from super_gradients.training.utils.distributed_training_utils import setup_device
from super_gradients.training import models, dataloaders
from super_gradients.common.data_types.enum import MultiGPUMode, StrictLoad
import numpy as np


def main():
    init_trainer()
    setup_device(device={device}, multi_gpu="{multi_gpu}", num_gpus={num_gpus})

    trainer = Trainer(experiment_name="{cfg.experiment_name}", ckpt_root_dir="{cfg.ckpt_root_dir}")

    num_classes = {num_classes}
    arch_params = {arch_params}

    model = models.get(
        model_name="{cfg.architecture}",
        num_classes=num_classes,
        arch_params=arch_params,
        strict_load={strict_load},
        pretrained_weights={cfg.checkpoint_params.pretrained_weights},
        checkpoint_path={cfg.checkpoint_params.checkpoint_path},
        load_backbone={cfg.checkpoint_params.load_backbone},
        checkpoint_num_classes={checkpoint_num_classes},
    )

    train_dataloader = dataloaders.get(
        name={train_dataloader},
        dataset_params={train_dataset_params},
        dataloader_params={train_dataloader_params},
    )

    val_dataloader = dataloaders.get(
        name={val_dataloader},
        dataset_params={val_dataset_params},
        dataloader_params={val_dataloader_params},
    )
"""
    for name, (class_name, class_params) in hydra_instantiated_objects.items():
        class_params_str = []
        for k, v in class_params.items():
            class_params_str.append(f"{k}={v}")
        class_params_str = ",".join(class_params_str)
        content += f"    {name} = {class_name}({class_params_str})\n\n"

    content += f"""
    training_hyperparams = {training_hyperparams}

    # TRAIN
    result = trainer.train(
        model=model,
        train_loader=train_dataloader,
        valid_loader=val_dataloader,
        training_params=training_hyperparams,
    )
    print(result)


if __name__ == "__main__":
    main()
"""
    # Remove quotes from dict values to reference them as variables
    for key in hydra_instantiated_objects.keys():
        content = content.replace(f"'{key}'", key)

    with open(output_script_path, "w") as f:
        black = try_import_black()
        if black is not None:
            content = black.format_str(content, mode=black.FileMode(line_length=160))
        f.write(content)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("config_name", type=str, help=".yaml filename")
    # nargs="?" makes save_path optional, so the fallback derived from config_name below can actually apply
    parser.add_argument("save_path", type=str, nargs="?", default=None, help="Destination path to the output .py file")
    parser.add_argument("--config_dir", type=str, default=pkg_resources.resource_filename("super_gradients.recipes", ""), help="The config directory path")
    args = parser.parse_args()

    save_path = args.save_path or os.path.splitext(os.path.basename(args.config_name))[0] + ".py"
    logger.info(f"Saving recipe script to {save_path}")

    convert_recipe_to_code(args.config_name, args.config_dir, save_path)


if __name__ == "__main__":
    main()
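
For reference, a minimal sketch of driving the converter programmatically rather than through the CLI (a hypothetical snippet, assuming the packaged recipes directory as config_dir; cifar10_resnet is the recipe exercised in the CI change above):

import pkg_resources
from super_gradients.convert_recipe_to_code import convert_recipe_to_code

# Resolve the recipes directory that ships with the package (same default as the CLI)
recipes_dir = pkg_resources.resource_filename("super_gradients.recipes", "")
convert_recipe_to_code(config_name="cifar10_resnet", config_dir=recipes_dir, output_script_path="train_cifar10_resnet.py")
# The generated script is then runnable on its own: python train_cifar10_resnet.py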
2 changes: 1 addition & 1 deletion src/super_gradients/training/sg_trainer/sg_trainer.py
@@ -660,7 +660,7 @@ def _save_checkpoint(
        metric = validation_results_dict[self.metric_to_watch]

        # BUILD THE state_dict
-       state = {"net": unwrap_model(self.net).state_dict(), "acc": metric, "epoch": epoch}
+       state = {"net": unwrap_model(self.net).state_dict(), "acc": metric, "epoch": epoch, "packages": get_installed_packages()}

        if optimizer is not None:
            state["optimizer_state_dict"] = optimizer.state_dict()
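
For context, a minimal sketch of reading the newly recorded package list back from a saved checkpoint (hypothetical usage; the exact format returned by get_installed_packages() is assumed here to be a list of "name==version" strings):

import torch

# Load a checkpoint written by this version of Trainer._save_checkpoint
ckpt = torch.load("ckpt_latest.pth", map_location="cpu")
for pkg in ckpt["packages"]:  # assumed list of installed-package strings captured at save time
    print(pkg)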
4 changes: 4 additions & 0 deletions tests/deci_core_unit_test_suite_runner.py
@@ -28,6 +28,7 @@
TestMixedPrecisionDisabled,
)
from tests.end_to_end_tests import TestTrainer
+ from tests.unit_tests.test_convert_recipe_to_code import TestConvertRecipeToCode
from tests.unit_tests.detection_utils_test import TestDetectionUtils
from tests.unit_tests.detection_dataset_test import DetectionDatasetTest, TestParseYoloLabelFile
from tests.unit_tests.export_detection_model_test import TestDetectionModelExport
@@ -48,6 +49,7 @@
from tests.unit_tests.test_deprecations import DeprecationsUnitTest
from tests.unit_tests.test_min_samples_single_node import TestMinSamplesSingleNode
from tests.unit_tests.test_train_with_torch_scheduler import TrainWithTorchSchedulerTest
+ from tests.unit_tests.test_version_check import TestVersionCheck
from tests.unit_tests.test_yolo_nas_pose import YoloNASPoseTests
from tests.unit_tests.train_with_intialized_param_args_test import TrainWithInitializedObjectsTest
from tests.unit_tests.pretrained_models_unit_test import PretrainedModelsUnitTest
@@ -166,6 +168,8 @@ def _add_modules_to_unit_tests_suite(self):
        self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(PoseEstimationSampleTest))
        self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestMixedPrecisionDisabled))
        self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(DynamicModelTests))
+       self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestConvertRecipeToCode))
+       self.unit_tests_suite.addTest(self.test_loader.loadTestsFromModule(TestVersionCheck))

    def _add_modules_to_end_to_end_tests_suite(self):
        """
2 changes: 1 addition & 1 deletion tests/end_to_end_tests/trainer_test.py
@@ -79,7 +79,7 @@ def test_checkpoint_content(self):
        ckpt_paths = [os.path.join(trainer.checkpoints_dir_path, suf) for suf in ckpt_filename]
        for ckpt_path in ckpt_paths:
            ckpt = torch.load(ckpt_path)
-           self.assertListEqual(["net", "acc", "epoch", "optimizer_state_dict", "scaler_state_dict"], list(ckpt.keys()))
+           self.assertListEqual(sorted(["net", "acc", "epoch", "optimizer_state_dict", "scaler_state_dict", "packages"]), sorted(list(ckpt.keys())))
        trainer._save_checkpoint()
        weights_only = torch.load(os.path.join(trainer.checkpoints_dir_path, "ckpt_latest_weights_only.pth"))
        self.assertListEqual(["net"], list(weights_only.keys()))
