From a8d113b9b420e0e403934af0a7fc2ae45c3da6e7 Mon Sep 17 00:00:00 2001 From: Louis Dupont Date: Wed, 19 Apr 2023 11:20:55 +0300 Subject: [PATCH 1/6] first draft --- src/super_gradients/common/object_names.py | 2 + ...ction_yolo_format_base_dataset_params.yaml | 80 +++++++++++++++++++ .../training/dataloaders/dataloaders.py | 24 +++++- .../detection_datasets/detection_dataset.py | 2 +- .../yolo_format_detection.py | 22 +++-- 5 files changed, 121 insertions(+), 9 deletions(-) create mode 100644 src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml diff --git a/src/super_gradients/common/object_names.py b/src/super_gradients/common/object_names.py index 47870aa06c..3c3f4d906f 100644 --- a/src/super_gradients/common/object_names.py +++ b/src/super_gradients/common/object_names.py @@ -334,6 +334,8 @@ class Dataloaders: COCO2017_VAL_SSD_LITE_MOBILENET_V2 = "coco2017_val_ssd_lite_mobilenet_v2" COCO2017_POSE_TRAIN = "coco2017_pose_train" COCO2017_POSE_VAL = "coco2017_pose_val" + COCO_YOLO_FORMAT_TRAIN = "coco_yolo_format_train" + COCO_YOLO_FORMAT_VAL = "coco_yolo_format_val" IMAGENET_TRAIN = "imagenet_train" IMAGENET_VAL = "imagenet_val" IMAGENET_EFFICIENTNET_TRAIN = "imagenet_efficientnet_train" diff --git a/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml b/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml new file mode 100644 index 0000000000..1e359643ab --- /dev/null +++ b/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml @@ -0,0 +1,80 @@ + +train_dataset_params: + data_dir: /data/coco # TO FILL: Where the data is stored. + images_dir: images/train2017 # TO FILL: Local path to directory that includes all the images. Path relative to `data_dir`. Can be the same as `labels_dir`. + labels_dir: labels/train2017 # TO FILL: Local path to directory that includes all the labels. Path relative to `data_dir`. Can be the same as `images_dir`. + classes: [] + input_dim: [640, 640] + cache_dir: + cache: False + transforms: + - DetectionMosaic: + input_dim: ${dataset_params.train_dataset_params.input_dim} + prob: 1. + - DetectionRandomAffine: + degrees: 10. # rotation degrees, randomly sampled from [-degrees, degrees] + translate: 0.1 # image translation fraction + scales: [ 0.1, 2 ] # random rescale range (keeps size by padding/cropping) after mosaic transform. + shear: 2.0 # shear degrees, randomly sampled from [-degrees, degrees] + target_size: ${dataset_params.train_dataset_params.input_dim} + filter_box_candidates: True # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio. 
+ wh_thr: 2 # edge size threshold when filter_box_candidates = True (pixels)
+ area_thr: 0.1 # threshold for area ratio between original image and the transformed one, when filter_box_candidates = True
+ ar_thr: 20 # aspect ratio threshold when filter_box_candidates = True
+ - DetectionMixup:
+ input_dim: ${dataset_params.train_dataset_params.input_dim}
+ mixup_scale: [ 0.5, 1.5 ] # random rescale range for the additional sample in mixup
+ prob: 1.0 # probability to apply per-sample mixup
+ flip_prob: 0.5 # probability to apply horizontal flip
+ - DetectionHSV:
+ prob: 1.0 # probability to apply HSV transform
+ hgain: 5 # HSV transform hue gain (randomly sampled from [-hgain, hgain])
+ sgain: 30 # HSV transform saturation gain (randomly sampled from [-sgain, sgain])
+ vgain: 30 # HSV transform value gain (randomly sampled from [-vgain, vgain])
+ - DetectionHorizontalFlip:
+ prob: 0.5 # probability to apply horizontal flip
+ - DetectionPaddedRescale:
+ input_dim: ${dataset_params.train_dataset_params.input_dim}
+ max_targets: 120
+ - DetectionTargetsFormatTransform:
+ input_dim: ${dataset_params.train_dataset_params.input_dim}
+ output_format: LABEL_CXCYWH
+ class_inclusion_list:
+ max_num_samples:
+
+train_dataloader_params:
+ batch_size: 25
+ num_workers: 8
+ shuffle: True
+ drop_last: True
+ pin_memory: True
+ collate_fn:
+ _target_: super_gradients.training.utils.detection_utils.DetectionCollateFN
+
+val_dataset_params:
+ data_dir: /data/coco # TO FILL: Where the data is stored.
+ images_dir: images/val2017 # TO FILL: Local path to directory that includes all the images. Path relative to `data_dir`. Can be the same as `labels_dir`.
+ labels_dir: labels/val2017 # TO FILL: Local path to directory that includes all the labels. Path relative to `data_dir`. Can be the same as `images_dir`.
+ classes: [] # TO FILL: Class names + input_dim: [640, 640] + cache_dir: + cache: False + transforms: + - DetectionPaddedRescale: + input_dim: ${dataset_params.val_dataset_params.input_dim} + - DetectionTargetsFormatTransform: + max_targets: 50 + input_dim: ${dataset_params.val_dataset_params.input_dim} + output_format: LABEL_CXCYWH + class_inclusion_list: + max_num_samples: + +val_dataloader_params: + batch_size: 25 + num_workers: 8 + drop_last: False + pin_memory: True + collate_fn: + _target_: super_gradients.training.utils.detection_utils.DetectionCollateFN + +_convert_: all diff --git a/src/super_gradients/training/dataloaders/dataloaders.py b/src/super_gradients/training/dataloaders/dataloaders.py index 3b5ea4c44b..cab1de9edb 100644 --- a/src/super_gradients/training/dataloaders/dataloaders.py +++ b/src/super_gradients/training/dataloaders/dataloaders.py @@ -17,7 +17,7 @@ Cifar10, Cifar100, ) -from super_gradients.training.datasets.detection_datasets import COCODetectionDataset, RoboflowDetectionDataset +from super_gradients.training.datasets.detection_datasets import COCODetectionDataset, RoboflowDetectionDataset, YoloDarknetFormatDetectionDataset from super_gradients.training.datasets.detection_datasets.pascal_voc_detection import ( PascalVOCUnifiedDetectionTrainDataset, PascalVOCDetectionDataset, @@ -270,6 +270,28 @@ def roboflow_val_yolox(dataset_params: Dict = None, dataloader_params: Dict = No ) +@register_dataloader(Dataloaders.COCO_YOLO_FORMAT_TRAIN) +def coco_yolo_format_train(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader: + return get_data_loader( + config_name="coco_detection_yolo_format_base_dataset_params", + dataset_cls=YoloDarknetFormatDetectionDataset, + train=True, + dataset_params=dataset_params, + dataloader_params=dataloader_params, + ) + + +@register_dataloader(Dataloaders.COCO_YOLO_FORMAT_VAL) +def coco_yolo_format_val(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader: + return get_data_loader( + config_name="coco_detection_yolo_format_base_dataset_params", + dataset_cls=YoloDarknetFormatDetectionDataset, + train=False, + dataset_params=dataset_params, + dataloader_params=dataloader_params, + ) + + @register_dataloader(Dataloaders.IMAGENET_TRAIN) def imagenet_train(dataset_params: Dict = None, dataloader_params: Dict = None, config_name="imagenet_dataset_params"): return get_data_loader( diff --git a/src/super_gradients/training/datasets/detection_datasets/detection_dataset.py b/src/super_gradients/training/datasets/detection_datasets/detection_dataset.py index a842131971..cb3b2e1677 100644 --- a/src/super_gradients/training/datasets/detection_datasets/detection_dataset.py +++ b/src/super_gradients/training/datasets/detection_datasets/detection_dataset.py @@ -119,7 +119,7 @@ def __init__( self.data_dir = data_dir if not Path(data_dir).exists(): - raise FileNotFoundError(f"data_dir={data_dir} not found. Please make sure that data_dir points toward your dataset.") + raise RuntimeError(f"data_dir={data_dir} not found. 
Please make sure that data_dir points toward your dataset.") # Number of images that are available (regardless of ignored images) self.n_available_samples = self._setup_data_source() diff --git a/src/super_gradients/training/datasets/detection_datasets/yolo_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/yolo_format_detection.py index 45c106d078..55a96808da 100644 --- a/src/super_gradients/training/datasets/detection_datasets/yolo_format_detection.py +++ b/src/super_gradients/training/datasets/detection_datasets/yolo_format_detection.py @@ -146,7 +146,7 @@ def _setup_data_source(self) -> int: logger.warning(f"{len(labels_not_in_images)} label files are not associated to any image.") # Only keep names that are in both the images and the labels - valid_base_names = list(unique_image_file_base_names & unique_label_file_base_names) + valid_base_names = unique_image_file_base_names & unique_label_file_base_names if len(valid_base_names) != len(all_images_file_names): logger.warning( f"As a consequence, " @@ -154,12 +154,20 @@ def _setup_data_source(self) -> int: f"{len(valid_base_names)}/{len(all_labels_file_names)} label files will be used." ) - self.images_file_names = list( - sorted(image_full_name for image_full_name in all_images_file_names if remove_file_extension(image_full_name) in valid_base_names) - ) - self.labels_file_names = list( - sorted(label_full_name for label_full_name in all_labels_file_names if remove_file_extension(label_full_name) in valid_base_names) - ) + self.images_file_names = [] + self.labels_file_names = [] + for image_full_name in all_images_file_names: + base_name = remove_file_extension(image_full_name) + if base_name in valid_base_names: + self.images_file_names.append(image_full_name) + self.labels_file_names.append(base_name + ".txt") + # valid_labels_file_names = [label_full_name for label_full_name in all_labels_file_names if remove_file_extension(label_full_name) in valid_base_names] + # self.images_file_names = list( + # sorted(image_full_name for image_full_name in all_images_file_names if remove_file_extension(image_full_name) in valid_base_names) + # ) + # self.labels_file_names = list( + # sorted(label_full_name for label_full_name in all_labels_file_names if remove_file_extension(label_full_name) in valid_base_names) + # ) return len(self.images_file_names) def _load_annotation(self, sample_id: int) -> dict: From 04415c291613f0690e3a3f225590c7f92b504a91 Mon Sep 17 00:00:00 2001 From: Louis Dupont Date: Wed, 19 Apr 2023 11:25:57 +0300 Subject: [PATCH 2/6] improve naming --- src/super_gradients/common/object_names.py | 4 ++-- src/super_gradients/training/dataloaders/dataloaders.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/super_gradients/common/object_names.py b/src/super_gradients/common/object_names.py index 3c3f4d906f..21cc0532ef 100644 --- a/src/super_gradients/common/object_names.py +++ b/src/super_gradients/common/object_names.py @@ -334,8 +334,8 @@ class Dataloaders: COCO2017_VAL_SSD_LITE_MOBILENET_V2 = "coco2017_val_ssd_lite_mobilenet_v2" COCO2017_POSE_TRAIN = "coco2017_pose_train" COCO2017_POSE_VAL = "coco2017_pose_val" - COCO_YOLO_FORMAT_TRAIN = "coco_yolo_format_train" - COCO_YOLO_FORMAT_VAL = "coco_yolo_format_val" + coco_detection_yolo_format_train = "coco_detection_yolo_format_train" + coco_detection_yolo_format_val = "coco_detection_yolo_format_val" IMAGENET_TRAIN = "imagenet_train" IMAGENET_VAL = "imagenet_val" IMAGENET_EFFICIENTNET_TRAIN = "imagenet_efficientnet_train" 
diff --git a/src/super_gradients/training/dataloaders/dataloaders.py b/src/super_gradients/training/dataloaders/dataloaders.py index cab1de9edb..2b41a8253d 100644 --- a/src/super_gradients/training/dataloaders/dataloaders.py +++ b/src/super_gradients/training/dataloaders/dataloaders.py @@ -270,8 +270,8 @@ def roboflow_val_yolox(dataset_params: Dict = None, dataloader_params: Dict = No ) -@register_dataloader(Dataloaders.COCO_YOLO_FORMAT_TRAIN) -def coco_yolo_format_train(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader: +@register_dataloader(Dataloaders.coco_detection_yolo_format_train) +def coco_detection_yolo_format_train(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader: return get_data_loader( config_name="coco_detection_yolo_format_base_dataset_params", dataset_cls=YoloDarknetFormatDetectionDataset, @@ -281,8 +281,8 @@ def coco_yolo_format_train(dataset_params: Dict = None, dataloader_params: Dict ) -@register_dataloader(Dataloaders.COCO_YOLO_FORMAT_VAL) -def coco_yolo_format_val(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader: +@register_dataloader(Dataloaders.coco_detection_yolo_format_val) +def coco_detection_yolo_format_val(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader: return get_data_loader( config_name="coco_detection_yolo_format_base_dataset_params", dataset_cls=YoloDarknetFormatDetectionDataset, From 2df8f03b630de421cac6f561442c731e6e1f122f Mon Sep 17 00:00:00 2001 From: Louis Dupont Date: Wed, 19 Apr 2023 11:59:33 +0300 Subject: [PATCH 3/6] fix name --- src/super_gradients/common/object_names.py | 4 ++-- src/super_gradients/training/dataloaders/dataloaders.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/super_gradients/common/object_names.py b/src/super_gradients/common/object_names.py index 21cc0532ef..c558d22e2f 100644 --- a/src/super_gradients/common/object_names.py +++ b/src/super_gradients/common/object_names.py @@ -334,8 +334,8 @@ class Dataloaders: COCO2017_VAL_SSD_LITE_MOBILENET_V2 = "coco2017_val_ssd_lite_mobilenet_v2" COCO2017_POSE_TRAIN = "coco2017_pose_train" COCO2017_POSE_VAL = "coco2017_pose_val" - coco_detection_yolo_format_train = "coco_detection_yolo_format_train" - coco_detection_yolo_format_val = "coco_detection_yolo_format_val" + COCO_DETECTION_YOLO_FORMAT_TRAIN = "coco_detection_yolo_format_train" + COCO_DETECTION_YOLO_FORMAT_VAL = "coco_detection_yolo_format_val" IMAGENET_TRAIN = "imagenet_train" IMAGENET_VAL = "imagenet_val" IMAGENET_EFFICIENTNET_TRAIN = "imagenet_efficientnet_train" diff --git a/src/super_gradients/training/dataloaders/dataloaders.py b/src/super_gradients/training/dataloaders/dataloaders.py index 2b41a8253d..29f5d03694 100644 --- a/src/super_gradients/training/dataloaders/dataloaders.py +++ b/src/super_gradients/training/dataloaders/dataloaders.py @@ -270,7 +270,7 @@ def roboflow_val_yolox(dataset_params: Dict = None, dataloader_params: Dict = No ) -@register_dataloader(Dataloaders.coco_detection_yolo_format_train) +@register_dataloader(Dataloaders.COCO_DETECTION_YOLO_FORMAT_TRAIN) def coco_detection_yolo_format_train(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader: return get_data_loader( config_name="coco_detection_yolo_format_base_dataset_params", @@ -281,7 +281,7 @@ def coco_detection_yolo_format_train(dataset_params: Dict = None, dataloader_par ) -@register_dataloader(Dataloaders.coco_detection_yolo_format_val) 
+@register_dataloader(Dataloaders.COCO_DETECTION_YOLO_FORMAT_VAL) def coco_detection_yolo_format_val(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader: return get_data_loader( config_name="coco_detection_yolo_format_base_dataset_params", From 3ac1b0ba1dc0511fde2c13ba19ab787ca1cda691 Mon Sep 17 00:00:00 2001 From: Louis Dupont Date: Wed, 19 Apr 2023 12:04:29 +0300 Subject: [PATCH 4/6] remove comments --- .../datasets/detection_datasets/yolo_format_detection.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/super_gradients/training/datasets/detection_datasets/yolo_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/yolo_format_detection.py index 55a96808da..d567fec6da 100644 --- a/src/super_gradients/training/datasets/detection_datasets/yolo_format_detection.py +++ b/src/super_gradients/training/datasets/detection_datasets/yolo_format_detection.py @@ -161,13 +161,6 @@ def _setup_data_source(self) -> int: if base_name in valid_base_names: self.images_file_names.append(image_full_name) self.labels_file_names.append(base_name + ".txt") - # valid_labels_file_names = [label_full_name for label_full_name in all_labels_file_names if remove_file_extension(label_full_name) in valid_base_names] - # self.images_file_names = list( - # sorted(image_full_name for image_full_name in all_images_file_names if remove_file_extension(image_full_name) in valid_base_names) - # ) - # self.labels_file_names = list( - # sorted(label_full_name for label_full_name in all_labels_file_names if remove_file_extension(label_full_name) in valid_base_names) - # ) return len(self.images_file_names) def _load_annotation(self, sample_id: int) -> dict: From 317a63379e99f3e712b84de38935dfa8e200536f Mon Sep 17 00:00:00 2001 From: Louis Dupont Date: Wed, 19 Apr 2023 12:52:26 +0300 Subject: [PATCH 5/6] wip --- ...etection_yolo_format_base_dataset_params.yaml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml b/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml index 1e359643ab..cd1f397185 100644 --- a/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml +++ b/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml @@ -3,7 +3,13 @@ train_dataset_params: data_dir: /data/coco # TO FILL: Where the data is stored. images_dir: images/train2017 # TO FILL: Local path to directory that includes all the images. Path relative to `data_dir`. Can be the same as `labels_dir`. labels_dir: labels/train2017 # TO FILL: Local path to directory that includes all the labels. Path relative to `data_dir`. Can be the same as `images_dir`. 
- classes: [] + classes: [ person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop sign, + parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, backpack, umbrella, handbag, + tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, + tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot, + hot dog, pizza, donut, cake, chair, couch, potted plant, bed, dining table, toilet, tv, laptop, mouse, remote, + keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, + hair drier, toothbrush] input_dim: [640, 640] cache_dir: cache: False @@ -55,7 +61,13 @@ val_dataset_params: data_dir: /data/coco # TO FILL: Where the data is stored. images_dir: images/val2017 # TO FILL: Local path to directory that includes all the images. Path relative to `data_dir`. Can be the same as `labels_dir`. labels_dir: labels/val2017 # TO FILL: Local path to directory that includes all the labels. Path relative to `data_dir`. Can be the same as `images_dir`. - classes: [] # TO FILL: Class names + classes: [ person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop sign, + parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, backpack, umbrella, handbag, + tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, + tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot, + hot dog, pizza, donut, cake, chair, couch, potted plant, bed, dining table, toilet, tv, laptop, mouse, remote, + keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, + hair drier, toothbrush] input_dim: [640, 640] cache_dir: cache: False From 613d579f02654c5e5d5b888acdd8a9dcc5954aa9 Mon Sep 17 00:00:00 2001 From: Louis Dupont Date: Wed, 19 Apr 2023 12:59:07 +0300 Subject: [PATCH 6/6] add comment --- .../coco_detection_yolo_format_base_dataset_params.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml b/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml index cd1f397185..6ee9fbd1d4 100644 --- a/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml +++ b/src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml @@ -9,7 +9,7 @@ train_dataset_params: tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake, chair, couch, potted plant, bed, dining table, toilet, tv, laptop, mouse, remote, keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, - hair drier, toothbrush] + hair drier, toothbrush] # TO FILL: List of classes used in your dataset. 
input_dim: [640, 640] cache_dir: cache: False @@ -67,7 +67,7 @@ val_dataset_params: tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake, chair, couch, potted plant, bed, dining table, toilet, tv, laptop, mouse, remote, keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, - hair drier, toothbrush] + hair drier, toothbrush] # TO FILL: List of classes used in your dataset. input_dim: [640, 640] cache_dir: cache: False
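
For reference, a minimal usage sketch of the two dataloaders registered in this patch series. It is a sketch only: it assumes the package is installed with these patches applied, that the dataset follows the YOLO/Darknet layout expected by YoloDarknetFormatDetectionDataset (one .txt label file per image), and that get_data_loader merges the overrides below on top of coco_detection_yolo_format_base_dataset_params.yaml, as it does for the other registered dataloaders. The paths, directory names, and class list are placeholders, not values taken from the patches.

from super_gradients.training.dataloaders.dataloaders import (
    coco_detection_yolo_format_train,
    coco_detection_yolo_format_val,
)

# Only the fields overridden here differ from the recipe defaults in
# coco_detection_yolo_format_base_dataset_params.yaml.
train_loader = coco_detection_yolo_format_train(
    dataset_params={
        "data_dir": "/data/my_dataset",       # placeholder path
        "images_dir": "images/train",         # placeholder layout
        "labels_dir": "labels/train",
        "classes": ["class_a", "class_b"],    # placeholder class names
    },
    dataloader_params={"batch_size": 16, "num_workers": 4},
)

val_loader = coco_detection_yolo_format_val(
    dataset_params={
        "data_dir": "/data/my_dataset",
        "images_dir": "images/val",
        "labels_dir": "labels/val",
        "classes": ["class_a", "class_b"],
    },
    dataloader_params={"batch_size": 16},
)

# Batches are collated by DetectionCollateFN; targets use the LABEL_CXCYWH
# format configured by DetectionTargetsFormatTransform in the recipe.
images, targets = next(iter(train_loader))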