From e28026b4cbfc100bae82654327a6df42d36aee79 Mon Sep 17 00:00:00 2001 From: Prokofiev Kirill Date: Wed, 6 Sep 2023 11:48:31 +0200 Subject: [PATCH] Add Semi-SL Instance Segmentation (#2444) * added semisl MT. Loss not working. * added recipie. Unbiased teacher works * added MT * exps contin * proceed with experiments * fix errors in forward * change hyperparams. Add clip for testing * some exps * change hyperparams * added per class thrsh * minor: * exps * add switching parameter for thrsh * din thrsh * added DEMA * added dinam thrsh * removed dinam * final round exps * added MT and semi-sl for ResNet * added semisl stage. Remove old otx * training launches. Merged code with OD task. * fix pre-commit * added tests for Semi-SL IS * fix detection resolution * added unit test for MT * overwrite iter params in semi-sl config. Return configuration.ymal back * added semisl for effnet. Hovewer it still doesn't work * changed teacher forward method. Fixed pre-commit * fix unit tests * fixed detection issues. Moved data pipeline * minor * fixed det unit test configure * rename file * revert detection scaling back * rename semisl data * some changes in unit test for focal loss * fixed pre-commit. 
returned incremental part back * rename selfsl in semisl * rename MeanTeacherHook * return yolox data_pipeline * fix pre-commit * added one more unit test * fix pre-commit * reply comments --- .../common/adapters/mmcv/hooks/__init__.py | 4 +- .../mmcv/hooks/custom_model_ema_hook.py | 20 ++ .../mmcv/hooks/dual_model_ema_hook.py | 19 +- ...d_teacher_hook.py => mean_teacher_hook.py} | 19 +- .../mmdet/models/detectors/__init__.py | 4 +- .../mmdet/models/detectors/mean_teacher.py | 267 ++++++++++++++++++ .../models/detectors/unbiased_teacher.py | 205 -------------- .../models/heads/custom_fcn_mask_head.py | 4 + .../mmdet/models/heads/custom_roi_head.py | 1 + .../adapters/mmdet/models/losses/__init__.py | 4 +- .../mmdet/models/losses/cross_focal_loss.py | 29 ++ .../detection/adapters/mmdet/task.py | 3 +- ...ne.py => base_semisl_det_data_pipeline.py} | 43 ++- .../semisl/semisl_is_eff_data_pipeline.py | 157 ++++++++++ .../semisl/semisl_is_res_data_pipeline.py | 140 +++++++++ .../cspdarknet_yolox/semisl/data_pipeline.py | 40 +-- .../cspdarknet_yolox/semisl/model.py | 4 +- .../semisl/data_pipeline.py | 2 +- .../cspdarknet_yolox_l/semisl/model.py | 4 +- .../semisl/data_pipeline.py | 2 +- .../cspdarknet_yolox_s/semisl/model.py | 4 +- .../semisl/data_pipeline.py | 2 +- .../cspdarknet_yolox_x/semisl/model.py | 4 +- .../mobilenetv2_atss/semisl/data_pipeline.py | 2 +- .../mobilenetv2_atss/semisl/model.py | 4 +- .../mobilenetv2_ssd/semisl/data_pipeline.py | 2 +- .../detection/mobilenetv2_ssd/semisl/model.py | 4 +- .../resnext101_atss/semisl/data_pipeline.py | 2 +- .../detection/resnext101_atss/semisl/model.py | 4 +- .../semisl/__init__.py | 4 + .../semisl/compression_config.json | 41 +++ .../semisl/data_pipeline.py | 6 + .../semisl/hparam.yaml | 15 + .../efficientnetb2b_maskrcnn/semisl/model.py | 123 ++++++++ .../resnet50_maskrcnn/semisl/__init__.py | 4 + .../semisl/compression_config.json | 41 +++ .../resnet50_maskrcnn/semisl/data_pipeline.py | 7 + 
.../resnet50_maskrcnn/semisl/hparam.yaml | 15 + .../resnet50_maskrcnn/semisl/model.py | 164 +++++++++++ .../resnet50_maskrcnn/template.yaml | 2 +- src/otx/api/entities/shapes/polygon.py | 5 +- .../core/data/adapter/base_dataset_adapter.py | 1 + src/otx/recipes/stages/detection/semisl.py | 9 +- .../stages/instance-segmentation/semisl.py | 30 ++ .../test_instance_segmentation.py | 46 +++ .../test_instance_segmentation.py | 28 ++ ...cher_hook.py => test_mean_teacher_hook.py} | 6 +- .../models/detectors/test_mean_teacher.py | 109 +++++++ .../models/losses/test_cross_focal_loss.py | 18 ++ 49 files changed, 1355 insertions(+), 318 deletions(-) rename src/otx/algorithms/common/adapters/mmcv/hooks/{unbiased_teacher_hook.py => mean_teacher_hook.py} (76%) create mode 100644 src/otx/algorithms/detection/adapters/mmdet/models/detectors/mean_teacher.py delete mode 100644 src/otx/algorithms/detection/adapters/mmdet/models/detectors/unbiased_teacher.py rename src/otx/algorithms/detection/configs/base/data/semisl/{base_semisl_data_pipeline.py => base_semisl_det_data_pipeline.py} (81%) create mode 100644 src/otx/algorithms/detection/configs/base/data/semisl/semisl_is_eff_data_pipeline.py create mode 100644 src/otx/algorithms/detection/configs/base/data/semisl/semisl_is_res_data_pipeline.py create mode 100644 src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/__init__.py create mode 100644 src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/compression_config.json create mode 100644 src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/data_pipeline.py create mode 100644 src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/hparam.yaml create mode 100644 src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/model.py create mode 100644 
src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/__init__.py create mode 100644 src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/compression_config.json create mode 100644 src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/data_pipeline.py create mode 100644 src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/hparam.yaml create mode 100644 src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/model.py create mode 100644 src/otx/recipes/stages/instance-segmentation/semisl.py rename tests/unit/algorithms/common/adapters/mmcv/hooks/{test_unbiased_teacher_hook.py => test_mean_teacher_hook.py} (74%) create mode 100644 tests/unit/algorithms/detection/adapters/mmdet/models/detectors/test_mean_teacher.py diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py b/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py index 9752f09d2f8..ed724ff53ab 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py @@ -37,6 +37,7 @@ from .ib_loss_hook import IBLossHook from .logger_hook import LoggerReplaceHook, OTXLoggerHook from .loss_dynamics_tracking_hook import LossDynamicsTrackingHook +from .mean_teacher_hook import MeanTeacherHook from .mem_cache_hook import MemCacheHook from .model_ema_v2_hook import ModelEmaV2Hook from .no_bias_decay_hook import NoBiasDecayHook @@ -51,7 +52,6 @@ from .semisl_cls_hook import SemiSLClsHook from .task_adapt_hook import TaskAdaptHook from .two_crop_transform_hook import TwoCropTransformHook -from .unbiased_teacher_hook import UnbiasedTeacherHook __all__ = [ "AdaptiveRepeatDataHook", @@ -87,7 +87,7 @@ "SemiSLClsHook", "TaskAdaptHook", "TwoCropTransformHook", - "UnbiasedTeacherHook", + "MeanTeacherHook", "MemCacheHook", "LossDynamicsTrackingHook", ] diff --git 
a/src/otx/algorithms/common/adapters/mmcv/hooks/custom_model_ema_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/custom_model_ema_hook.py index f321a63d196..b73fe48bae1 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/custom_model_ema_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/custom_model_ema_hook.py @@ -25,6 +25,26 @@ def __init__(self, momentum=0.0002, epoch_momentum=0.0, interval=1, **kwargs): self.epoch_momentum = epoch_momentum self.interval = interval + def before_run(self, runner): + """To resume model with it's ema parameters more friendly. + + Register ema parameter as ``named_buffer`` to model + """ + if is_module_wrapper(runner.model): + model = runner.model.module.model_s if hasattr(runner.model.module, "model_s") else runner.model.module + else: + model = runner.model.model_s if hasattr(runner.model, "model_s") else runner.model + self.param_ema_buffer = {} + self.model_parameters = dict(model.named_parameters(recurse=True)) + for name, value in self.model_parameters.items(): + # "." 
is not allowed in module's buffer name + buffer_name = f"ema_{name.replace('.', '_')}" + self.param_ema_buffer[name] = buffer_name + model.register_buffer(buffer_name, value.data.clone()) + self.model_buffers = dict(model.named_buffers(recurse=True)) + if self.checkpoint is not None: + runner.resume(self.checkpoint) + def before_train_epoch(self, runner): """Update the momentum.""" if self.epoch_momentum > 0.0: diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/dual_model_ema_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/dual_model_ema_hook.py index beb5e86068c..70376d2fb48 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/dual_model_ema_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/dual_model_ema_hook.py @@ -76,7 +76,7 @@ def before_run(self, runner): def before_train_epoch(self, runner): """Momentum update.""" - if runner.epoch == self.start_epoch: + if runner.epoch + 1 == self.start_epoch: self._copy_model() self.enabled = True @@ -110,21 +110,24 @@ def _get_model(self, runner): def _copy_model(self): with torch.no_grad(): for name, src_param in self.src_params.items(): - dst_param = self.dst_params[name] - dst_param.data.copy_(src_param.data) + if not name.startswith("ema_"): + dst_param = self.dst_params[name] + dst_param.data.copy_(src_param.data) def _ema_model(self): momentum = min(self.momentum, 1.0) with torch.no_grad(): for name, src_param in self.src_params.items(): - dst_param = self.dst_params[name] - dst_param.data.copy_(dst_param.data * (1 - momentum) + src_param.data * momentum) + if not name.startswith("ema_"): + dst_param = self.dst_params[name] + dst_param.data.copy_(dst_param.data * (1 - momentum) + src_param.data * momentum) def _diff_model(self): diff_sum = 0.0 with torch.no_grad(): for name, src_param in self.src_params.items(): - dst_param = self.dst_params[name] - diff = ((src_param - dst_param) ** 2).sum() - diff_sum += diff + if not name.startswith("ema_"): + dst_param = self.dst_params[name] + 
diff = ((src_param - dst_param) ** 2).sum() + diff_sum += diff return diff_sum diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/unbiased_teacher_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/mean_teacher_hook.py similarity index 76% rename from src/otx/algorithms/common/adapters/mmcv/hooks/unbiased_teacher_hook.py rename to src/otx/algorithms/common/adapters/mmcv/hooks/mean_teacher_hook.py index c45b805371f..30bae6457d6 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/unbiased_teacher_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/mean_teacher_hook.py @@ -14,29 +14,27 @@ @HOOKS.register_module() -class UnbiasedTeacherHook(DualModelEMAHook): - """UnbiasedTeacherHook for semi-supervised learnings.""" +class MeanTeacherHook(DualModelEMAHook): + """MeanTeacherHook for semi-supervised learnings.""" - def __init__(self, min_pseudo_label_ratio=0.1, **kwargs): + def __init__(self, **kwargs): super().__init__(**kwargs) - self.min_pseudo_label_ratio = min_pseudo_label_ratio self.unlabeled_loss_enabled = False def before_train_epoch(self, runner): """Enable unlabeled loss if over start epoch.""" - super().before_train_epoch(runner) - if runner.epoch + 1 < self.start_epoch: return if self.unlabeled_loss_enabled: return + super().before_train_epoch(runner) + average_pseudo_label_ratio = self._get_average_pseudo_label_ratio(runner) logger.info(f"avr_ps_ratio: {average_pseudo_label_ratio}") - if average_pseudo_label_ratio > self.min_pseudo_label_ratio: - self._get_model(runner).enable_unlabeled_loss() - self.unlabeled_loss_enabled = True - logger.info("---------- Enabled unlabeled loss") + self._get_model(runner).enable_unlabeled_loss(True) + self.unlabeled_loss_enabled = True + logger.info("---------- Enabled unlabeled loss and EMA smoothing") def after_train_iter(self, runner): """Update ema parameter every self.interval iterations.""" @@ -46,7 +44,6 @@ def after_train_iter(self, runner): if runner.epoch + 1 < self.start_epoch or 
self.unlabeled_loss_enabled is False: # Just copy parameters before enabled - self._copy_model() return # EMA diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/__init__.py b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/__init__.py index 6d1932436d4..5695d7b38fc 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/__init__.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/__init__.py @@ -14,8 +14,8 @@ from .custom_vfnet_detector import CustomVFNet from .custom_yolox_detector import CustomYOLOX from .l2sp_detector_mixin import L2SPDetectorMixin +from .mean_teacher import MeanTeacher from .sam_detector_mixin import SAMDetectorMixin -from .unbiased_teacher import UnbiasedTeacher __all__ = [ "CustomATSS", @@ -29,6 +29,6 @@ "CustomYOLOX", "L2SPDetectorMixin", "SAMDetectorMixin", - "UnbiasedTeacher", "CustomMaskRCNNTileOptimized", + "MeanTeacher", ] diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/mean_teacher.py b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/mean_teacher.py new file mode 100644 index 00000000000..6541ee99717 --- /dev/null +++ b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/mean_teacher.py @@ -0,0 +1,267 @@ +"""UnbiasedTeacher Class for mmdetection detectors.""" +# Copyright (C) 2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +import copy +import functools + +import numpy as np +import torch +from mmdet.core import bbox2result, bbox2roi +from mmdet.core.mask.structures import BitmapMasks +from mmdet.models import DETECTORS, build_detector +from mmdet.models.detectors import BaseDetector + +from otx.algorithms.common.utils.logger import get_logger + +from .sam_detector_mixin import SAMDetectorMixin + +logger = get_logger() + +# TODO: Need to fix pylint issues +# pylint: disable=abstract-method, too-many-locals, unused-argument + + +@DETECTORS.register_module() +class MeanTeacher(SAMDetectorMixin, 
BaseDetector): + """Mean teacher framework for detection and instance segmentation.""" + + def __init__( + self, + arch_type, + unlabeled_loss_weights={"cls": 1.0, "bbox": 1.0, "mask": 1.0}, + pseudo_conf_thresh=0.7, + bg_loss_weight=-1.0, + min_pseudo_label_ratio=0.0, + **kwargs + ): + super().__init__() + self.unlabeled_loss_weights = unlabeled_loss_weights + self.pseudo_conf_thresh = pseudo_conf_thresh + self.bg_loss_weight = bg_loss_weight + self.min_pseudo_label_ratio = min_pseudo_label_ratio + cfg = kwargs.copy() + cfg["type"] = arch_type + self.model_s = build_detector(cfg) + self.model_t = copy.deepcopy(self.model_s) + # warmup for first epochs + self.enable_unlabeled_loss(False) + + # Hooks for super_type transparent weight load/save + self._register_state_dict_hook(self.state_dict_hook) + self._register_load_state_dict_pre_hook(functools.partial(self.load_state_dict_pre_hook, self)) + + def extract_feat(self, imgs): + """Extract features for UnbiasedTeacher.""" + return self.model_s.extract_feat(imgs) + + def simple_test(self, img, img_metas, **kwargs): + """Test from img with UnbiasedTeacher.""" + return self.model_s.simple_test(img, img_metas, **kwargs) + + def aug_test(self, imgs, img_metas, **kwargs): + """Aug Test from img with UnbiasedTeacher.""" + return self.model_s.aug_test(imgs, img_metas, **kwargs) + + def forward_dummy(self, img, **kwargs): + """Dummy forward function for UnbiasedTeacher.""" + return self.model_s.forward_dummy(img, **kwargs) + + def enable_unlabeled_loss(self, mode=True): + """Enable function for UnbiasedTeacher unlabeled loss.""" + self.unlabeled_loss_enabled = mode + + def forward_teacher(self, img, img_metas): + """Method to extract predictions (pseudo labeles) from teacher.""" + x = self.model_t.extract_feat(img) + proposal_list = self.model_t.rpn_head.simple_test_rpn(x, img_metas) + + det_bboxes, det_labels = self.model_t.roi_head.simple_test_bboxes( + x, img_metas, proposal_list, self.model_t.test_cfg.rcnn, rescale=False 
+ ) + + bbox_results = [ + bbox2result(det_bboxes[i], det_labels[i], self.model_t.roi_head.bbox_head.num_classes) + for i in range(len(det_bboxes)) + ] + + if not self.model_t.with_mask: + return bbox_results + else: + ori_shapes = tuple(meta["ori_shape"] for meta in img_metas) + scale_factors = tuple(meta["scale_factor"] for meta in img_metas) + + num_imgs = len(det_bboxes) + if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes): + segm_results = [ + [[] for _ in range(self.model_t.roi_head.mask_head.num_classes)] for _ in range(num_imgs) + ] + else: + _bboxes = [det_bboxes[i][:, :4] for i in range(len(det_bboxes))] + mask_rois = bbox2roi(_bboxes) + mask_results = self.model_t.roi_head._mask_forward(x, mask_rois) + mask_pred = mask_results["mask_pred"] + # split batch mask prediction back to each image + num_mask_roi_per_img = [len(det_bbox) for det_bbox in det_bboxes] + mask_preds = mask_pred.split(num_mask_roi_per_img, 0) + + # apply mask post-processing to each image individually + segm_results = [] + for i in range(num_imgs): + if det_bboxes[i].shape[0] == 0: + segm_results.append([[] for _ in range(self.model_t.roi_head.mask_head.num_classes)]) + else: + segm_result = self.model_t.roi_head.mask_head.get_scaled_seg_masks( + mask_preds[i], + _bboxes[i], + det_labels[i], + self.model_t.test_cfg.rcnn, + ori_shapes[i], + scale_factors[i], + rescale=False, + ) + segm_results.append(segm_result) + + return list(zip(bbox_results, segm_results)) + + def forward_train(self, img, img_metas, gt_bboxes, gt_labels, gt_masks=None, gt_bboxes_ignore=None, **kwargs): + """Forward function for UnbiasedTeacher.""" + losses = {} + # Supervised loss + # TODO: check img0 only option (which is common for mean teacher method) + forward_train = functools.partial( + self.model_s.forward_train, + img, + img_metas, + gt_bboxes, + gt_labels, + gt_bboxes_ignore=(gt_bboxes_ignore if gt_bboxes_ignore else None), + ) + if self.model_s.with_mask: + sl_losses = 
forward_train(gt_masks=gt_masks) + else: + sl_losses = forward_train() + losses.update(sl_losses) + + if not self.unlabeled_loss_enabled: + return losses + + # Pseudo labels from teacher + ul_args = kwargs.get("extra_0", {}) + ul_img = ul_args.get("img") + ul_img0 = ul_args.get("img0") + ul_img_metas = ul_args.get("img_metas") + if ul_img is None: + return losses + with torch.no_grad(): + if self.model_t.with_mask: + teacher_outputs = self.forward_teacher(ul_img0, ul_img_metas) + else: + teacher_outputs = self.model_t.forward_test([ul_img0], [ul_img_metas], rescale=False) + current_device = ul_img0[0].device + pseudo_bboxes, pseudo_labels, pseudo_masks, pseudo_ratio = self.generate_pseudo_labels( + teacher_outputs, device=current_device, img_meta=ul_img_metas, **kwargs + ) + losses.update(ps_ratio=torch.tensor([pseudo_ratio], device=current_device)) + + # Unsupervised loss + # Compute only if min_pseudo_label_ratio is reached + if pseudo_ratio >= self.min_pseudo_label_ratio: + if self.bg_loss_weight >= 0.0: + self.model_s.bbox_head.bg_loss_weight = self.bg_loss_weight + if self.model_t.with_mask: + ul_losses = self.model_s.forward_train( + ul_img, ul_img_metas, pseudo_bboxes, pseudo_labels, gt_masks=pseudo_masks + ) + else: + ul_losses = self.model_s.forward_train(ul_img, ul_img_metas, pseudo_bboxes, pseudo_labels) + + if self.bg_loss_weight >= 0.0: + self.model_s.bbox_head.bg_loss_weight = -1.0 + + for ul_loss_name in ul_losses.keys(): + if ul_loss_name.startswith("loss_"): + ul_loss = ul_losses[ul_loss_name] + target_loss = ul_loss_name.split("_")[-1] + if self.unlabeled_loss_weights[target_loss] == 0: + continue + self._update_unlabeled_loss(losses, ul_loss, ul_loss_name, self.unlabeled_loss_weights[target_loss]) + return losses + + def generate_pseudo_labels(self, teacher_outputs, img_meta, **kwargs): + """Generate pseudo label for UnbiasedTeacher.""" + device = kwargs.pop("device") + all_pseudo_bboxes = [] + all_pseudo_labels = [] + all_pseudo_masks = [] + 
num_all_bboxes = 0 + num_all_pseudo = 0 + for i, teacher_bboxes_labels in enumerate(teacher_outputs): + image_shape = img_meta[i]["img_shape"][:-1] + pseudo_bboxes = [] + pseudo_labels = [] + pseudo_masks = [] + if self.model_t.with_mask: + teacher_bboxes_labels = zip(*teacher_bboxes_labels) + for label, teacher_bboxes_masks in enumerate(teacher_bboxes_labels): + if self.model_t.with_mask: + teacher_bboxes = teacher_bboxes_masks[0] + teacher_masks = teacher_bboxes_masks[1] + else: + teacher_bboxes = teacher_bboxes_masks + confidences = teacher_bboxes[:, -1] + pseudo_indices = confidences > self.pseudo_conf_thresh + pseudo_bboxes.append(teacher_bboxes[pseudo_indices, :4]) # model output: [x y w h conf] + pseudo_labels.append(np.full([sum(pseudo_indices)], label)) + if self.model_t.with_mask: + if np.any(pseudo_indices): + teacher_masks = [np.expand_dims(mask, 0) for mask in teacher_masks] + pseudo_masks.append(np.concatenate(teacher_masks)[pseudo_indices]) + else: + pseudo_masks.append(np.array([]).reshape(0, *image_shape)) + + num_all_bboxes += teacher_bboxes.shape[0] + if len(pseudo_bboxes): + num_all_pseudo += pseudo_bboxes[-1].shape[0] + + if len(pseudo_bboxes) > 0: + all_pseudo_bboxes.append(torch.from_numpy(np.concatenate(pseudo_bboxes)).to(device)) + all_pseudo_labels.append(torch.from_numpy(np.concatenate(pseudo_labels)).to(device)) + if self.model_t.with_mask: + all_pseudo_masks.append(BitmapMasks(np.concatenate(pseudo_masks), *image_shape)) + + pseudo_ratio = float(num_all_pseudo) / num_all_bboxes if num_all_bboxes > 0 else 0.0 + return all_pseudo_bboxes, all_pseudo_labels, all_pseudo_masks, pseudo_ratio + + @staticmethod + def _update_unlabeled_loss(sum_loss, loss, loss_name, weight): + if isinstance(loss, list): + sum_loss[loss_name + "_ul"] = [cur_loss * weight for cur_loss in loss] + else: + sum_loss[loss_name + "_ul"] = loss * weight + + @staticmethod + def state_dict_hook(module, state_dict, prefix, *args, **kwargs): # pylint: disable=unused-argument 
+ """Redirect student model as output state_dict (teacher as auxilliary).""" + logger.info("----------------- MeanTeacherSegmentor.state_dict_hook() called") + for key in list(state_dict.keys()): + value = state_dict.pop(key) + if not prefix or key.startswith(prefix): + key = key.replace(prefix, "", 1) + if key.startswith("model_s."): + key = key.replace("model_s.", "", 1) + elif key.startswith("model_t."): + continue + key = prefix + key + state_dict[key] = value + return state_dict + + @staticmethod + def load_state_dict_pre_hook(module, state_dict, *args, **kwargs): # pylint: disable=unused-argument + """Redirect input state_dict to teacher model.""" + logger.info("----------------- MeanTeacherSegmentor.load_state_dict_pre_hook() called") + for key in list(state_dict.keys()): + value = state_dict.pop(key) + state_dict["model_s." + key] = value + state_dict["model_t." + key] = value diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/unbiased_teacher.py b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/unbiased_teacher.py deleted file mode 100644 index 5b5d9639c34..00000000000 --- a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/unbiased_teacher.py +++ /dev/null @@ -1,205 +0,0 @@ -"""UnbiasedTeacher Class for mmdetection detectors.""" -# Copyright (C) 2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -import copy -import functools - -import numpy as np -import torch -from mmdet.models import DETECTORS, build_detector -from mmdet.models.detectors import BaseDetector - -from otx.algorithms.common.utils.logger import get_logger - -from .sam_detector_mixin import SAMDetectorMixin - -logger = get_logger() - -# TODO: Need to fix pylint issues -# pylint: disable=abstract-method, too-many-locals, unused-argument - - -@DETECTORS.register_module() -class UnbiasedTeacher(SAMDetectorMixin, BaseDetector): - """Unbiased teacher frameowork for general detectors.""" - - def __init__( - self, - 
unlabeled_loss_weight=1.0, - unlabeled_loss_names=None, - pseudo_conf_thresh=0.7, - enable_unlabeled_loss=False, - bg_loss_weight=-1.0, - **kwargs - ): - super().__init__() - self.unlabeled_loss_weight = unlabeled_loss_weight - self.unlabeled_loss_names = ( - unlabeled_loss_names - if unlabeled_loss_names - else [ - "loss_cls", - ] - ) - self.pseudo_conf_thresh = pseudo_conf_thresh - self.unlabeled_loss_enabled = enable_unlabeled_loss - self.bg_loss_weight = bg_loss_weight - - cfg = kwargs.copy() - arch_type = cfg.pop("arch_type") - cfg["type"] = arch_type - self.model_s = build_detector(cfg) - self.model_t = copy.deepcopy(self.model_s) - - # Hooks for super_type transparent weight load/save - self._register_state_dict_hook(self.state_dict_hook) - self._register_load_state_dict_pre_hook(functools.partial(self.load_state_dict_pre_hook, self)) - - def extract_feat(self, imgs): - """Extract features for UnbiasedTeacher.""" - return self.model_t.extract_feat(imgs) - - def simple_test(self, img, img_metas, **kwargs): - """Test from img with UnbiasedTeacher.""" - return self.model_t.simple_test(img, img_metas, **kwargs) - - def aug_test(self, imgs, img_metas, **kwargs): - """Aug Test from img with UnbiasedTeacher.""" - return self.model_t.aug_test(imgs, img_metas, **kwargs) - - def forward_dummy(self, img, **kwargs): - """Dummy forward function for UnbiasedTeacher.""" - return self.model_t.forward_dummy(img, **kwargs) - - def enable_unlabeled_loss(self, mode=True): - """Enable function for UnbiasedTeacher unlabeled loss.""" - self.unlabeled_loss_enabled = mode - - def forward_train(self, img, img_metas, img0, gt_bboxes, gt_labels, gt_bboxes_ignore=None, **kwargs): - """Forward function for UnbiasedTeacher.""" - losses = {} - - # Supervised loss - # TODO: check img0 only option (which is common for mean teacher method) - sl_losses = self.model_s.forward_train( - torch.cat((img0, img)), # weak + hard augmented images - img_metas + img_metas, - gt_bboxes + gt_bboxes, - 
gt_labels + gt_labels, - gt_bboxes_ignore + gt_bboxes_ignore if gt_bboxes_ignore else None, - ) - losses.update(sl_losses) - - # Pseudo labels from teacher - ul_args = kwargs.get("extra_0", {}) # Supposing ComposedDL([labeled, unlabeled]) data loader - ul_img = ul_args.get("img") - ul_img0 = ul_args.get("img0") - ul_img_metas = ul_args.get("img_metas") - if ul_img is None: - return losses - with torch.no_grad(): - teacher_outputs = self.model_t.forward_test( - [ul_img0], - [ul_img_metas], - rescale=False, # easy augmentation - ) - current_device = ul_img0[0].device - pseudo_bboxes, pseudo_labels, pseudo_ratio = self.generate_pseudo_labels( - teacher_outputs, device=current_device, **kwargs - ) - ps_recall = self.eval_pseudo_label_recall(pseudo_bboxes, ul_args.get("gt_bboxes", [])) - losses.update(ps_recall=torch.tensor(ps_recall, device=current_device)) - losses.update(ps_ratio=torch.tensor([pseudo_ratio], device=current_device)) - - if not self.unlabeled_loss_enabled or self.unlabeled_loss_weight <= 0.001: # TODO: move back - return losses - - # Unsupervised loss - if self.bg_loss_weight >= 0.0: - self.model_s.bbox_head.bg_loss_weight = self.bg_loss_weight - ul_losses = self.model_s.forward_train(ul_img, ul_img_metas, pseudo_bboxes, pseudo_labels) # hard augmentation - if self.bg_loss_weight >= 0.0: - self.model_s.bbox_head.bg_loss_weight = -1.0 - - for ul_loss_name in self.unlabeled_loss_names: - ul_loss = ul_losses[ul_loss_name] - if isinstance(ul_loss, torch.Tensor): - ul_loss = [ul_loss] - losses[ul_loss_name + "_ul"] = [loss * self.unlabeled_loss_weight for loss in ul_loss] - # TODO: apply loss_bbox when adopting QFL; - - return losses - - def generate_pseudo_labels(self, teacher_outputs, **kwargs): - """Generate pseudo label for UnbiasedTeacher.""" - device = kwargs.pop("device") - all_pseudo_bboxes = [] - all_pseudo_labels = [] - num_all_bboxes = 0 - num_all_pseudo = 0 - for teacher_bboxes_labels in teacher_outputs: - pseudo_bboxes = [] - pseudo_labels = [] 
- for label, teacher_bboxes in enumerate(teacher_bboxes_labels): - confidences = teacher_bboxes[:, -1] - pseudo_indices = confidences > self.pseudo_conf_thresh - pseudo_bboxes.append(teacher_bboxes[pseudo_indices, :4]) # model output: [x y w h conf] - pseudo_labels.append(np.full([sum(pseudo_indices)], label)) - num_all_bboxes += teacher_bboxes.shape[0] - num_all_pseudo += pseudo_bboxes[-1].shape[0] - all_pseudo_bboxes.append(torch.from_numpy(np.concatenate(pseudo_bboxes)).to(device)) - all_pseudo_labels.append(torch.from_numpy(np.concatenate(pseudo_labels)).to(device)) - # print(f'{num_all_pseudo} / {num_all_bboxes}') - pseudo_ratio = float(num_all_pseudo) / num_all_bboxes if num_all_bboxes > 0 else 0.0 - return all_pseudo_bboxes, all_pseudo_labels, pseudo_ratio - - def eval_pseudo_label_recall(self, all_pseudo_bboxes, all_gt_bboxes): - """Eval pseudo label recall for test only.""" - from mmdet.core.evaluation.recall import _recalls, bbox_overlaps - - img_num = len(all_gt_bboxes) - if img_num == 0: - return [0.0] - all_ious = np.ndarray((img_num,), dtype=object) - for i in range(img_num): - ps_bboxes = all_pseudo_bboxes[i] - gt_bboxes = all_gt_bboxes[i] - # prop_num = min(ps_bboxes.shape[0], 100) - prop_num = ps_bboxes.shape[0] - if gt_bboxes is None or gt_bboxes.shape[0] == 0: - ious = np.zeros((0, ps_bboxes.shape[0]), dtype=np.float32) - elif ps_bboxes is None or ps_bboxes.shape[0] == 0: - ious = np.zeros((gt_bboxes.shape[0], 0), dtype=np.float32) - else: - ious = bbox_overlaps(gt_bboxes.detach().cpu().numpy(), ps_bboxes.detach().cpu().numpy()[:prop_num, :4]) - all_ious[i] = ious - recall = _recalls(all_ious, np.array([100]), np.array([0.5])) - return recall - - @staticmethod - def state_dict_hook(_, state_dict, prefix, *args, **kwargs): - """Redirect teacher model as output state_dict (student as auxilliary).""" - logger.info("----------------- UnbiasedTeacher.state_dict_hook() called") - for k in list(state_dict.keys()): - value = state_dict.pop(k) - if not 
prefix or k.startswith(prefix): - k = k.replace(prefix, "", 1) - if k.startswith("model_t."): - k = k.replace("model_t.", "", 1) - elif k.startswith("model_s."): - continue - k = prefix + k - state_dict[k] = value - return state_dict - - @staticmethod - def load_state_dict_pre_hook(_, state_dict, *args, **kwargs): - """Redirect input state_dict to teacher model.""" - logger.info("----------------- UnbiasedTeacher.load_state_dict_pre_hook() called") - for k in list(state_dict.keys()): - value = state_dict.pop(k) - if "model_s." not in k: - k = "model_t." + k - state_dict[k] = value diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_fcn_mask_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_fcn_mask_head.py index ec9cc152359..9b7fa999196 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_fcn_mask_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_fcn_mask_head.py @@ -73,6 +73,10 @@ class label c. cls_segms[labels[i]].append(mask[0]) return cls_segms + def get_scaled_seg_masks(self, *args, **kwargs): + """Original method "get_seg_mask" from FCNMaskHead. 
@LOSSES.register_module()
class OrdinaryFocalLoss(nn.Module):
    """Multi-class focal loss without class balancing (no ``alpha`` term).

    The per-sample loss is ``(1 - p) ** gamma * CE`` where ``CE`` is the
    softmax cross-entropy of the sample and ``p = exp(-CE)`` is the
    probability the model assigns to the target class.

    Args:
        gamma (float): Focusing parameter; must be non-negative. ``0`` reduces
            the loss to plain cross-entropy.
        loss_weight (float): Scalar multiplier applied to the returned loss.
            Configs pass this key (e.g. ``loss_weight=1.0``); it used to be
            swallowed by ``**kwargs`` and silently ignored.
        **kwargs: Extra config keys, accepted and ignored.
    """

    def __init__(self, gamma=1.5, loss_weight=1.0, **kwargs):
        super().__init__()
        assert gamma >= 0, f"gamma must be non-negative, got {gamma}"
        self.gamma = gamma
        self.loss_weight = loss_weight

    def forward(self, input, target, label_weights=None, avg_factor=None, reduction="mean", **kwargs):
        """Compute the focal loss.

        Args:
            input (torch.Tensor): Class logits, passed to ``F.cross_entropy`` as is.
            target (torch.Tensor): Target class indices, passed to
                ``F.cross_entropy`` as is.
            label_weights (torch.Tensor, optional): Per-sample weights; must have
                the same length as the per-sample loss.
            avg_factor (float, optional): Denominator used for ``"mean"``
                reduction. Defaults to ``target.shape[0]``.
            reduction (str): ``"mean"`` or ``"sum"``; any other value returns
                the unreduced per-sample loss.
            **kwargs: Extra arguments from the caller, accepted and ignored.

        Returns:
            torch.Tensor: The (optionally reduced) loss scaled by ``loss_weight``.
        """
        if target.numel() == 0:
            # Keep the result attached to the graph so backward() still works
            # on batches without any target.
            return 0.0 * input.sum()

        ce_loss = F.cross_entropy(input, target, reduction="none")
        # exp(-CE) recovers the softmax probability of the target class.
        target_prob = torch.exp(-ce_loss)
        loss = (1 - target_prob) ** self.gamma * ce_loss

        if label_weights is not None:
            assert len(loss) == len(label_weights), "label_weights must match the number of samples"
            loss = loss * label_weights

        if avg_factor is None:
            avg_factor = target.shape[0]

        if reduction == "sum":
            loss = loss.sum()
        elif reduction == "mean":
            loss = loss.sum() / avg_factor
        return self.loss_weight * loss
src/otx/algorithms/detection/configs/base/data/semisl/base_semisl_data_pipeline.py rename to src/otx/algorithms/detection/configs/base/data/semisl/base_semisl_det_data_pipeline.py index 4ddf15ff710..6e8e6f90145 100644 --- a/src/otx/algorithms/detection/configs/base/data/semisl/base_semisl_data_pipeline.py +++ b/src/otx/algorithms/detection/configs/base/data/semisl/base_semisl_det_data_pipeline.py @@ -7,7 +7,8 @@ # This is from otx/mpa/recipes/stages/_base_/data/pipelines/ubt.py # This could be needed sync with incr-learning's data pipeline -__img_scale = (992, 736) +__img_scale_test = (992, 736) + __img_norm_cfg = dict(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True) common_pipeline = [ @@ -82,34 +83,22 @@ dict(type="LoadImageFromOTXDataset", enable_memcache=True), dict(type="LoadAnnotationFromOTXDataset", with_bbox=True), dict(type="MinIoURandomCrop", min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3), - *common_pipeline, - dict(type="ToTensor", keys=["gt_bboxes", "gt_labels"]), dict( - type="ToDataContainer", - fields=[ - dict(key="img", stack=True), - dict(key="img0", stack=True), - dict(key="gt_bboxes"), - dict(key="gt_labels"), - ], - ), - dict( - type="Collect", - keys=["img", "img0", "gt_bboxes", "gt_labels"], - meta_keys=[ - "ori_filename", - "flip_direction", - "scale_factor", - "img_norm_cfg", - "gt_ann_ids", - "flip", - "ignored_labels", - "ori_shape", - "filename", - "img_shape", - "pad_shape", + type="Resize", + img_scale=[ + (992, 736), + (896, 736), + (1088, 736), + (992, 672), + (992, 800), ], + multiscale_mode="value", + keep_ratio=False, ), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="Normalize", **__img_norm_cfg), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), ] unlabeled_pipeline = [ @@ -135,7 +124,7 @@ dict(type="LoadImageFromOTXDataset"), dict( type="MultiScaleFlipAug", - img_scale=__img_scale, + img_scale=__img_scale_test, flip=False, transforms=[ dict(type="Resize", 
keep_ratio=False), diff --git a/src/otx/algorithms/detection/configs/base/data/semisl/semisl_is_eff_data_pipeline.py b/src/otx/algorithms/detection/configs/base/data/semisl/semisl_is_eff_data_pipeline.py new file mode 100644 index 00000000000..8a838f8e0ea --- /dev/null +++ b/src/otx/algorithms/detection/configs/base/data/semisl/semisl_is_eff_data_pipeline.py @@ -0,0 +1,157 @@ +"""Data Pipeline for Semi-Supervised Learning Detection Task.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# pylint: disable=invalid-name + +# This is from otx/mpa/recipes/stages/_base_/data/pipelines/ubt.py +# This could be needed sync with incr-learning's data pipeline +__dataset_type = "OTXDetDataset" +__img_size = (1024, 1024) +__img_norm_cfg = dict(mean=(103.53, 116.28, 123.675), std=(1.0, 1.0, 1.0), to_rgb=True) + +common_pipeline = [ + dict(type="Resize", img_scale=__img_size, keep_ratio=False), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="BranchImage", key_map=dict(img="img0")), + dict(type="NDArrayToPILImage", keys=["img"]), + dict( + type="RandomApply", + transform_cfgs=[ + dict( + type="ColorJitter", + brightness=0.4, + contrast=0.4, + saturation=0.4, + hue=0.1, + ) + ], + p=0.8, + ), + dict(type="RandomGrayscale", p=0.2), + dict( + type="RandomApply", + transform_cfgs=[ + dict( + type="RandomGaussianBlur", + sigma_min=0.1, + sigma_max=2.0, + ) + ], + p=0.5, + ), + dict(type="PILImageToNDArray", keys=["img"]), + dict(type="Normalize", **__img_norm_cfg), + dict(type="Pad", size_divisor=32), + dict(type="NDArrayToTensor", keys=["img", "img0"]), + dict( + type="RandomErasing", + p=0.7, + scale=[0.05, 0.2], + ratio=[0.3, 3.3], + value="random", + ), + dict( + type="RandomErasing", + p=0.5, + scale=[0.02, 0.2], + ratio=[0.10, 6.0], + value="random", + ), + dict( + type="RandomErasing", + p=0.3, + scale=[0.02, 0.2], + ratio=[0.05, 8.0], + value="random", + ), +] + +train_pipeline = [ + dict(type="LoadImageFromOTXDataset", 
enable_memcache=True), + dict( + type="LoadAnnotationFromOTXDataset", + domain="instance_segmentation", + with_bbox=True, + with_mask=True, + poly2mask=False, + ), + dict(type="Resize", img_scale=__img_size, keep_ratio=False), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="Normalize", **__img_norm_cfg), + dict(type="Pad", size_divisor=32), + dict(type="DefaultFormatBundle"), + dict( + type="Collect", + keys=["img", "gt_bboxes", "gt_labels", "gt_masks"], + meta_keys=[ + "ori_filename", + "flip_direction", + "scale_factor", + "img_norm_cfg", + "gt_ann_ids", + "flip", + "ignored_labels", + "ori_shape", + "filename", + "img_shape", + "pad_shape", + ], + ), +] + +unlabeled_pipeline = [ + dict(type="LoadImageFromOTXDataset", enable_memcache=True), + *common_pipeline, + dict( + type="ToDataContainer", + fields=[ + dict(key="img", stack=True), + dict(key="img0", stack=True), + ], + ), + dict( + type="Collect", + keys=[ + "img", + "img0", + ], + ), +] + +test_pipeline = [ + dict(type="LoadImageFromOTXDataset", enable_memcache=True), + dict( + type="MultiScaleFlipAug", + img_scale=__img_size, + flip=False, + transforms=[ + dict(type="Resize", keep_ratio=False), + dict(type="Normalize", **__img_norm_cfg), + dict(type="Pad", size_divisor=32), + dict(type="ImageToTensor", keys=["img"]), + dict(type="Collect", keys=["img"]), + ], + ), +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=2, + train=dict(type="RepeatDataset", times=13, dataset=dict(type=__dataset_type, pipeline=train_pipeline)), + val=dict( + type=__dataset_type, + test_mode=True, + pipeline=test_pipeline, + ), + test=dict( + type=__dataset_type, + test_mode=True, + pipeline=test_pipeline, + ), + unlabeled=dict( + type=__dataset_type, + pipeline=unlabeled_pipeline, + ), +) diff --git a/src/otx/algorithms/detection/configs/base/data/semisl/semisl_is_res_data_pipeline.py b/src/otx/algorithms/detection/configs/base/data/semisl/semisl_is_res_data_pipeline.py new file mode 100644 index 
00000000000..29492935b34 --- /dev/null +++ b/src/otx/algorithms/detection/configs/base/data/semisl/semisl_is_res_data_pipeline.py @@ -0,0 +1,140 @@ +"""Data Pipeline for Semi-Supervised Learning Detection Task.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# pylint: disable=invalid-name + +# This is from otx/mpa/recipes/stages/_base_/data/pipelines/ubt.py +# This could be needed sync with incr-learning's data pipeline +__img_size = (1344, 800) +__dataset_type = "OTXDetDataset" +__img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +common_pipeline = [ + dict(type="Resize", img_scale=__img_size, keep_ratio=False), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="BranchImage", key_map=dict(img="img0")), + dict(type="NDArrayToPILImage", keys=["img"]), + dict( + type="RandomApply", + transform_cfgs=[ + dict( + type="ColorJitter", + brightness=0.4, + contrast=0.4, + saturation=0.4, + hue=0.1, + ) + ], + p=0.8, + ), + dict(type="RandomGrayscale", p=0.2), + dict( + type="RandomApply", + transform_cfgs=[ + dict( + type="RandomGaussianBlur", + sigma_min=0.1, + sigma_max=2.0, + ) + ], + p=0.5, + ), + dict(type="PILImageToNDArray", keys=["img"]), + dict(type="Normalize", **__img_norm_cfg), + dict(type="Pad", size_divisor=32), + dict(type="NDArrayToTensor", keys=["img", "img0"]), + dict( + type="RandomErasing", + p=0.7, + scale=[0.05, 0.2], + ratio=[0.3, 3.3], + value="random", + ), + dict( + type="RandomErasing", + p=0.5, + scale=[0.02, 0.2], + ratio=[0.10, 6.0], + value="random", + ), + dict( + type="RandomErasing", + p=0.3, + scale=[0.02, 0.2], + ratio=[0.05, 8.0], + value="random", + ), +] + +train_pipeline = [ + dict(type="LoadImageFromOTXDataset", enable_memcache=True), + dict( + type="LoadAnnotationFromOTXDataset", + domain="instance_segmentation", + with_bbox=True, + with_mask=True, + poly2mask=False, + ), + dict(type="MinIoURandomCrop", min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 
min_crop_size=0.3), + dict(type="Resize", img_scale=[(1333, 400), (1333, 1200)], keep_ratio=False), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="Normalize", **__img_norm_cfg), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]), +] + +unlabeled_pipeline = [ + dict(type="LoadImageFromOTXDataset", enable_memcache=True), + *common_pipeline, + dict( + type="ToDataContainer", + fields=[ + dict(key="img", stack=True), + dict(key="img0", stack=True), + ], + ), + dict( + type="Collect", + keys=[ + "img", + "img0", + ], + ), +] + +test_pipeline = [ + dict(type="LoadImageFromOTXDataset", enable_memcache=True), + dict( + type="MultiScaleFlipAug", + img_scale=__img_size, + flip=False, + transforms=[ + dict(type="Resize", keep_ratio=False), + dict(type="Normalize", **__img_norm_cfg), + dict(type="Pad", size_divisor=32), + dict(type="ImageToTensor", keys=["img"]), + dict(type="Collect", keys=["img"]), + ], + ), +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=2, + train=dict(type="RepeatDataset", times=13, dataset=dict(type=__dataset_type, pipeline=train_pipeline)), + val=dict( + type=__dataset_type, + test_mode=True, + pipeline=test_pipeline, + ), + test=dict( + type=__dataset_type, + test_mode=True, + pipeline=test_pipeline, + ), + unlabeled=dict( + type=__dataset_type, + pipeline=unlabeled_pipeline, + ), +) diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox/semisl/data_pipeline.py b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox/semisl/data_pipeline.py index b54b4dd5706..bd4fcb67f26 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox/semisl/data_pipeline.py +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox/semisl/data_pipeline.py @@ -18,6 +18,8 @@ # This is from src/otx/mpa/recipes/stages/_base_/data/pipelines/ubt.py # This could be needed sync with incr-learning's data pipeline +_base_ = 
["../../../base/data/semisl/base_semisl_det_data_pipeline.py"] + __img_scale = (992, 736) __img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) @@ -93,34 +95,22 @@ dict(type="LoadImageFromOTXDataset", enable_memcache=True), dict(type="LoadAnnotationFromOTXDataset", with_bbox=True), dict(type="MinIoURandomCrop", min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3), - *common_pipeline, - dict(type="ToTensor", keys=["gt_bboxes", "gt_labels"]), dict( - type="ToDataContainer", - fields=[ - dict(key="img", stack=True), - dict(key="img0", stack=True), - dict(key="gt_bboxes"), - dict(key="gt_labels"), - ], - ), - dict( - type="Collect", - keys=["img", "img0", "gt_bboxes", "gt_labels"], - meta_keys=[ - "ori_filename", - "flip_direction", - "scale_factor", - "img_norm_cfg", - "gt_ann_ids", - "flip", - "ignored_labels", - "ori_shape", - "filename", - "img_shape", - "pad_shape", + type="Resize", + img_scale=[ + (992, 736), + (896, 736), + (1088, 736), + (992, 672), + (992, 800), ], + multiscale_mode="value", + keep_ratio=False, ), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="Normalize", **__img_norm_cfg), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), ] unlabeled_pipeline = [ diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox/semisl/model.py b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox/semisl/model.py index e3635b919a3..dfec937a5ba 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox/semisl/model.py +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox/semisl/model.py @@ -19,9 +19,9 @@ _base_ = ["../../../../../../recipes/stages/detection/semisl.py", "../../../base/models/detector.py"] model = dict( - super_type="UnbiasedTeacher", + super_type="MeanTeacher", pseudo_conf_thresh=0.25, - unlabeled_loss_weight=1.0, + unlabeled_loss_weights={"cls": 1.0, "bbox": 1.0, "obj": 1.0}, 
type="CustomYOLOX", backbone=dict(type="CSPDarknet", deepen_factor=0.33, widen_factor=0.375, out_indices=(2, 3, 4)), neck=dict(type="YOLOXPAFPN", in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_l/semisl/data_pipeline.py b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_l/semisl/data_pipeline.py index 96558789256..bdab7531379 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_l/semisl/data_pipeline.py +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_l/semisl/data_pipeline.py @@ -4,4 +4,4 @@ # SPDX-License-Identifier: Apache-2.0 -_base_ = ["../../../base/data/semisl/base_semisl_data_pipeline.py"] +_base_ = ["../../../base/data/semisl/base_semisl_det_data_pipeline.py"] diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_l/semisl/model.py b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_l/semisl/model.py index 692b138ca14..cd756de1166 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_l/semisl/model.py +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_l/semisl/model.py @@ -8,9 +8,9 @@ _base_ = ["../../../../../../recipes/stages/detection/semisl.py", "../../../base/models/detector.py"] model = dict( - super_type="UnbiasedTeacher", + super_type="MeanTeacher", pseudo_conf_thresh=0.25, - unlabeled_loss_weight=1.0, + unlabeled_loss_weights={"cls": 1.0, "bbox": 1.0, "obj": 1.0}, type="CustomYOLOX", backbone=dict(type="CSPDarknet", deepen_factor=1.0, widen_factor=1.0, out_indices=(2, 3, 4)), neck=dict(type="YOLOXPAFPN", in_channels=[256, 512, 1024], out_channels=256, num_csp_blocks=3), diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_s/semisl/data_pipeline.py b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_s/semisl/data_pipeline.py index 0f7e4963a26..72b6cc8e03a 100644 --- 
a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_s/semisl/data_pipeline.py +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_s/semisl/data_pipeline.py @@ -4,4 +4,4 @@ # SPDX-License-Identifier: Apache-2.0 -_base_ = ["../../../base/data/semisl/base_semisl_data_pipeline.py"] +_base_ = ["../../../base/data/semisl/base_semisl_det_data_pipeline.py"] diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_s/semisl/model.py b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_s/semisl/model.py index 35ea2bb2215..ed14a18b4e0 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_s/semisl/model.py +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_s/semisl/model.py @@ -8,9 +8,9 @@ _base_ = ["../../../../../../recipes/stages/detection/semisl.py", "../../../base/models/detector.py"] model = dict( - super_type="UnbiasedTeacher", + super_type="MeanTeacher", pseudo_conf_thresh=0.25, - unlabeled_loss_weight=1.0, + unlabeled_loss_weights={"cls": 1.0, "bbox": 1.0, "obj": 1.0}, type="CustomYOLOX", backbone=dict(type="CSPDarknet", deepen_factor=0.33, widen_factor=0.5, out_indices=(2, 3, 4)), neck=dict(type="YOLOXPAFPN", in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=4), diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/semisl/data_pipeline.py b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/semisl/data_pipeline.py index 39149f796b3..86a6141d24f 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/semisl/data_pipeline.py +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/semisl/data_pipeline.py @@ -4,4 +4,4 @@ # SPDX-License-Identifier: Apache-2.0 -_base_ = ["../../../base/data/semisl/base_semisl_data_pipeline.py"] +_base_ = ["../../../base/data/semisl/base_semisl_det_data_pipeline.py"] diff --git 
a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/semisl/model.py b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/semisl/model.py index a68349c0857..28daece945a 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/semisl/model.py +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/semisl/model.py @@ -8,9 +8,9 @@ _base_ = ["../../../../../../recipes/stages/detection/semisl.py", "../../../base/models/detector.py"] model = dict( - super_type="UnbiasedTeacher", + super_type="MeanTeacher", pseudo_conf_thresh=0.25, - unlabeled_loss_weight=1.0, + unlabeled_loss_weights={"cls": 1.0, "bbox": 1.0, "obj": 1.0}, type="CustomYOLOX", backbone=dict(type="CSPDarknet", deepen_factor=1.33, widen_factor=1.25, out_indices=(2, 3, 4)), neck=dict(type="YOLOXPAFPN", in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4), diff --git a/src/otx/algorithms/detection/configs/detection/mobilenetv2_atss/semisl/data_pipeline.py b/src/otx/algorithms/detection/configs/detection/mobilenetv2_atss/semisl/data_pipeline.py index c93f3c33dc4..f48f80903a1 100644 --- a/src/otx/algorithms/detection/configs/detection/mobilenetv2_atss/semisl/data_pipeline.py +++ b/src/otx/algorithms/detection/configs/detection/mobilenetv2_atss/semisl/data_pipeline.py @@ -4,4 +4,4 @@ # SPDX-License-Identifier: Apache-2.0 -_base_ = ["../../../base/data/semisl/base_semisl_data_pipeline.py"] +_base_ = ["../../../base/data/semisl/base_semisl_det_data_pipeline.py"] diff --git a/src/otx/algorithms/detection/configs/detection/mobilenetv2_atss/semisl/model.py b/src/otx/algorithms/detection/configs/detection/mobilenetv2_atss/semisl/model.py index ddd79acf663..7fe599b1fa8 100644 --- a/src/otx/algorithms/detection/configs/detection/mobilenetv2_atss/semisl/model.py +++ b/src/otx/algorithms/detection/configs/detection/mobilenetv2_atss/semisl/model.py @@ -23,9 +23,9 @@ ] model = dict( - super_type="UnbiasedTeacher", + super_type="MeanTeacher", 
pseudo_conf_thresh=0.25, - unlabeled_loss_weight=1.0, + unlabeled_loss_weights={"cls": 1.0, "bbox": 1.0, "obj": 1.0, "centerness": 1.0}, type="CustomATSS", neck=dict( type="FPN", diff --git a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/semisl/data_pipeline.py b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/semisl/data_pipeline.py index 13c964aaacb..e3acb581abb 100644 --- a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/semisl/data_pipeline.py +++ b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/semisl/data_pipeline.py @@ -4,4 +4,4 @@ # SPDX-License-Identifier: Apache-2.0 -_base_ = ["../../../base/data/semisl/base_semisl_data_pipeline.py"] +_base_ = ["../../../base/data/semisl/base_semisl_det_data_pipeline.py"] diff --git a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/semisl/model.py b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/semisl/model.py index 9b04e6655f7..973d12d6161 100644 --- a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/semisl/model.py +++ b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/semisl/model.py @@ -25,9 +25,9 @@ __width_mult = 1.0 model = dict( - super_type="UnbiasedTeacher", + super_type="MeanTeacher", pseudo_conf_thresh=0.25, - unlabeled_loss_weight=1.0, + unlabeled_loss_weights={"cls": 1.0, "bbox": 1.0, "obj": 1.0}, type="CustomSingleStageDetector", bbox_head=dict( type="CustomSSDHead", diff --git a/src/otx/algorithms/detection/configs/detection/resnext101_atss/semisl/data_pipeline.py b/src/otx/algorithms/detection/configs/detection/resnext101_atss/semisl/data_pipeline.py index b096d49e213..06dc8432d1e 100644 --- a/src/otx/algorithms/detection/configs/detection/resnext101_atss/semisl/data_pipeline.py +++ b/src/otx/algorithms/detection/configs/detection/resnext101_atss/semisl/data_pipeline.py @@ -4,4 +4,4 @@ # SPDX-License-Identifier: Apache-2.0 -_base_ = 
["../../../base/data/semisl/base_semisl_data_pipeline.py"] +_base_ = ["../../../base/data/semisl/base_semisl_det_data_pipeline.py"] diff --git a/src/otx/algorithms/detection/configs/detection/resnext101_atss/semisl/model.py b/src/otx/algorithms/detection/configs/detection/resnext101_atss/semisl/model.py index be723e39028..735f0e12d09 100644 --- a/src/otx/algorithms/detection/configs/detection/resnext101_atss/semisl/model.py +++ b/src/otx/algorithms/detection/configs/detection/resnext101_atss/semisl/model.py @@ -11,9 +11,9 @@ ] model = dict( - super_type="UnbiasedTeacher", + super_type="MeanTeacher", pseudo_conf_thresh=0.25, - unlabeled_loss_weight=1.0, + unlabeled_loss_weights={"cls": 1.0, "bbox": 1.0, "obj": 1.0, "centerness": 1.0}, type="CustomATSS", backbone=dict( type="ResNeXt", diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/__init__.py b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/__init__.py new file mode 100644 index 00000000000..abad44e5db3 --- /dev/null +++ b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/__init__.py @@ -0,0 +1,4 @@ +"""Initialization of Mask-RCNN EfficientNetb2b model for Semi-SL Instance Segmentation Task.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/compression_config.json b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/compression_config.json new file mode 100644 index 00000000000..7e0cba46aa9 --- /dev/null +++ b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/compression_config.json @@ -0,0 +1,41 @@ +{ + "base": { + "find_unused_parameters": true, + "nncf_config": { + "target_metric_name": "mAP", + "input_info": { + "sample_size": [1, 3, 1024, 1024] + }, + 
"compression": [], + "log_dir": "/tmp" + } + }, + "nncf_quantization": { + "optimizer": { + "lr": 0.0005 + }, + "nncf_config": { + "compression": [ + { + "algorithm": "quantization", + "initializer": { + "range": { + "num_init_samples": 1000 + }, + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 1000 + } + } + } + ], + "accuracy_aware_training": { + "mode": "early_exit", + "params": { + "maximal_absolute_accuracy_degradation": 0.01, + "maximal_total_epochs": 20 + } + } + } + }, + "order_of_parts": ["nncf_quantization"] +} diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/data_pipeline.py b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/data_pipeline.py new file mode 100644 index 00000000000..9bca72665e9 --- /dev/null +++ b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/data_pipeline.py @@ -0,0 +1,6 @@ +"""Data Pipeline of Mask-RCNN EfficientNetb2b model for Semi-Supervised Learning Instance Segmentation Task.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +_base_ = ["../../../base/data/semisl/semisl_is_eff_data_pipeline.py"] diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/hparam.yaml b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/hparam.yaml new file mode 100644 index 00000000000..b748451b602 --- /dev/null +++ b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/hparam.yaml @@ -0,0 +1,15 @@ +# Hyperparameters. 
+# since we use repeat dataset for semi-sl +# -> change iteration related parameters +hyper_parameters: + parameter_overrides: + algo_backend: + train_type: + default_value: Semisupervised + learning_parameters: + num_iters: + default_value: 25 # actual num epochs 25 * repeat dataset times + early_stop_start: + default_value: 7 # when unlabeled branch enabled + early_stop_patience: + default_value: 3 diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/model.py b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/model.py new file mode 100644 index 00000000000..5d2fe5dcc5b --- /dev/null +++ b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/semisl/model.py @@ -0,0 +1,123 @@ +"""Model Configuration of Mask-RCNN EfficientNetb2b model for Semi-Supervised Learning Instance Segmentation Task.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# pylint: disable=invalid-name + +_base_ = [ + "../../../../../../recipes/stages/instance-segmentation/semisl.py", + "../../../../../common/adapters/mmcv/configs/backbones/efficientnet_b2b.yaml", + "../../../base/models/detector.py", +] + +task = "instance-segmentation" + +model = dict( + super_type="MeanTeacher", + pseudo_conf_thresh=0.7, + unlabeled_loss_weights={"cls": 2.0, "bbox": 1.0, "mask": 1.0}, + type="CustomMaskRCNN", + neck=dict(type="FPN", in_channels=[24, 48, 120, 352], out_channels=80, num_outs=5), + rpn_head=dict( + type="RPNHead", + in_channels=80, + feat_channels=80, + anchor_generator=dict(type="AnchorGenerator", scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), + bbox_coder=dict(type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), + roi_head=dict( + 
type="CustomRoIHead", # Use CustomROIHead for Ignore mode + bbox_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), + out_channels=80, + featmap_strides=[4, 8, 16, 32], + ), + bbox_head=dict( + type="Shared2FCBBoxHead", + in_channels=80, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2] + ), + reg_class_agnostic=False, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), + mask_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), + out_channels=80, + featmap_strides=[4, 8, 16, 32], + ), + mask_head=dict( + type="CustomFCNMaskHead", + num_convs=4, + in_channels=80, + conv_out_channels=80, + num_classes=80, + loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), + ), + ), + train_cfg=dict( + rpn=dict( + assigner=dict( + type="CustomMaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1, + gpu_assign_thr=300, + ), + sampler=dict(type="RandomSampler", num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False, + ), + rpn_proposal=dict( + nms_across_levels=False, + nms_pre=2000, + max_per_img=1000, + nms=dict(type="nms", iou_threshold=0.8), + min_bbox_size=0, + ), + rcnn=dict( + assigner=dict( + type="CustomMaxIoUAssigner", + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=True, + ignore_iof_thr=-1, + gpu_assign_thr=300, + ), + sampler=dict(type="RandomSampler", num=256, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False, + ), + ), + test_cfg=dict( + rpn=dict( + nms_across_levels=False, + nms_pre=800, + 
max_per_img=500, + nms=dict(type="nms", iou_threshold=0.8), + min_bbox_size=0, + ), + rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.7), max_per_img=500, mask_thr_binary=0.5), + ), +) +load_from = "https://storage.openvinotoolkit.org/repositories/\ +openvino_training_extensions/models/instance_segmentation/\ +v2/efficientnet_b2b-mask_rcnn-576x576.pth" + +evaluation = dict(interval=1, metric="mAP", save_best="mAP", iou_thr=[0.5]) +fp16 = dict(loss_scale=512.0) +ignore = True diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/__init__.py b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/__init__.py new file mode 100644 index 00000000000..3539217f31c --- /dev/null +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/__init__.py @@ -0,0 +1,4 @@ +"""Initialization of Mask-RCNN ResNet50 model for Semi-SL Instance Segmentation Task.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/compression_config.json b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/compression_config.json new file mode 100644 index 00000000000..ab687b9c6a2 --- /dev/null +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/compression_config.json @@ -0,0 +1,41 @@ +{ + "base": { + "find_unused_parameters": true, + "nncf_config": { + "target_metric_name": "mAP", + "input_info": { + "sample_size": [1, 3, 1344, 800] + }, + "compression": [], + "log_dir": "/tmp" + } + }, + "nncf_quantization": { + "optimizer": { + "lr": 0.0005 + }, + "nncf_config": { + "compression": [ + { + "algorithm": "quantization", + "initializer": { + "range": { + "num_init_samples": 1000 + }, + "batchnorm_adaptation": { + "num_bn_adaptation_samples": 1000 + } + } + } + ], + 
"accuracy_aware_training": { + "mode": "early_exit", + "params": { + "maximal_absolute_accuracy_degradation": 0.01, + "maximal_total_epochs": 20 + } + } + } + }, + "order_of_parts": ["nncf_quantization"] +} diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/data_pipeline.py b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/data_pipeline.py new file mode 100644 index 00000000000..79928399ac3 --- /dev/null +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/data_pipeline.py @@ -0,0 +1,7 @@ +"""Data Pipeline of Mask-RCNN ResNet50 model for Semi-Supervised Learning Instance Segmentation Task.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +_base_ = ["../../../base/data/semisl/semisl_is_res_data_pipeline.py"] diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/hparam.yaml b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/hparam.yaml new file mode 100644 index 00000000000..36a594db2c9 --- /dev/null +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/hparam.yaml @@ -0,0 +1,15 @@ +# Hyperparameters. 
+# since we use repeat dataset for semi-sl +# -> change iteration related parameters +hyper_parameters: + parameter_overrides: + algo_backend: + train_type: + default_value: Semisupervised + learning_parameters: + num_iters: + default_value: 25 # actual num epochs 25 * repeat dataset times + early_stop_start: + default_value: 8 # when unlabeled branch enabled + early_stop_patience: + default_value: 3 diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/model.py b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/model.py new file mode 100644 index 00000000000..28aaf8384a6 --- /dev/null +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/semisl/model.py @@ -0,0 +1,164 @@ +"""Model Configuration of Mask-RCNN ResNet50 model for Semi-Supervised Learning Instance Segmentation Task.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# pylint: disable=invalid-name + +_base_ = [ + "../../../../../../recipes/stages/instance-segmentation/semisl.py", + "../../../../../common/adapters/mmcv/configs/backbones/resnet50.yaml", + "../../../base/models/detector.py", +] + +task = "instance-segmentation" + +model = dict( + super_type="MeanTeacher", + pseudo_conf_thresh=0.7, + unlabeled_loss_weights={"cls": 1.0, "bbox": 1.0, "mask": 1.0}, + type="CustomMaskRCNN", + neck=dict( + type="FPN", + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5, + ), + rpn_head=dict( + type="RPNHead", + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type="AnchorGenerator", + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64], + ), + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[1.0, 1.0, 1.0, 1.0], + ), + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), + roi_head=dict( + 
type="CustomRoIHead", # Use CustomROIHead for Ignore mode + bbox_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + bbox_head=dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.1, 0.1, 0.2, 0.2], + ), + reg_class_agnostic=False, + loss_cls=dict(type="OrdinaryFocalLoss", gamma=1.5, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), + ), + mask_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + mask_head=dict( + type="CustomFCNMaskHead", + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), + ), + ), + train_cfg=dict( + rpn=dict( + assigner=dict( + type="CustomMaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1, + gpu_assign_thr=300, + ), + sampler=dict( + type="RandomSampler", + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False, + ), + allowed_border=-1, + pos_weight=-1, + debug=False, + ), + rpn_proposal=dict( + nms_across_levels=False, + nms_pre=2000, + max_per_img=1000, + nms=dict(type="nms", iou_threshold=0.7), + min_bbox_size=0, + ), + rcnn=dict( + assigner=dict( + type="CustomMaxIoUAssigner", + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=True, + ignore_iof_thr=-1, + gpu_assign_thr=300, + ), + sampler=dict( + type="RandomSampler", + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + ), + mask_size=28, + pos_weight=-1, + debug=False, + ), + ), + test_cfg=dict( + rpn=dict( + 
nms_across_levels=False, + nms_pre=1000, + max_per_img=1000, + nms=dict(type="nms", iou_threshold=0.7), + min_bbox_size=0, + ), + rcnn=dict( + score_thr=0.05, + nms=dict(type="nms", iou_threshold=0.5, max_num=100), + max_per_img=100, + mask_thr_binary=0.5, + ), + ), +) +load_from = "https://download.openmmlab.com/mmdetection/\ +v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/\ +mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth" + +evaluation = dict(interval=1, metric="mAP", save_best="mAP", iou_thr=[0.5]) +ignore = True + +custom_hooks = [ + dict( + type="CustomModelEMAHook", + priority="ABOVE_NORMAL", + epoch_momentum=0.1, + ), + dict(type="MeanTeacherHook", epoch_momentum=0.0, start_epoch=8, momentum=0.0004), +] diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml index fe29ee513ea..17a74b1c25e 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml @@ -31,7 +31,7 @@ hyper_parameters: inference_batch_size: default_value: 1 learning_rate: - default_value: 0.001 + default_value: 0.007 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: default_value: 100 diff --git a/src/otx/api/entities/shapes/polygon.py b/src/otx/api/entities/shapes/polygon.py index f7829bd95cc..f510a8eeaab 100644 --- a/src/otx/api/entities/shapes/polygon.py +++ b/src/otx/api/entities/shapes/polygon.py @@ -11,6 +11,7 @@ import warnings from operator import attrgetter from typing import List, Optional +import numpy as np from shapely.geometry import Polygon as shapely_polygon @@ -28,8 +29,8 @@ class Point: __slots__ = ["x", "y"] def __init__(self, x: float, y: float): - self.x = x - self.y = y + self.x = np.clip(x, a_min=0.0, a_max=1.0) + self.y = np.clip(y, a_min=0.0, a_max=1.0) def 
__repr__(self): """String representation of the point.""" diff --git a/src/otx/core/data/adapter/base_dataset_adapter.py b/src/otx/core/data/adapter/base_dataset_adapter.py index cd92d56c251..51b62a0cede 100644 --- a/src/otx/core/data/adapter/base_dataset_adapter.py +++ b/src/otx/core/data/adapter/base_dataset_adapter.py @@ -368,6 +368,7 @@ def remove_unused_label_entities(self, used_labels: List): used_labels (List): list for index of used label """ clean_label_entities = [] + for used_label in used_labels: clean_label_entities.append(self.label_entities[used_label]) self.label_entities = clean_label_entities diff --git a/src/otx/recipes/stages/detection/semisl.py b/src/otx/recipes/stages/detection/semisl.py index 89a7096e1ca..06e46763db7 100644 --- a/src/otx/recipes/stages/detection/semisl.py +++ b/src/otx/recipes/stages/detection/semisl.py @@ -9,16 +9,9 @@ custom_hooks = [ dict( - type="UnbiasedTeacherHook", + type="MeanTeacherHook", epoch_momentum=0.1, start_epoch=2, - # min_pseudo_label_ratio=0.1, - min_pseudo_label_ratio=0.0, - ), - dict( - type="DualModelEMAHook", - epoch_momentum=0.4, - start_epoch=2, ), dict( type="LazyEarlyStoppingHook", diff --git a/src/otx/recipes/stages/instance-segmentation/semisl.py b/src/otx/recipes/stages/instance-segmentation/semisl.py new file mode 100644 index 00000000000..ccf3d7eb148 --- /dev/null +++ b/src/otx/recipes/stages/instance-segmentation/semisl.py @@ -0,0 +1,30 @@ +_base_ = ["./train.py", "../_base_/models/detectors/detector.py"] + +task = "instance-segmentation" + +task_adapt = dict( + type="mpa", + op="REPLACE", + efficient_mode=False, +) + +runner = dict(max_epochs=300) + +optimizer_config = dict(_delete_=True, grad_clip=None) + +ignore = True +find_unused_parameters = True + +adaptive_validation_interval = dict( + max_interval=5, + enable_adaptive_interval_hook=True, + enable_eval_before_run=True, +) +custom_hooks = [ + dict( + type="CustomModelEMAHook", + priority="ABOVE_NORMAL", + epoch_momentum=0.4, + ), + 
dict(type="MeanTeacherHook", epoch_momentum=0.0, start_epoch=8, momentum=0.0004), +] diff --git a/tests/e2e/cli/instance_segmentation/test_instance_segmentation.py b/tests/e2e/cli/instance_segmentation/test_instance_segmentation.py index 1d2b0bd0ef0..4486252ca85 100644 --- a/tests/e2e/cli/instance_segmentation/test_instance_segmentation.py +++ b/tests/e2e/cli/instance_segmentation/test_instance_segmentation.py @@ -4,6 +4,7 @@ # import copy import os +from pathlib import Path import pytest import torch @@ -55,6 +56,16 @@ "train_params": ["params", "--learning_parameters.num_iters", "5", "--learning_parameters.batch_size", "2"], } +# Semi-SL +args_semisl = { + "--train-data-roots": "tests/assets/car_tree_bug", + "--val-data-roots": "tests/assets/car_tree_bug", + "--test-data-roots": "tests/assets/car_tree_bug", + "--unlabeled-data-roots": "tests/assets/car_tree_bug", + "--input": "tests/assets/car_tree_bug/images/train", + "train_params": ["params", "--learning_parameters.num_iters", "2", "--learning_parameters.batch_size", "4"], +} + # Training params for resume, num_iters*2 resume_params = [ "params", @@ -290,3 +301,38 @@ def test_otx_multi_gpu_train(self, template, tmp_dir_path): args1 = copy.deepcopy(args) args1["--gpus"] = "0,1" otx_train_testing(template, tmp_dir_path, otx_dir, args1) + + +class TestToolsMPASemiSLInstanceSegmentation: + @e2e_pytest_component + @pytest.mark.parametrize("template", templates, ids=templates_ids) + def test_otx_train(self, template, tmp_dir_path): + if not (Path(template.model_template_path).parent / "semisl").is_dir(): + pytest.skip("Semi-SL training type isn't available for this template") + tmp_dir_path = tmp_dir_path / "ins_seg/test_semisl" + otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl) + template_dir = get_template_dir(template, tmp_dir_path) + assert (Path(template_dir) / "semisl").is_dir() + + @e2e_pytest_component + @pytest.mark.skipif(TT_STABILITY_TESTS, reason="This is TT_STABILITY_TESTS") + 
@pytest.mark.parametrize("template", templates, ids=templates_ids) + def test_otx_eval(self, template, tmp_dir_path): + if not (Path(template.model_template_path).parent / "semisl").is_dir(): + pytest.skip("Semi-SL training type isn't available for this template") + tmp_dir_path = tmp_dir_path / "ins_seg/test_semisl" + otx_eval_testing(template, tmp_dir_path, otx_dir, args) + + @e2e_pytest_component + @pytest.mark.skipif(TT_STABILITY_TESTS, reason="This is TT_STABILITY_TESTS") + @pytest.mark.skipif(MULTI_GPU_UNAVAILABLE, reason="The number of gpu is insufficient") + @pytest.mark.parametrize("template", templates, ids=templates_ids) + def test_otx_multi_gpu_train_semisl(self, template, tmp_dir_path): + if not (Path(template.model_template_path).parent / "semisl").is_dir(): + pytest.skip("Semi-SL training type isn't available for this template") + tmp_dir_path = tmp_dir_path / "ins_seg/test_multi_gpu_semisl" + args_semisl_multigpu = copy.deepcopy(args_semisl) + args_semisl_multigpu["--gpus"] = "0,1" + otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl_multigpu) + template_dir = get_template_dir(template, tmp_dir_path) + assert (Path(template_dir) / "semisl").is_dir() diff --git a/tests/integration/cli/instance_segmentation/test_instance_segmentation.py b/tests/integration/cli/instance_segmentation/test_instance_segmentation.py index 67093895f2d..a7fab166dd0 100644 --- a/tests/integration/cli/instance_segmentation/test_instance_segmentation.py +++ b/tests/integration/cli/instance_segmentation/test_instance_segmentation.py @@ -36,6 +36,15 @@ "train_params": ["params", "--learning_parameters.num_iters", "1", "--learning_parameters.batch_size", "2"], } +args_semisl = { + "--train-data-roots": "tests/assets/car_tree_bug", + "--val-data-roots": "tests/assets/car_tree_bug", + "--test-data-roots": "tests/assets/car_tree_bug", + "--unlabeled-data-roots": "tests/assets/car_tree_bug", + "--input": "tests/assets/car_tree_bug/images/train", + "train_params": 
["params", "--learning_parameters.num_iters", "1", "--learning_parameters.batch_size", "1"], +} + # Training params for resume, num_iters*2 resume_params = [ "params", @@ -168,3 +177,22 @@ def test_otx_multi_gpu_train(self, template, tmp_dir_path): args1 = copy.deepcopy(args) args1["--gpus"] = "0,1" otx_train_testing(template, tmp_dir_path, otx_dir, args1) + + @e2e_pytest_component + @pytest.mark.parametrize("template", default_templates, ids=default_templates_ids) + def test_otx_train_semisl(self, template, tmp_dir_path): + tmp_dir_path = tmp_dir_path / "ins_seg/test_semisl" + otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl) + template_dir = get_template_dir(template, tmp_dir_path) + assert (Path(template_dir) / "semisl").is_dir() + + @e2e_pytest_component + @pytest.mark.skipif(MULTI_GPU_UNAVAILABLE, reason="The number of gpu is insufficient") + @pytest.mark.parametrize("template", default_templates, ids=default_templates_ids) + def test_otx_multi_gpu_train_semisl(self, template, tmp_dir_path): + tmp_dir_path = tmp_dir_path / "ins_seg/test_multi_gpu_semisl" + args_semisl_multigpu = copy.deepcopy(args_semisl) + args_semisl_multigpu["--gpus"] = "0,1" + otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl_multigpu) + template_dir = get_template_dir(template, tmp_dir_path) + assert (Path(template_dir) / "semisl").is_dir() diff --git a/tests/unit/algorithms/common/adapters/mmcv/hooks/test_unbiased_teacher_hook.py b/tests/unit/algorithms/common/adapters/mmcv/hooks/test_mean_teacher_hook.py similarity index 74% rename from tests/unit/algorithms/common/adapters/mmcv/hooks/test_unbiased_teacher_hook.py rename to tests/unit/algorithms/common/adapters/mmcv/hooks/test_mean_teacher_hook.py index ea3877683d0..d45b2ecce90 100644 --- a/tests/unit/algorithms/common/adapters/mmcv/hooks/test_unbiased_teacher_hook.py +++ b/tests/unit/algorithms/common/adapters/mmcv/hooks/test_mean_teacher_hook.py @@ -3,8 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 # -from 
otx.algorithms.common.adapters.mmcv.hooks.unbiased_teacher_hook import ( - UnbiasedTeacherHook, +from otx.algorithms.common.adapters.mmcv.hooks.mean_teacher_hook import ( + MeanTeacherHook, ) from tests.test_suite.e2e_test_system import e2e_pytest_unit @@ -13,7 +13,7 @@ class TestUnbiasedTeacherHook: @e2e_pytest_unit def test_temp(self) -> None: try: - hook = UnbiasedTeacherHook() + hook = MeanTeacherHook() assert hook is None except Exception as e: print(e) diff --git a/tests/unit/algorithms/detection/adapters/mmdet/models/detectors/test_mean_teacher.py b/tests/unit/algorithms/detection/adapters/mmdet/models/detectors/test_mean_teacher.py new file mode 100644 index 00000000000..8422c83e9a1 --- /dev/null +++ b/tests/unit/algorithms/detection/adapters/mmdet/models/detectors/test_mean_teacher.py @@ -0,0 +1,109 @@ +from typing import Dict +import pytest +import torch +import numpy as np +from mmdet.core.mask.structures import BitmapMasks + +from otx.algorithms.detection.adapters.mmdet.models.detectors.mean_teacher import ( + MeanTeacher, +) +from tests.test_suite.e2e_test_system import e2e_pytest_unit + + +class TestMeanTeacher: + @pytest.fixture(autouse=True) + def setup(self, mocker): + mocker.patch("otx.algorithms.detection.adapters.mmdet.models.detectors.mean_teacher.build_detector") + mocker.patch.object(MeanTeacher, "_register_state_dict_hook") + mocker.patch.object(MeanTeacher, "_register_load_state_dict_pre_hook") + self.mt_is = MeanTeacher("CustomMaskRCNN") + self.mt_det = MeanTeacher("CustomATSS", unlabeled_loss_weights={"cls": 1, "bbox": 1, "obj": 1}) + self.img = torch.rand(4, 3, 300, 300) + self.img_metas = [dict(ori_shape=(300, 300), scale_factor=1.0)] * 4 + self.gt_bboxes = torch.rand(4, 4) + self.gt_labels = torch.randint(20, (4, 1)) + self.gt_masks = torch.rand(4, 3, 300, 300) + + @e2e_pytest_unit + def test_forward_train(self, mocker, monkeypatch): + def mock_forward_train(*args, **kwargs): + return {"loss_bbox": 1.0, "loss_cls": 1.0, "loss_mask": 
1.0} + + def mock_generate_pseudo_labels(*args, **kwargs): + return (self.gt_bboxes, self.gt_labels, self.gt_masks, 0.0) + + monkeypatch.setattr(self.mt_is.model_s, "forward_train", mock_forward_train) + loss = self.mt_is.forward_train(self.img, self.img_metas, self.gt_bboxes, self.gt_labels) + gt_loss = mock_forward_train() + assert loss == gt_loss + self.mt_is.enable_unlabeled_loss(True) + monkeypatch.setattr(MeanTeacher, "generate_pseudo_labels", mock_generate_pseudo_labels) + mocker.patch.object(MeanTeacher, "forward_teacher") + kwargs = {"extra_0": {"img0": self.img, "img": self.img, "img_metas": self.img_metas}} + loss_mask = self.mt_is.forward_train( + self.img, self.img_metas, self.gt_bboxes, self.gt_labels, self.gt_masks, **kwargs + ) + gt_loss.update( + { + "ps_ratio": torch.tensor([0.0]), + "loss_bbox_ul": 1.0, + "loss_cls_ul": 1.0, + "loss_mask_ul": 1.0, + } + ) + assert loss_mask == gt_loss + + @e2e_pytest_unit + def test_forward_train_detection(self, mocker, monkeypatch): + def mock_forward_train(*args, **kwargs): + return {"loss_bbox": 1.0, "loss_cls": 1.0, "loss_obj": 1.0} + + def mock_generate_pseudo_labels(*args, **kwargs): + return (self.gt_bboxes, self.gt_labels, [], 0.0) + + monkeypatch.setattr(self.mt_det.model_s, "forward_train", mock_forward_train) + loss = self.mt_det.forward_train(self.img, self.img_metas, self.gt_bboxes, self.gt_labels) + gt_loss = mock_forward_train() + assert loss == gt_loss + self.mt_det.enable_unlabeled_loss(True) + monkeypatch.setattr(MeanTeacher, "generate_pseudo_labels", mock_generate_pseudo_labels) + mocker.patch.object(MeanTeacher, "forward_teacher") + kwargs = {"extra_0": {"img0": self.img, "img": self.img, "img_metas": self.img_metas}} + loss_det = self.mt_det.forward_train(self.img, self.img_metas, self.gt_bboxes, self.gt_labels, **kwargs) + gt_loss.update( + { + "ps_ratio": torch.tensor([0.0]), + "loss_bbox_ul": 1.0, + "loss_cls_ul": 1.0, + "loss_obj_ul": 1.0, + } + ) + assert loss_det == gt_loss + + 
@e2e_pytest_unit + def test_generate_pseudo_labels(self, mocker, monkeypatch): + gt_bboxes = np.random.rand(1, 1, 5) + gt_masks = np.random.rand(1, 1, 300, 300) > 0.5 + teacher_output = [([gt_bboxes, gt_masks])] + img_metas = [{"img_shape": (300, 300, 3)}] + monkeypatch.setattr(self.mt_is.model_t, "with_mask", True) + out = self.mt_is.generate_pseudo_labels(teacher_output, img_metas, **{"device": "cpu"}) + assert len(out) == 4 + assert isinstance(out[2][-1], BitmapMasks) + teacher_output = [gt_bboxes] + monkeypatch.setattr(self.mt_is.model_t, "with_mask", False) + out = self.mt_is.generate_pseudo_labels(teacher_output, img_metas, **{"device": "cpu"}) + assert len(out) == 4 + assert len(out[2]) == 0 + + @e2e_pytest_unit + def test_forward_teacher(self, mocker, monkeypatch): + def mock_simple_test_bboxes(*args, **kwargs): + return [self.gt_bboxes], [self.gt_labels] + + monkeypatch.setattr(self.mt_is.model_t.roi_head, "simple_test_bboxes", mock_simple_test_bboxes) + mocker.patch("otx.algorithms.detection.adapters.mmdet.models.detectors.mean_teacher.bbox2result") + mocker.patch("otx.algorithms.detection.adapters.mmdet.models.detectors.mean_teacher.bbox2roi") + teacher_output = self.mt_is.forward_teacher(self.img, self.img_metas) + assert teacher_output is not None + assert isinstance(teacher_output, list) diff --git a/tests/unit/algorithms/detection/adapters/mmdet/models/losses/test_cross_focal_loss.py b/tests/unit/algorithms/detection/adapters/mmdet/models/losses/test_cross_focal_loss.py index 24758937cdc..7e2256ffdd2 100644 --- a/tests/unit/algorithms/detection/adapters/mmdet/models/losses/test_cross_focal_loss.py +++ b/tests/unit/algorithms/detection/adapters/mmdet/models/losses/test_cross_focal_loss.py @@ -9,6 +9,7 @@ from otx.algorithms.detection.adapters.mmdet.models.losses.cross_focal_loss import ( CrossSigmoidFocalLoss, + OrdinaryFocalLoss, ) @@ -43,3 +44,20 @@ def test_reduction(self): loss3 = self.loss(self.predictions, self.labels, reduction_override="sum") 
assert loss1.shape == (3, 3) assert loss2 != loss3 + + +class TestFocalLoss: + @pytest.fixture(autouse=True) + def setup(self): + """Create the loss object""" + self.predictions = torch.tensor([[0, 1, 0], [0, 1, 0], [0, 0, 1]], dtype=torch.float32) + self.labels = torch.tensor([1, 1, 2]) + self.loss = OrdinaryFocalLoss(gamma=1.5) + + def test_forward(self): + loss = self.loss(self.predictions, self.labels, reduction="none") + assert loss is not None + assert loss.shape == (3,) + loss = self.loss(self.predictions, self.labels, avg_factor=1, reduction="mean") + assert isinstance(loss.item(), float) + assert loss > 0