openvinotoolkit · goodsong81 · Sep 20, 2023 · Sep 4, 2023 · Sep 7, 2023 · Sep 7, 2023
@@ -15,7 +15,8 @@ All notable changes to this project will be documented in this file.
 - Add a new object detector Lite-DINO(<https://github.com/openvinotoolkit/training_extensions/pull/2457>)
 - Add Semi-SL Mean Teacher algorithm for Instance Segmentation task(<https://github.com/openvinotoolkit/training_extensions/pull/2444>)
 - Official support for YOLOX-X, YOLOX-L, YOLOX-S, ResNeXt101-ATSS (<https://github.com/openvinotoolkit/training_extensions/pull/2485>)
-- Add new argument to track resource usage in train command(<https://github.com/openvinotoolkit/training_extensions/pull/2500>)
+- Add new argument to track resource usage in train command (<https://github.com/openvinotoolkit/training_extensions/pull/2500>)
+- Adapt input size automatically based on dataset statistics (<https://github.com/openvinotoolkit/training_extensions/pull/2499>)
 
 ### Enhancements
 

@@ -19,7 +19,6 @@
 from otx.algorithms.common.adapters.mmcv.semisl_mixin import SemiSLConfigurerMixin
 from otx.algorithms.common.adapters.mmcv.utils.config_utils import (
     InputSizeManager,
-    get_configured_input_size,
     recursively_update_cfg,
     update_or_add_custom_hook,
 )
@@ -166,11 +165,17 @@ def configure_input_size(
         cfg, input_size_config: InputSizePreset = InputSizePreset.DEFAULT, model_ckpt_path: Optional[str] = None
     ):
         """Change input size if necessary."""
-        input_size = get_configured_input_size(input_size_config, model_ckpt_path)
-        if input_size is None:
+        manager = InputSizeManager(cfg)
+        input_size = manager.get_configured_input_size(input_size_config, model_ckpt_path)
+        if input_size is None:  # InputSizePreset.DEFAULT
             return
 
-        InputSizeManager(cfg.data).set_input_size(input_size)
+        if input_size == (0, 0):  # InputSizePreset.AUTO
+            input_size = BaseConfigurer.adapt_input_size_to_dataset(cfg, manager)
+            if input_size is None:
+                return
+
+        manager.set_input_size(input_size)
         logger.info("Input size is changed to {}".format(input_size))
 
 

@@ -635,7 +635,7 @@ def patch_input_preprocessing(deploy_cfg):
                 mo_options.flags = list(set(mo_options.flags))
 
             def patch_input_shape(deploy_cfg):
-                input_size_manager = InputSizeManager(cfg.data)
+                input_size_manager = InputSizeManager(cfg)
                 size = input_size_manager.get_input_size_from_cfg("test")
                 assert all(isinstance(i, int) and i > 0 for i in size)
                 # default is static shape to prevent an unexpected error

@@ -277,18 +277,20 @@ learning_parameters:
     warning: null
   input_size:
     affects_outcome_of: INFERENCE
-    default_value: Default
+    default_value: Auto
     description:
       The input size of the given model could be configured to one of the predefined resolutions.
       Reduced training and inference time could be expected by using smaller input size.
-      Defaults to per-model default resolution.
+      Defaults to Auto, in which case the input size is automatically determined based on dataset statistics.
     editable: true
     enum_name: InputSizePreset
     header: Configure model input size.
     options:
       DEFAULT: "Default"
+      AUTO: "Auto"
       _64x64: "64x64"
       _128x128: "128x128"
+      _224x224: "224x224"
       _256x256: "256x256"
       _384x384: "384x384"
       _512x512: "512x512"

@@ -3,8 +3,9 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
+import json
 import os
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
 
 import numpy as np
 import torch
@@ -18,6 +19,7 @@
     patch_persistent_workers,
 )
 from otx.algorithms.common.adapters.mmcv.utils.config_utils import (
+    InputSizeManager,
     patch_color_conversion,
     patch_from_hyperparams,
     recursively_update_cfg,
@@ -26,6 +28,7 @@
 from otx.algorithms.common.configs.configuration_enums import InputSizePreset
 from otx.algorithms.common.tasks.base_task import OnHookInitialized
 from otx.algorithms.common.utils import UncopiableDefaultDict, append_dist_rank_suffix
+from otx.algorithms.common.utils.data import compute_robust_dataset_statistics
 from otx.algorithms.common.utils.logger import get_logger
 from otx.api.usecases.reporting.time_monitor_callback import TimeMonitorCallback
 from otx.core.data import caching
@@ -492,3 +495,41 @@ def get_data_cfg(cfg, subset):
                 dataset = dataset.dataset
             return dataset
         return cfg.data[subset]
+
+    @staticmethod
+    def adapt_input_size_to_dataset(
+        cfg, input_size_manager: InputSizeManager, downscale_only: bool = True, use_annotations: bool = False
+    ) -> Optional[Tuple[int, int]]:
+        """Compute appropriate model input size w.r.t. dataset statistics.
+
+        Args:
+            cfg (Dict): Global configuration.
+            input_size_manager: (InputSizeManager): Pre-configured input size manager
+            downscale_only (bool) : Whether to allow only smaller size than default setting. Defaults to True.
+            use_annotations (bool): Whether to consider annotation shapes to compute input size. Defaults to False.
+
+        Returns:
+            Tuple[int, int]: (width, height) or None
+        """
+
+        data_cfg = BaseConfigurer.get_data_cfg(cfg, "train")
+        dataset = data_cfg.get("otx_dataset", None)
+        if dataset is None:
+            return None
+
+        stat = compute_robust_dataset_statistics(dataset, use_annotations)
+        if not stat:
+            return None
+        logger.info(f"Dataset stat: {json.dumps(stat, indent=4)}")
+
+        # Fit to typical large image size (conservative)
+        # -> "avg" size might be preferrable for efficiency
+        image_size = stat["image"]["robust_max"]
+        object_size = None
+        if use_annotations and stat["annotation"]:
+            # Refine using annotation shape size stat
+            # Fit to typical small object size (conservative)
+            # -> "avg" size might be preferrable for efficiency
+            object_size = stat["annotation"].get("size_of_shape", {}).get("robust_min", None)
+
+        return input_size_manager.adapt_input_size_to_dataset(image_size, object_size, downscale_only)