openvinotoolkit · vinnamkim · Oct 24, 2023 · Oct 23, 2023 · Oct 23, 2023 · Oct 23, 2023
@@ -6,7 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## \[Unreleased\]
+### Enhancements
+- Optimize Python imports to make the CLI entrypoint faster
+  (<https://github.com/openvinotoolkit/datumaro/pull/1182>)
 
+## 16/11/2023 - Release 1.5.1
 ### Enhancements
 - Enhance Datumaro data format stream importer performance
   (<https://github.com/openvinotoolkit/datumaro/pull/1153>)

@@ -49,6 +49,9 @@ exclude_lines = [
     # Don't complain if non-runnable code isn't run:
     'if 0:',
     'if __name__ == .__main__.:',
+
+    # Don't complain about code that only runs under type checking:
+    'if TYPE_CHECKING:'
 ]
 
 # don't fail on the code that can be found

@@ -9,7 +9,6 @@
 from shutil import rmtree
 
 from datumaro.cli.util.errors import CliException
-from datumaro.plugins.synthetic_data import FractalImageGenerator
 from datumaro.util.definitions import get_datumaro_cache_dir
 
 from ..util import MultilineFormatter
@@ -76,6 +75,8 @@ def get_sensitive_args():
 
 
 def generate_command(args):
+    from datumaro.plugins.synthetic_data import FractalImageGenerator
+
     log.info("Generating dataset...")
     output_dir = args.output_dir
 

@@ -9,21 +9,27 @@
 from collections import Counter
 from enum import Enum, auto
 from itertools import zip_longest
-from typing import Union
+from typing import TYPE_CHECKING, Union
 
 import cv2
 import numpy as np
 
-from datumaro.components.media import Image
-
-with warnings.catch_warnings():
-    warnings.simplefilter("ignore")
-    import tensorboardX as tb
-
 from datumaro.components.annotation import AnnotationType, LabelCategories
 from datumaro.components.dataset import IDataset
+from datumaro.components.media import Image
 from datumaro.util import parse_str_enum_value
 from datumaro.util.image import save_image
+from datumaro.util.import_util import lazy_import
+
+if TYPE_CHECKING:
+    import matplotlib.pyplot as plt
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        import tensorboardX as tb
+else:
+    tb = lazy_import("tensorboardX")
+    plt = lazy_import("matplotlib.pyplot")
 
 
 class DistanceCompareVisualizer:
@@ -291,8 +297,6 @@ def save_as_tensorboard(self, img, name):
         self._file_writer.add_image(name, img)
 
     def save_conf_matrix(self, conf_matrix, filename):
-        import matplotlib.pyplot as plt
-
         def _get_class_map(label_categories):
             classes = None
             if label_categories is not None:

@@ -2,10 +2,16 @@
 #
 # SPDX-License-Identifier: MIT
 
-from typing import Sequence
+from typing import TYPE_CHECKING, Sequence
 
 from datumaro.components.dataset import Dataset
-from datumaro.plugins.explorer import ExplorerLauncher
+
+if TYPE_CHECKING:
+    import datumaro.plugins.explorer as explorer
+else:
+    from datumaro.util.import_util import lazy_import
+
+    explorer = lazy_import("datumaro.plugins.explorer")
 
 
 class HashInference:
@@ -15,13 +21,13 @@ def __init__(self, *datasets: Sequence[Dataset]) -> None:
     @property
     def model(self):
         if self._model is None:
-            self._model = ExplorerLauncher(model_name="clip_visual_ViT-B_32")
+            self._model = explorer.ExplorerLauncher(model_name="clip_visual_ViT-B_32")
         return self._model
 
     @property
     def text_model(self):
         if self._text_model is None:
-            self._text_model = ExplorerLauncher(model_name="clip_text_ViT-B_32")
+            self._text_model = explorer.ExplorerLauncher(model_name="clip_text_ViT-B_32")
         return self._text_model
 
     def _compute_hash_key(self, datasets, datasets_to_infer):

@@ -6,12 +6,10 @@
 import math
 import random
 from abc import ABC, abstractmethod
-from typing import Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 
 import numpy as np
-from sklearn.cluster import KMeans
 
-import datumaro.plugins.ndr as ndr
 from datumaro.components.algorithms.hash_key_inference.base import HashInference
 from datumaro.components.algorithms.hash_key_inference.hashkey_util import (
     calculate_hamming,
@@ -23,6 +21,13 @@
 from datumaro.components.dataset import Dataset
 from datumaro.components.dataset_base import DatasetItem
 
+if TYPE_CHECKING:
+    import datumaro.plugins.ndr as ndr
+else:
+    from datumaro.util.import_util import lazy_import
+
+    ndr = lazy_import("datumaro.plugins.ndr")
+
 
 def match_num_item_for_cluster(ratio, dataset_len, cluster_num_item_list):
     total_num_selected_item = math.ceil(dataset_len * ratio)
@@ -94,6 +99,8 @@ class Centroid(PruneBase):
     """
 
     def base(self, ratio, num_centers, labels, database_keys, item_list, source):
+        from sklearn.cluster import KMeans
+
         num_selected_centers = math.ceil(len(item_list) * ratio)
         kmeans = KMeans(n_clusters=num_selected_centers, random_state=0)
         clusters = kmeans.fit_predict(database_keys)
@@ -124,6 +131,8 @@ class ClusteredRandom(PruneBase):
     """
 
     def base(self, ratio, num_centers, labels, database_keys, item_list, source):
+        from sklearn.cluster import KMeans
+
         kmeans = KMeans(n_clusters=num_centers, random_state=0)
         clusters = kmeans.fit_predict(database_keys)
         cluster_ids, cluster_num_item_list = np.unique(clusters, return_counts=True)
@@ -148,6 +157,8 @@ class QueryClust(PruneBase):
     """
 
     def base(self, ratio, num_centers, labels, database_keys, item_list, source):
+        from sklearn.cluster import KMeans
+
         center_dict = {i: None for i in range(1, num_centers)}
         for item in item_list:
             for anno in item.annotations:
@@ -199,6 +210,8 @@ class Entropy(PruneBase):
     """
 
     def base(self, ratio, num_centers, labels, database_keys, item_list, source):
+        from sklearn.cluster import KMeans
+
         kmeans = KMeans(n_clusters=num_centers, random_state=0)
         clusters = kmeans.fit_predict(database_keys)
 

@@ -4,16 +4,19 @@
 
 from collections import defaultdict
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional, Sequence, Tuple
-
-import matplotlib.pyplot as plt
-import pandas as pd
-from matplotlib.figure import Figure
+from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple
 
 from datumaro.components.annotation import AnnotationType, LabelCategories
 from datumaro.components.dataset_base import IDataset
 from datumaro.errors import DatasetError
 
+if TYPE_CHECKING:
+    from matplotlib.figure import Figure
+    from pandas import DataFrame, Series
+else:
+    DataFrame, Series, Figure = None, None, None
+
+
 __all__ = ["LossDynamicsAnalyzer", "NoisyLabelCandidate"]
 
 
@@ -92,30 +95,32 @@ def alpha(self) -> float:
         return self._alpha
 
     @property
-    def mean_loss_dyns(self) -> pd.Series:
+    def mean_loss_dyns(self) -> Series:
         """Pandas Series object obtained by averaging all EMA loss dynamics statistics"""
         return self._mean_loss_dyns
 
     @property
-    def mean_loss_dyns_per_label(self) -> Dict[LabelCategories.Category, pd.Series]:
+    def mean_loss_dyns_per_label(self) -> Dict[LabelCategories.Category, Series]:
         """A dictionary of Pandas Series object obtained
         by averaging EMA loss dynamics statistics according to the label category"""
         label_categories = self._dataset.categories()[AnnotationType.label]
         return {label_categories[k]: v for k, v in self._mean_loss_dyns_per_label.items()}
 
     @property
-    def ema_dataframe(self) -> pd.DataFrame:
+    def ema_dataframe(self) -> DataFrame:
         """Pandas DataFrame including full EMA loss dynamics statistics."""
         return self._df
 
     @staticmethod
     def _parse_to_dataframe(
         dataset: IDataset, ema_alpha: float = 0.001, tracking_loss_type: Optional[str] = None
-    ) -> pd.DataFrame:
+    ) -> DataFrame:
         """Parse loss dynamics statistics from Datumaro dataset to Pandas DataFrame."""
         key = (
             "loss_dynamics" if tracking_loss_type is None else f"loss_dynamics_{tracking_loss_type}"
         )
+        import pandas as pd
+
         ema_loss_dyns_list = []
         for item in dataset:
             for ann in item.annotations:
@@ -167,6 +172,8 @@ def plot_ema_loss_dynamics(
         figsize: Tuple[int, int] = (4, 3),
         **kwargs,
     ) -> Figure:
+        import matplotlib.pyplot as plt
+
         if mode == "mean":
             cands_by_label_id = {None: candidates}
         elif mode == "label_mean":

@@ -7,7 +7,7 @@
 import logging as log
 import os.path as osp
 from functools import partial
-from inspect import isclass
+from inspect import getmodule, isclass
 from typing import (
     Callable,
     Dict,
@@ -202,7 +202,19 @@ def _get_plugin_exports(cls, module, types):
                     continue
                 exports.append(getattr(module, symbol))
 
-        exports = [s for s in exports if isclass(s) and issubclass(s, types) and s not in types]
+        exports = [
+            s
+            for s in exports
+            if isclass(s)
+            and issubclass(s, types)
+            and s not in types
+            # Skip classes that live under datumaro.components. getmodule() can
+            # return None for a custom single-file plugin, and a module's
+            # __package__ may itself be None; both fall back to "" and are kept.
+            and not (getattr(getmodule(s), "__package__", None) or "").startswith(
+                "datumaro.components"
+            )
+        ]
 
         return exports
 

@@ -7,8 +7,21 @@
 import itertools
 import logging as log
 import os.path as osp
+from importlib.util import find_spec
 from types import SimpleNamespace as namespace
-from typing import Any, Callable, Dict, Iterator, Mapping, Optional, Sequence, Tuple, Type, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Iterator,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+)
 
 import attrs
 import numpy as np
@@ -19,17 +32,26 @@
 from datumaro.components.media import Image, MediaElement
 from datumaro.util.tf_util import import_tf
 
-try:
-    tf = import_tf()
-    import tensorflow_datasets as tfds
-except ImportError:
-    log.debug(
-        "Unable to import TensorFlow or TensorFlow Datasets. "
-        "Dataset downloading via TFDS is disabled."
-    )
-    TFDS_EXTRACTOR_AVAILABLE = False
+# find_spec() looks the package up without importing it.
+TFDS_EXTRACTOR_AVAILABLE = find_spec("tensorflow_datasets") is not None
+
+if not TFDS_EXTRACTOR_AVAILABLE:
+    # TYPE_CHECKING is False at runtime, so this message must be logged here;
+    # inside the type-checking-only branch below it would never be emitted.
+    log.debug(
+        "Unable to import TensorFlow or TensorFlow Datasets. "
+        "Dataset downloading via TFDS is disabled."
+    )
+
+if TYPE_CHECKING:
+    # Real imports for static analysis only; never executed at runtime.
+    tf = import_tf()
+    import tensorflow_datasets as tfds
 else:
-    TFDS_EXTRACTOR_AVAILABLE = True
+    from datumaro.util.import_util import lazy_import
+
+    tfds = lazy_import("tensorflow_datasets")
 
 
 @frozen(kw_only=True)

@@ -2,8 +2,10 @@
 #
 # SPDX-License-Identifier: MIT
 
+import logging as log
 from abc import ABC, abstractclassmethod
 from importlib import import_module
+from importlib.util import find_spec
 from typing import List, Optional, Sequence, Type, Union
 
 from datumaro.components.dataset_base import DatasetBase
@@ -56,11 +58,17 @@ def get_lazy_plugin(
     plugin_type: str,
     extra_deps: List[str] = [],
 ) -> Optional[LazyPlugin]:
-    try:
-        for extra_dep in extra_deps:
-            import_module(extra_dep)
-    except ImportError:
-        return None
+    for extra_dep in extra_deps:
+        try:
+            spec = find_spec(extra_dep)
+        except ImportError:
+            # find_spec() imports the parent package of a dotted name and raises
+            # ModuleNotFoundError (an ImportError) when that parent is missing.
+            # Treat it as an unavailable dependency, as the import_module() check did.
+            spec = None
+        if spec is None:
+            log.debug(f"Cannot import extra dep={extra_dep} for plugin_name={plugin_name}.")
+            return None
 
     plugin_type_cls = STR_TO_PLUGIN_TYPES[plugin_type]
 

@@ -13,6 +13,7 @@
 from copy import deepcopy
 from enum import IntEnum
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Dict,
@@ -29,7 +30,6 @@
 
 import cv2
 import numpy as np
-import pandas as pd
 
 from datumaro.components.crypter import NULL_CRYPTER, Crypter
 from datumaro.components.errors import DatumaroError, MediaShapeError
@@ -42,6 +42,14 @@
     save_image,
 )
 
+if TYPE_CHECKING:
+    import pandas as pd
+else:
+    from datumaro.util.import_util import lazy_import
+
+    pd = lazy_import("pandas")
+
+
 AnyData = TypeVar("AnyData", bytes, np.ndarray)