Deci-AI · ofrimasad · Jun 21, 2023 · Jun 6, 2023 · Jun 6, 2023 · Jun 7, 2023
diff --git a/examples/example_detection_super_gradients.py b/examples/example_detection_super_gradients.py
@@ -24,6 +24,7 @@
         val_data=val_loader,
         class_names=train_loader.dataset.classes,
         batches_early_stop=20,
+        use_cache=True,  # With this we will be asked about the dataset information only once
     )
 
     analyzer.run()
diff --git a/examples/segmentation_example.py b/examples/segmentation_example.py
@@ -51,6 +51,7 @@
         # Optionals
         images_extractor=None,
         labels_extractor=None,
+        use_cache=True,
         threshold_soft_labels=0.5,
         batches_early_stop=75,
     )

diff --git a/src/data_gradients/batch_processors/adapters/dataset_adapter.py b/src/data_gradients/batch_processors/adapters/dataset_adapter.py
@@ -1,68 +1,92 @@
-from typing import Optional, Callable, Union, Tuple, List, Mapping, Any
+from typing import Callable, Union, Tuple, List, Mapping
 
 import PIL
 import numpy as np
 import torch
 from torchvision.transforms import transforms
 
-from data_gradients.batch_processors.adapters.tensor_extractor import TensorExtractor
+from data_gradients.batch_processors.adapters.tensor_extractor import get_tensor_extractor_options
+from data_gradients.config.data.data_config import DataConfig
+from data_gradients.config.data.questions import Question, text_to_yellow
+
+SupportedData = Union[Tuple, List, Mapping, Tuple, List]
 
 
 class DatasetAdapter:
     """Class responsible to convert raw batch (coming from dataloader) into a batch of image and a batch of labels."""
 
-    def __init__(self, images_extractor: Optional[Callable] = None, labels_extractor: Optional[Callable] = None):
-        """
-        :param images_extractor:    (Optional) function that takes the dataloader output and extract the images.
-                                    If None, the user will need to input it manually in a following prompt.
-        :param labels_extractor:    (Optional) function that takes the dataloader output and extract the labels.
-                                    If None, the user will need to input it manually in a following prompt.
-        """
-        self._tensor_extractor = {0: images_extractor, 1: labels_extractor}
+    def __init__(self, data_config: DataConfig):
+        self.data_config = data_config
 
-    def extract(self, objs: Union[Tuple, List, Mapping]) -> Tuple[torch.Tensor, torch.Tensor]:
+    def extract(self, data: SupportedData) -> Tuple[torch.Tensor, torch.Tensor]:
         """Convert raw batch (coming from dataloader) into a batch of image and a batch of labels.
 
-        :param objs: Raw batch (coming from dataloader without any modification).
+        :param data: Raw batch (coming from dataloader without any modification).
         :return:
             - images: Batch of images
             - labels: Batch of labels
         """
-        if isinstance(objs, (Tuple, List)) and len(objs) == 2:
-            images = objs[0] if isinstance(objs[0], torch.Tensor) else self._to_tensor(objs[0], tuple_idx=0)
-            labels = objs[1] if isinstance(objs[1], torch.Tensor) else self._to_tensor(objs[1], tuple_idx=1)
-        elif isinstance(objs, (Mapping, Tuple, List)):
-            images = self._extract_tensor_from_container(objs, 0)
-            labels = self._extract_tensor_from_container(objs, 1)
-        else:
-            raise NotImplementedError(f"Got object {type(objs)} from Iterator - supporting dict, tuples and lists Only!")
-        return images, labels
-
-    def _to_tensor(self, objs: Union[np.ndarray, PIL.Image.Image, Mapping], tuple_idx: int) -> torch.Tensor:
-        if isinstance(objs, np.ndarray):
-            return torch.from_numpy(objs)
-        elif isinstance(objs, PIL.Image.Image):
-            return transforms.ToTensor()(objs)
+        images = self._extract_images(data)
+        labels = self._extract_labels(data)
+        return self._to_torch(images), self._to_torch(labels)
+
+    def _extract_images(self, data: SupportedData) -> torch.Tensor:
+        images_extractor = self._get_images_extractor(data)
+        return images_extractor(data)
+
+    def _extract_labels(self, data: SupportedData) -> torch.Tensor:
+        labels_extractor = self._get_labels_extractor(data)
+        return labels_extractor(data)
+
+    def _get_images_extractor(self, data: SupportedData) -> Callable[[SupportedData], torch.Tensor]:
+        if self.data_config.images_extractor is not None:
+            return self.data_config.get_images_extractor()
+
+        # We use the heuristic that a tuple of 2 should represent (image, label) in this order
+        if isinstance(data, (Tuple, List)) and len(data) == 2:
+            if isinstance(data[0], (torch.Tensor, np.ndarray, PIL.Image.Image)):
+                self.data_config.images_extractor = "[0]"  # We save it for later use
+                return self.data_config.get_images_extractor()  # This will return a callable
+
+        # Otherwise, we ask the user how to map data -> image
+        if isinstance(data, (Tuple, List, Mapping, Tuple, List)):
+            description, options = get_tensor_extractor_options(data)
+            question = Question(question=f"Which tensor represents your {text_to_yellow('Image(s)')} ?", options=options)
+            return self.data_config.get_images_extractor(question=question, hint=description)
+
+        raise NotImplementedError(
+            f"Got object {type(data)} from Data Iterator which is not supported!\n"
+            f"Please implement a custom `images_extractor` for your dataset. "
+            f"You can find more detail about this in our documentation: https://github.com/Deci-AI/data-gradients"
+        )
+
+    def _get_labels_extractor(self, data: SupportedData) -> Callable[[SupportedData], torch.Tensor]:
+        if self.data_config.labels_extractor is not None:
+            return self.data_config.get_labels_extractor()
+
+        # We use the heuristic that a tuple of 2 should represent (image, label) in this order
+        if isinstance(data, (Tuple, List)) and len(data) == 2:
+            if isinstance(data[1], (torch.Tensor, np.ndarray, PIL.Image.Image)):
+                self.data_config.labels_extractor = "[1]"  # We save it for later use
+                return self.data_config.get_labels_extractor()  # This will return a callable
+
+        # Otherwise, we ask the user how to map data -> labels
+        if isinstance(data, (Tuple, List, Mapping, Tuple, List)):
+            description, options = get_tensor_extractor_options(data)
+            question = Question(question=f"Which tensor represents your {text_to_yellow('Label(s)')} ?", options=options)
+            return self.data_config.get_labels_extractor(question=question, hint=description)
+
+        raise NotImplementedError(
+            f"Got object {type(data)} from Data Iterator which is not supported!\n"
+            f"Please implement a custom `labels_extractor` for your dataset. "
+            f"You can find more detail about this in our documentation: https://github.com/Deci-AI/data-gradients"
+        )
+
+    @staticmethod
+    def _to_torch(tensor: Union[np.ndarray, PIL.Image.Image, torch.Tensor]) -> torch.Tensor:
+        if isinstance(tensor, np.ndarray):
+            return torch.from_numpy(tensor)
+        elif isinstance(tensor, PIL.Image.Image):
+            return transforms.ToTensor()(tensor)
         else:
-            return self._extract_tensor_from_container(objs=objs, tuple_idx=tuple_idx)
-
-    def _extract_tensor_from_container(self, objs: Any, tuple_idx: int) -> torch.Tensor:
-        mapping_fn = self._get_tensor_extractor(tuple_idx=tuple_idx, objs=objs)
-        return mapping_fn(objs)
-
-    def _get_tensor_extractor(self, objs: Any, tuple_idx: int) -> Union[Callable, TensorExtractor]:
-        if self._tensor_extractor[tuple_idx] is None:
-            self._tensor_extractor[tuple_idx] = TensorExtractor(objs=objs, name="image(s)" if (tuple_idx == 0) else "label(s)")
-        return self._tensor_extractor[tuple_idx]
-
-    @property
-    def images_route(self) -> List[str]:
-        """Represent the path (route) to extract the images from the raw batch (coming from dataloader)."""
-        tensor_finder = self._tensor_extractor[0]
-        return tensor_finder.path_to_tensor if isinstance(tensor_finder, TensorExtractor) else []
-
-    @property
-    def labels_route(self) -> List[str]:
-        """Represent the path (route) to extract the labels from the raw batch (coming from dataloader)."""
-        tensor_finder = self._tensor_extractor[1]
-        return tensor_finder.path_to_tensor if isinstance(tensor_finder, TensorExtractor) else []
+            return tensor