openvinotoolkit
diff --git a/‎CHANGELOG.md
+2 b/‎CHANGELOG.md
+2
diff --git a/‎src/datumaro/plugins/data_formats/cityscapes.py
+21-19 b/‎src/datumaro/plugins/data_formats/cityscapes.py
+21-19
diff --git a/‎src/datumaro/plugins/data_formats/kaggle/base.py
+4-2 b/‎src/datumaro/plugins/data_formats/kaggle/base.py
+4-2
diff --git a/‎tests/assets/cityscapes_dataset/dataset/gtFine/test/defaultcity/defaultcity_000001_000031_gtFine_instanceIds.png
-5 Bytes b/‎tests/assets/cityscapes_dataset/dataset/gtFine/test/defaultcity/defaultcity_000001_000031_gtFine_instanceIds.png
-5 Bytes
diff --git a/‎tests/assets/cityscapes_dataset/dataset/gtFine/test/defaultcity/defaultcity_000001_000032_gtFine_instanceIds.png
-5 Bytes b/‎tests/assets/cityscapes_dataset/dataset/gtFine/test/defaultcity/defaultcity_000001_000032_gtFine_instanceIds.png
-5 Bytes
diff --git a/‎tests/assets/cityscapes_dataset/dataset/gtFine/train/defaultcity/defaultcity_000002_000045_gtFine_instanceIds.png
-5 Bytes b/‎tests/assets/cityscapes_dataset/dataset/gtFine/train/defaultcity/defaultcity_000002_000045_gtFine_instanceIds.png
-5 Bytes
diff --git a/‎tests/assets/cityscapes_dataset/dataset/gtFine/val/defaultcity/defaultcity_000001_000019_gtFine_instanceIds.png
-5 Bytes b/‎tests/assets/cityscapes_dataset/dataset/gtFine/val/defaultcity/defaultcity_000001_000019_gtFine_instanceIds.png
-5 Bytes
@@ -35,6 +35,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/openvinotoolkit/datumaro/pull/1245>)
 - Enable image backend and color channel format to be selectable
   (<https://github.com/openvinotoolkit/datumaro/pull/1246>)
+- Speed up `CityscapesBase` and `KaggleImageMaskBase` by dropping `np.unique`
+  (<https://github.com/openvinotoolkit/datumaro/pull/1261>)
 - Enhance RISE algorithm for explainable AI
   (<https://github.com/openvinotoolkit/datumaro/pull/1263>)
 - Enhance explore unit test to use real dataset from ImageNet
 
@@ -265,30 +265,35 @@ def _load_items(self):
                 recursive=True,
             )
             mask_suffix = CityscapesPath.GT_INSTANCE_MASK_SUFFIX
+
+        self._categories = self._load_categories(
+            self._path, use_train_label_map=mask_suffix is CityscapesPath.LABEL_TRAIN_IDS_SUFFIX
+        )
+
+        label_ids = []
+        for label_cat in self._categories[AnnotationType.label]:
+            label_id, _ = self._categories[AnnotationType.label].find(label_cat.name)
+            if label_id:
+                label_ids.append(label_id)
+
         for mask_path in masks:
             item_id = self._get_id_from_mask_path(mask_path, mask_suffix)
 
             anns = []
             instances_mask = load_image(mask_path, dtype=np.int32)
-            segm_ids = np.unique(instances_mask)
-            for segm_id in segm_ids:
-                # either is_crowd or ann_id should be set
-                if segm_id < 1000:
-                    label_id = segm_id
-                    is_crowd = True
-                    ann_id = None
-                else:
-                    label_id = segm_id // 1000
-                    is_crowd = False
-                    ann_id = segm_id % 1000
+            mask_id = 1
+            for label_id in label_ids:
+                if label_id not in instances_mask:
+                    continue
+                binary_mask = self._lazy_extract_mask(instances_mask, label_id)
                 anns.append(
                     Mask(
-                        image=self._lazy_extract_mask(instances_mask, segm_id),
+                        id=mask_id,
+                        image=binary_mask,
                         label=label_id,
-                        id=ann_id,
-                        attributes={"is_crowd": is_crowd},
                     )
                 )
+                mask_id += 1
 
             image = image_path_by_id.pop(item_id, None)
             if image:
@@ -303,9 +308,6 @@ def _load_items(self):
                 id=item_id, subset=self._subset, media=Image.from_file(path=path)
             )
 
-        self._categories = self._load_categories(
-            self._path, use_train_label_map=mask_suffix is CityscapesPath.LABEL_TRAIN_IDS_SUFFIX
-        )
         return items
 
     @staticmethod
@@ -429,8 +431,8 @@ def _apply_impl(self):
                     masks,
                     instance_ids=[
                         self._label_id_mapping(m.label)
-                        if m.attributes.get("is_crowd", False)
-                        else self._label_id_mapping(m.label) * 1000 + (m.id or (i + 1))
+                        # if m.attributes.get("is_crowd", False)
+                        # else self._label_id_mapping(m.label) * 1000 + (m.id or (i + 1))
                         for i, m in enumerate(masks)
                     ],
                     instance_labels=[self._label_id_mapping(m.label) for m in masks],
 
@@ -215,6 +215,7 @@ def __init__(
         self._path = path
         self._mask_path = mask_path
 
+        self._label_ids = []
         self._categories = self._load_categories(labelmap_file)
         self._items = self._load_items()
 
@@ -241,6 +242,7 @@ def _load_categories(self, label_map_file: Optional[str]):
         for label_name, label_color in label_map.items():
             label_id = label_categories.find(label_name)[0]
             colormap[label_id] = label_color
+            self._label_ids.append(label_id)
 
         categories[AnnotationType.mask] = MaskCategories(colormap)
 
@@ -260,8 +262,8 @@ def _lazy_extract_mask(mask, c):
                     instances_mask = load_image(
                         osp.join(self._mask_path, mask_name), dtype=np.int32
                     )
-                    label_ids = np.unique(instances_mask)
-                    for label_id in label_ids:
+                    # label_ids = np.unique(instances_mask)
+                    for label_id in self._label_ids:
                         anns.append(
                             Mask(
                                 image=_lazy_extract_mask(instances_mask, label_id),