Hierarchical Labeling (#742)

* hierarchical labeling * add empty line to the end of json/remove duplicated test Co-authored-by: Wonju Lee <wonju.lee@intel.com>
openvinotoolkit · Oct 31, 2022 · a8f3cc9 · a8f3cc9
1 parent c90eb28
commit a8f3cc9
Show file tree

Hide file tree

Showing 12 changed files with 332 additions and 2 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## \[Unreleased\]
 ### Added
+- Support for mutually exclusive labels with LabelGroup
+  (<https://github.com/openvinotoolkit/datumaro/pull/742>)
 - Add jupyter sample introducing how to merge datasets
   (<https://github.com/openvinotoolkit/datumaro/pull/738>)
 - Add Visualization Python API

diff --git a/datumaro/components/annotation.py b/datumaro/components/annotation.py
@@ -98,7 +98,14 @@ class Category:
         parent: str = field(default="", validator=default_if_none(str))
         attributes: Set[str] = field(factory=set, validator=default_if_none(set))
 
+    @attrs(slots=True, order=False)
+    class LabelGroup:
+        """A named group of labels, tagged with a group type ("exclusive" by default)."""
+
+        name: str = field(converter=str, validator=not_empty)
+        # Use a factory so every group gets its own list; a literal `[]` default
+        # would be one list object shared by all instances created without labels.
+        labels: List[str] = field(factory=list, validator=default_if_none(list))
+        group_type: str = field(default="exclusive", validator=default_if_none(str))
+
     items: List[str] = field(factory=list, validator=default_if_none(list))
+    # Groups registered through add_label_group(); each entry is a LabelGroup instance.
+    label_groups: List[LabelGroup] = field(factory=list, validator=default_if_none(list))
     _indices: Dict[str, int] = field(factory=dict, init=False, eq=False)
 
     @classmethod
@@ -146,7 +153,10 @@ def _reindex(self):
         self._indices = indices
 
     def add(
-        self, name: str, parent: Optional[str] = None, attributes: Optional[Set[str]] = None
+        self,
+        name: str,
+        parent: Optional[str] = None,
+        attributes: Optional[Set[str]] = None,
     ) -> int:
         assert name
         assert name not in self._indices, name
@@ -156,6 +166,18 @@ def add(
         self._indices[name] = index
         return index
 
+    def add_label_group(
+        self,
+        name: str,
+        labels: List[str],
+        group_type: str,
+    ) -> int:
+        """Register a new label group and return its position in `label_groups`.
+
+        Args:
+            name: Group name; checked with an assertion to be non-empty.
+            labels: Labels that belong to the group.
+            group_type: Type of the group, e.g. "exclusive".
+
+        Returns:
+            The index of the newly appended group in `self.label_groups`.
+        """
+        assert name
+
+        group = self.LabelGroup(name, labels, group_type)
+        self.label_groups.append(group)
+        # The new group is the last element, so its index is the new length minus one.
+        return len(self.label_groups) - 1
+
     def find(self, name: str) -> Tuple[Optional[int], Optional[Category]]:
         index = self._indices.get(name)
         if index is not None:

diff --git a/datumaro/plugins/datumaro_format/converter.py b/datumaro/plugins/datumaro_format/converter.py
@@ -268,9 +268,13 @@ def _convert_cuboid_3d_object(self, obj):
     def _convert_attribute_categories(self, attributes):
         return sorted(attributes)
 
+    def _convert_labels_label_groups(self, labels):
+        """Return the labels of a label group as a new list in ascending order."""
+        ordered = list(labels)
+        ordered.sort()
+        return ordered
+
     def _convert_label_categories(self, obj):
         converted = {
             "labels": [],
+            "label_groups": [],
             "attributes": self._convert_attribute_categories(obj.attributes),
         }
         for label in obj.items:
@@ -281,6 +285,14 @@ def _convert_label_categories(self, obj):
                     "attributes": self._convert_attribute_categories(label.attributes),
                 }
             )
+        for label_group in obj.label_groups:
+            converted["label_groups"].append(
+                {
+                    "name": cast(label_group.name, str),
+                    "group_type": cast(label_group.group_type, str),
+                    "labels": self._convert_labels_label_groups(label_group.labels),
+                }
+            )
         return converted
 
     def _convert_mask_categories(self, obj):

diff --git a/datumaro/plugins/datumaro_format/extractor.py b/datumaro/plugins/datumaro_format/extractor.py
@@ -65,7 +65,14 @@ def _load_categories(parsed):
             label_categories = LabelCategories(attributes=parsed_label_cat.get("attributes", []))
             for item in parsed_label_cat["labels"]:
                 label_categories.add(
-                    item["name"], parent=item["parent"], attributes=item.get("attributes", [])
+                    item["name"],
+                    parent=item["parent"],
+                    attributes=item.get("attributes", []),
+                )
+
+            for item in parsed_label_cat.get("label_groups", []):
+                label_categories.add_label_group(
+                    item["name"], labels=item["labels"], group_type=item["group_type"]
                 )
 
             categories[AnnotationType.label] = label_categories

diff --git a/tests/assets/datumaro_dataset/annotations/test.json b/tests/assets/datumaro_dataset/annotations/test.json
@@ -0,0 +1,77 @@
+{
+    "info": {},
+    "categories": {
+        "label": {
+            "label_groups": [
+                {
+                    "name": "manmade",
+                    "group_type": "exclusive",
+                    "labels": ["car", "bicycle"]
+                },
+                {
+                    "name": "empty_group",
+                    "group_type": "empty",
+                    "labels": ["tom", "mary"]
+                }
+            ],
+            "labels": [
+                {
+                    "name": "car",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "bicycle",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "tom",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "mary",
+                    "parent": "",
+                    "attributes": []
+                }
+            ],
+            "attributes": []
+        }
+    },
+    "items": [
+        {
+            "id": "c",
+            "annotations": [
+                {
+                    "id": 0,
+                    "type": "label",
+                    "attributes": {
+                        "score": 1.0
+                    },
+                    "group": 0,
+                    "label_id": 1
+                },
+                {
+                    "id": 0,
+                    "type": "label",
+                    "attributes": {
+                        "score": 1.0
+                    },
+                    "group": 0,
+                    "label_id": 3
+                }
+            ],
+            "image": {
+                "path": "../tests/assets/datumaro_dataset/images/test/c.jpg",
+                "size": [
+                    10,
+                    5
+                ]
+            },
+            "media": {
+                "path": "../tests/assets/datumaro_dataset/images/test/c.jpg"
+            }
+        }
+    ]
+}
diff --git a/tests/assets/datumaro_dataset/annotations/train.json b/tests/assets/datumaro_dataset/annotations/train.json
@@ -0,0 +1,103 @@
+{
+    "info": {},
+    "categories": {
+        "label": {
+            "label_groups": [
+                {
+                    "name": "manmade",
+                    "group_type": "exclusive",
+                    "labels": ["car", "bicycle"]
+                },
+                {
+                    "name": "empty_group",
+                    "group_type": "empty",
+                    "labels": ["tom", "mary"]
+                }
+            ],
+            "labels": [
+                {
+                    "name": "car",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "bicycle",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "tom",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "mary",
+                    "parent": "",
+                    "attributes": []
+                }
+            ],
+            "attributes": []
+        }
+    },
+    "items": [
+        {
+            "id": "a",
+            "annotations": [
+                {
+                    "id": 0,
+                    "type": "label",
+                    "attributes": {
+                        "score": 1.0
+                    },
+                    "group": 0,
+                    "label_id": 0
+                }
+            ],
+            "image": {
+                "path": "../tests/assets/datumaro_dataset/images/train/a.jpg"
+            },
+            "media": {
+                "path": "../tests/assets/datumaro_dataset/images/train/a.jpg"
+            }
+        },
+        {
+            "id": "b",
+            "annotations": [
+                {
+                    "id": 0,
+                    "type": "label",
+                    "group": 0,
+                    "label_id": 0
+                },
+                {
+                    "id": 1,
+                    "type": "label",
+                    "group": 0,
+                    "label_id": 1
+                },
+                {
+                    "id": 2,
+                    "type": "label",
+                    "group": 0,
+                    "label_id": 2
+                },
+                {
+                    "id": 3,
+                    "type": "label",
+                    "group": 0,
+                    "label_id": 5
+                }
+            ],
+            "image": {
+                "path": "../tests/assets/datumaro_dataset/images/train/b.jpg",
+                "size": [
+                    2,
+                    8
+                ]
+            },
+            "media": {
+                "path": "../tests/assets/datumaro_dataset/images/train/b.jpg"
+            }
+        }
+    ]
+}
diff --git a/tests/assets/datumaro_dataset/annotations/validation.json b/tests/assets/datumaro_dataset/annotations/validation.json
@@ -0,0 +1,54 @@
+{
+    "info": {},
+    "categories": {
+        "label": {
+            "label_groups": [
+                {
+                    "name": "manmade",
+                    "group_type": "exclusive",
+                    "labels": ["car", "bicycle"]
+                },
+                {
+                    "name": "empty_group",
+                    "group_type": "empty",
+                    "labels": ["tom", "mary"]
+                }
+            ],
+            "labels": [
+                {
+                    "name": "car",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "bicycle",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "tom",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "mary",
+                    "parent": "",
+                    "attributes": []
+                }
+            ],
+            "attributes": []
+        }
+    },
+    "items": [
+        {
+            "id": "d",
+            "annotations": [],
+            "image": {
+                "path": "../tests/assets/datumaro_dataset/images/validation/d.png"
+            },
+            "media": {
+                "path": "../tests/assets/datumaro_dataset/images/validation/d.png"
+            }
+        }
+    ]
+}
diff --git a/tests/assets/datumaro_dataset/images/test/c.jpg b/tests/assets/datumaro_dataset/images/test/c.jpg
diff --git a/tests/assets/datumaro_dataset/images/train/a.jpg b/tests/assets/datumaro_dataset/images/train/a.jpg
diff --git a/tests/assets/datumaro_dataset/images/train/b.jpg b/tests/assets/datumaro_dataset/images/train/b.jpg
diff --git a/tests/assets/datumaro_dataset/images/validation/d.png b/tests/assets/datumaro_dataset/images/validation/d.png
diff --git a/tests/test_labeling.py b/tests/test_labeling.py
@@ -0,0 +1,53 @@
+# Copyright (C) 2019-2022 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+import tempfile
+from unittest.case import TestCase
+
+import numpy as np
+
+from datumaro.components.annotation import AnnotationType, Label, LabelCategories
+from datumaro.components.extractor import DatasetItem
+from datumaro.components.media import Image
+from datumaro.components.project import Dataset
+
+from .requirements import Requirements, mark_requirement
+
+
+class LabelingTest(TestCase):
+    """Tests for label groups stored in LabelCategories."""
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_label_group(self):
+        """A label group survives an export/import round trip in the "datumaro" format."""
+        label_categories = LabelCategories()
+        label_categories.add("car", parent="")
+        label_categories.add("bicycle", parent="")
+
+        label_categories.add_label_group("manmade", ["car", "bicycle"], group_type="exclusive")
+
+        dataset = Dataset.from_iterable(
+            [
+                DatasetItem(
+                    id=0,
+                    subset="train",
+                    media=Image(data=np.ones((10, 6, 3))),
+                    annotations=[
+                        Label(
+                            0,
+                            id=0,
+                        ),
+                        Label(
+                            1,
+                            id=1,
+                        ),
+                    ],
+                ),
+            ],
+            categories={
+                AnnotationType.label: label_categories,
+            },
+        )
+
+        with tempfile.TemporaryDirectory() as temp_home:
+            dataset.export(temp_home, format="datumaro")
+            dataset_imported = Dataset.import_from(temp_home, format="datumaro")
+
+            # Read the categories while the exported files still exist, so the check
+            # does not depend on when the importer actually reads them.
+            label_groups = dataset_imported.categories()[AnnotationType.label].label_groups
+
+        self.assertEqual(len(label_groups), 1)
+
+        # Check the content of the group, not only how many groups came back.
+        group = label_groups[0]
+        self.assertEqual(group.name, "manmade")
+        self.assertEqual(group.group_type, "exclusive")
+        # Compare order-insensitively: the exporter writes group labels sorted.
+        self.assertEqual(sorted(group.labels), ["bicycle", "car"])