From 8eb370892c7363910c60cd7604b97cb018319b96 Mon Sep 17 00:00:00 2001
From: bonhun koo <bonhun.koo@intel.com>
Date: Thu, 20 Oct 2022 02:58:50 +0900
Subject: [PATCH] hierarchical labeling

---
 CHANGELOG.md                                  |   2 +
 datumaro/components/annotation.py             |  24 +++-
 datumaro/plugins/datumaro_format/converter.py |  14 +++
 datumaro/plugins/datumaro_format/extractor.py |   9 +-
 .../datumaro_dataset/annotations/test.json    |  77 +++++++++++++
 .../datumaro_dataset/annotations/train.json   | 103 ++++++++++++++++++
 .../annotations/validation.json               |  54 +++++++++
 .../assets/datumaro_dataset/images/test/c.jpg | Bin 0 -> 631 bytes
 .../datumaro_dataset/images/train/a.jpg       | Bin 0 -> 631 bytes
 .../datumaro_dataset/images/train/b.jpg       | Bin 0 -> 631 bytes
 .../datumaro_dataset/images/validation/d.png  | Bin 0 -> 70 bytes
 tests/test_labeling.py                        |  85 +++++++++++++++
 12 files changed, 366 insertions(+), 2 deletions(-)
 create mode 100644 tests/assets/datumaro_dataset/annotations/test.json
 create mode 100644 tests/assets/datumaro_dataset/annotations/train.json
 create mode 100644 tests/assets/datumaro_dataset/annotations/validation.json
 create mode 100644 tests/assets/datumaro_dataset/images/test/c.jpg
 create mode 100644 tests/assets/datumaro_dataset/images/train/a.jpg
 create mode 100644 tests/assets/datumaro_dataset/images/train/b.jpg
 create mode 100644 tests/assets/datumaro_dataset/images/validation/d.png
 create mode 100644 tests/test_labeling.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 84c35ac044..afe26adeb6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Add jupyter sample introducing how to merge datasets
   (<https://github.com/openvinotoolkit/datumaro/pull/738>)
+- Support for exclusive label groups with `LabelGroup`
+  (<https://github.com/openvinotoolkit/datumaro/pull/742>)
 
 ## 06/09/2022 - Release v0.3.1
 ### Added
diff --git a/datumaro/components/annotation.py b/datumaro/components/annotation.py
index baa5376dd1..2ebec612b9 100644
--- a/datumaro/components/annotation.py
+++ b/datumaro/components/annotation.py
@@ -98,7 +98,14 @@ class Category:
         parent: str = field(default="", validator=default_if_none(str))
         attributes: Set[str] = field(factory=set, validator=default_if_none(set))
 
+    @attrs(slots=True, order=False)
+    class LabelGroup:  # a named list of label names plus a group type
+        name: str = field(converter=str, validator=not_empty)
+        labels: List[str] = field(factory=list, validator=default_if_none(list))  # per-instance
+        group_type: str = field(default="exclusive", validator=default_if_none(str))
+
     items: List[str] = field(factory=list, validator=default_if_none(list))
+    label_groups: List[LabelGroup] = field(factory=list, validator=default_if_none(list))
     _indices: Dict[str, int] = field(factory=dict, init=False, eq=False)
 
     @classmethod
@@ -146,7 +153,10 @@ def _reindex(self):
         self._indices = indices
 
     def add(
-        self, name: str, parent: Optional[str] = None, attributes: Optional[Set[str]] = None
+        self,
+        name: str,
+        parent: Optional[str] = None,
+        attributes: Optional[Set[str]] = None,
     ) -> int:
         assert name
         assert name not in self._indices, name
@@ -156,6 +166,18 @@ def add(
         self._indices[name] = index
         return index
 
+    def add_label_group(self, name: str, labels: List[str], group_type: str) -> int:
+        """Register a new LabelGroup and return its index in ``label_groups``.
+
+        ``name`` must be truthy; all three arguments are forwarded to LabelGroup.
+        """
+        assert name
+
+        group = self.LabelGroup(name, labels, group_type)
+        position = len(self.label_groups)
+        self.label_groups.append(group)
+        return position
+
     def find(self, name: str) -> Tuple[Optional[int], Optional[Category]]:
         index = self._indices.get(name)
         if index is not None:
diff --git a/datumaro/plugins/datumaro_format/converter.py b/datumaro/plugins/datumaro_format/converter.py
index dce8ca38b8..251c5bc802 100644
--- a/datumaro/plugins/datumaro_format/converter.py
+++ b/datumaro/plugins/datumaro_format/converter.py
@@ -7,12 +7,14 @@
 import os
 import os.path as osp
 import shutil
+from collections import defaultdict
 
 import numpy as np
 import pycocotools.mask as mask_utils
 
 from datumaro.components.annotation import (
     Annotation,
+    AnnotationType,
     Bbox,
     Caption,
     Cuboid3d,
@@ -268,9 +270,13 @@ def _convert_cuboid_3d_object(self, obj):
     def _convert_attribute_categories(self, attributes):
         return sorted(attributes)
 
+    def _convert_labels_label_groups(self, labels):
+        return sorted(labels)  # label names of one group, in sorted order
+
     def _convert_label_categories(self, obj):
         converted = {
             "labels": [],
+            "label_groups": [],
             "attributes": self._convert_attribute_categories(obj.attributes),
         }
         for label in obj.items:
@@ -281,6 +287,14 @@ def _convert_label_categories(self, obj):
                     "attributes": self._convert_attribute_categories(label.attributes),
                 }
             )
+        for label_group in obj.label_groups:
+            converted["label_groups"].append(
+                {
+                    "name": cast(label_group.name, str),
+                    "group_type": cast(label_group.group_type, str),
+                    "labels": self._convert_labels_label_groups(label_group.labels),
+                }
+            )
         return converted
 
     def _convert_mask_categories(self, obj):
diff --git a/datumaro/plugins/datumaro_format/extractor.py b/datumaro/plugins/datumaro_format/extractor.py
index 62f781169d..a47085d5cc 100644
--- a/datumaro/plugins/datumaro_format/extractor.py
+++ b/datumaro/plugins/datumaro_format/extractor.py
@@ -65,7 +65,14 @@ def _load_categories(parsed):
             label_categories = LabelCategories(attributes=parsed_label_cat.get("attributes", []))
             for item in parsed_label_cat["labels"]:
                 label_categories.add(
-                    item["name"], parent=item["parent"], attributes=item.get("attributes", [])
+                    item["name"],
+                    parent=item["parent"],
+                    attributes=item.get("attributes", []),
+                )
+
+            for item in parsed_label_cat.get("label_groups", []):  # absent in older exports
+                label_categories.add_label_group(
+                    item["name"], labels=item["labels"], group_type=item["group_type"]
                 )
 
             categories[AnnotationType.label] = label_categories
diff --git a/tests/assets/datumaro_dataset/annotations/test.json b/tests/assets/datumaro_dataset/annotations/test.json
new file mode 100644
index 0000000000..12aabb1088
--- /dev/null
+++ b/tests/assets/datumaro_dataset/annotations/test.json
@@ -0,0 +1,77 @@
+{
+    "info": {},
+    "categories": {
+        "label": {
+            "label_groups": [
+                {
+                    "name": "manmade",
+                    "group_type": "exclusive",
+                    "labels": ["car", "bicycle"]
+                },
+                {
+                    "name": "empty_group",
+                    "group_type": "empty",
+                    "labels": ["tom", "mary"]
+                }
+            ],
+            "labels": [
+                {
+                    "name": "car",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "bicycle",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "tom",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "mary",
+                    "parent": "",
+                    "attributes": []
+                }
+            ],
+            "attributes": []
+        }
+    },
+    "items": [
+        {
+            "id": "c",
+            "annotations": [
+                {
+                    "id": 0,
+                    "type": "label",
+                    "attributes": {
+                        "score": 1.0
+                    },
+                    "group": 0,
+                    "label_id": 1
+                },
+                {
+                    "id": 0,
+                    "type": "label",
+                    "attributes": {
+                        "score": 1.0
+                    },
+                    "group": 0,
+                    "label_id": 3
+                }
+            ],
+            "image": {
+                "path": "../tests/assets/datumaro_dataset/images/test/c.jpg",
+                "size": [
+                    10,
+                    5
+                ]
+            },
+            "media": {
+                "path": "../tests/assets/datumaro_dataset/images/test/c.jpg"
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tests/assets/datumaro_dataset/annotations/train.json b/tests/assets/datumaro_dataset/annotations/train.json
new file mode 100644
index 0000000000..b24a77ab78
--- /dev/null
+++ b/tests/assets/datumaro_dataset/annotations/train.json
@@ -0,0 +1,103 @@
+{
+    "info": {},
+    "categories": {
+        "label": {
+            "label_groups": [
+                {
+                    "name": "manmade",
+                    "group_type": "exclusive",
+                    "labels": ["car", "bicycle"]
+                },
+                {
+                    "name": "empty_group",
+                    "group_type": "empty",
+                    "labels": ["tom", "mary"]
+                }
+            ],
+            "labels": [
+                {
+                    "name": "car",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "bicycle",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "tom",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "mary",
+                    "parent": "",
+                    "attributes": []
+                }
+            ],
+            "attributes": []
+        }
+    },
+    "items": [
+        {
+            "id": "a",
+            "annotations": [
+                {
+                    "id": 0,
+                    "type": "label",
+                    "attributes": {
+                        "score": 1.0
+                    },
+                    "group": 0,
+                    "label_id": 0
+                }
+            ],
+            "image": {
+                "path": "../tests/assets/datumaro_dataset/images/train/a.jpg"
+            },
+            "media": {
+                "path": "../tests/assets/datumaro_dataset/images/train/a.jpg"
+            }
+        },
+        {
+            "id": "b",
+            "annotations": [
+                {
+                    "id": 0,
+                    "type": "label",
+                    "group": 0,
+                    "label_id": 0
+                },
+                {
+                    "id": 1,
+                    "type": "label",
+                    "group": 0,
+                    "label_id": 1
+                },
+                {
+                    "id": 2,
+                    "type": "label",
+                    "group": 0,
+                    "label_id": 2
+                },
+                {
+                    "id": 3,
+                    "type": "label",
+                    "group": 0,
+                    "label_id": 5
+                }
+            ],
+            "image": {
+                "path": "../tests/assets/datumaro_dataset/images/train/b.jpg",
+                "size": [
+                    2,
+                    8
+                ]
+            },
+            "media": {
+                "path": "../tests/assets/datumaro_dataset/images/train/b.jpg"
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tests/assets/datumaro_dataset/annotations/validation.json b/tests/assets/datumaro_dataset/annotations/validation.json
new file mode 100644
index 0000000000..4367b0af57
--- /dev/null
+++ b/tests/assets/datumaro_dataset/annotations/validation.json
@@ -0,0 +1,54 @@
+{
+    "info": {},
+    "categories": {
+        "label": {
+            "label_groups": [
+                {
+                    "name": "manmade",
+                    "group_type": "exclusive",
+                    "labels": ["car", "bicycle"]
+                },
+                {
+                    "name": "empty_group",
+                    "group_type": "empty",
+                    "labels": ["tom", "mary"]
+                }
+            ],
+            "labels": [
+                {
+                    "name": "car",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "bicycle",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "tom",
+                    "parent": "",
+                    "attributes": []
+                },
+                {
+                    "name": "mary",
+                    "parent": "",
+                    "attributes": []
+                }
+            ],
+            "attributes": []
+        }
+    },
+    "items": [
+        {
+            "id": "d",
+            "annotations": [],
+            "image": {
+                "path": "../tests/assets/datumaro_dataset/images/validation/d.png"
+            },
+            "media": {
+                "path": "../tests/assets/datumaro_dataset/images/validation/d.png"
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tests/assets/datumaro_dataset/images/test/c.jpg b/tests/assets/datumaro_dataset/images/test/c.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8bce84d3bf50bd756621338e0da944a42428fb06
GIT binary patch
literal 631
zcmex=<NpH&0WUXCHwH#V1_nkTWcYuZ!I^=Bjg6g+m4ls~os*M;i${c)hnt&6Qb?Fz
zL{>^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<<Uft5x!pKI?*u)5A2qPyaCl5D|1TU|I
zk)n*E5y{~H0R}-11}+9xW=16jCP7AKLB{__803NOWMu>c1}I=;VrF4wW9Q)H;sz?%
zD!{<R$jr>d!pzFb!U9xX3zTPI5o8roG<0MW4oqZMDikqloVbuf*=gfJ(V&YTRE(2~
znmD<{#3dx9RMpfqG__1j&CD$<t(;w4-P}Dqy@EqR!@?sXqmoln)6z3Cvx-Yf%gQS%
ztD0L{+uA!iyCzMZGIiSY88c@sTD)ZGvgIpQuG+MD%hqk%ckJAC=<t!F$Bv&kdFs;T
zD_5^wzj5={!$*&wJbm{3#miS8KYjl4_1pI!KYxMz#mK-6@fMJTcnr;7f<S*Uv9K_+
zu!H=?$W#u*%z`YeiiT`Lj)Clng~CckjT|CQ6Blkg$f;}`^g%SK=pvVxipfLOk07ss
meMX$en#l4Q++zrT-D2QjW&}navmk>#!v`*nMGf}<Zvp`MyUm#Z

literal 0
HcmV?d00001

diff --git a/tests/assets/datumaro_dataset/images/train/a.jpg b/tests/assets/datumaro_dataset/images/train/a.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..864035b7f434bb7f8283484d7d380b25d88992ea
GIT binary patch
literal 631
zcmex=<NpH&0WUXCHwH#V1_nkTWcYuZ!I^=Bjg6g+m4ls~os*M;i${c)hnt&6Qb?Fz
zL{>^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<<Uft5x!pKI?*u)5A2qPyaCl5D|1TU|I
zk)n*E5y{~H0R}-11`Y-`W=16jCP7AKLB{__803NOWMu>c1}I=;VrF4wW9Q)H;sz?%
zD!{<R$jr>d!pzFb!U9xX3zTPI5o8roG<0MW4oqZMDikqloVbuf*=gfJ(V&YTRE(2~
znmD<{#3dx9RMpfqG__1j&CD$<t(;w4-P}Dqy@EqR!@?sXqmoln)6z3Cvx-Yf%gQS%
ztD0L{+uA!iyCzMZGIiSY88c@sTD)ZGvgIpQuG+MD%hqk%ckJAC=<t!F$Bv&kdFs;T
zD_5^wzj5={!$*&wJbm{3#miS8KYjl4_1pI!KYxMz#mK-6@fMJTcnr;7f<S*Uv9K_+
zu!H=?$W#u*%z`YeiiT`Lj)Clng~CckjT|CQ6Blkg$f;}`^g%SK=pvVxipfLOk07ss
meMX$en#l4Q++zrT-D2QjW&}navmk>#!_R+R8VmUU-vj{uw9ah+

literal 0
HcmV?d00001

diff --git a/tests/assets/datumaro_dataset/images/train/b.jpg b/tests/assets/datumaro_dataset/images/train/b.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0ab7dbe4a41973063285ddadd8f7a2d10ca91c45
GIT binary patch
literal 631
zcmex=<NpH&0WUXCHwH#V1_nkTWcYuZ!I^=Bjg6g+m4ls~os*M;i${c)hnt&6Qb?Fz
zL{>^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<<Uft5x!pKI?*u)5A2qPyaCl5D|1TU|I
zk)n*E5y{~H0R}-11||j$W=16jCP7AKLB{__803NOWMu>c1}I=;VrF4wW9Q)H;sz?%
zD!{<R$jr>d!pzFb!U9xX3zTPI5o8roG<0MW4oqZMDikqloVbuf*=gfJ(V&YTRE(2~
znmD<{#3dx9RMpfqG__1j&CD$<t(;w4-P}Dqy@EqR!@?sXqmoln)6z3Cvx-Yf%gQS%
ztD0L{+uA!iyCzMZGIiSY88c@sTD)ZGvgIpQuG+MD%hqk%ckJAC=<t!F$Bv&kdFs;T
zD_5^wzj5={!$*&wJbm{3#miS8KYjl4_1pI!KYxMz#mK-6@fMJTcnr;7f<S*Uv9K_+
zu!H=?$W#u*%z`YeiiT`Lj)Clng~CckjT|CQ6Blkg$f;}`^g%SK=pvVxipfLOk07ss
meMX$en#l4Q++zrT-D2QjW&}navmk>#!_R+R8VmUU-vj{nWX@>-

literal 0
HcmV?d00001

diff --git a/tests/assets/datumaro_dataset/images/validation/d.png b/tests/assets/datumaro_dataset/images/validation/d.png
new file mode 100644
index 0000000000000000000000000000000000000000..528f10546704be6b339cfe1f577ca4b10ef4f472
GIT binary patch
literal 70
zcmeAS@N?(olHy`uVBq!ia0vp^tU%1j!2~2{&iT9qEaBo9!XcZ?!o;Qm<a#l%PMP%Z
QB9O)4>FVdQ&MBb@0GX=|x&QzG

literal 0
HcmV?d00001

diff --git a/tests/test_labeling.py b/tests/test_labeling.py
new file mode 100644
index 0000000000..708fe0a829
--- /dev/null
+++ b/tests/test_labeling.py
@@ -0,0 +1,85 @@
+# Copyright (C) 2019-2022 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+import tempfile
+from unittest.case import TestCase
+
+import numpy as np
+
+from datumaro.components.annotation import AnnotationType, Label, LabelCategories
+from datumaro.components.extractor import DatasetItem
+from datumaro.components.media import Image
+from datumaro.components.project import Dataset
+
+from .requirements import Requirements, mark_requirement
+
+
+class LabelingTest(TestCase):
+    """Tests for multi-label items and for label groups in LabelCategories."""
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_multiple_label(self):
+        """An item created with two Label annotations keeps both of them."""
+        label_categories = LabelCategories()
+        label_categories.add("car", parent="")
+        label_categories.add("bicycle", parent="")
+
+        dataset = Dataset.from_iterable(
+            [
+                DatasetItem(
+                    id=0,
+                    subset="train",
+                    media=Image(data=np.ones((10, 6, 3))),
+                    annotations=[
+                        Label(0, id=0),
+                        Label(1, id=1),
+                    ],
+                ),
+            ],
+            categories={
+                AnnotationType.label: label_categories,
+            },
+        )
+
+        for item in dataset:
+            self.assertEqual(len(item.annotations), 2)
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_label_group_roundtrip(self):
+        """A label group survives a datumaro-format export/import cycle.
+
+        Named differently from the test above so that both are collected.
+        """
+        label_categories = LabelCategories()
+        label_categories.add("car", parent="")
+        label_categories.add("bicycle", parent="")
+
+        label_categories.add_label_group("manmade", ["car", "bicycle"], group_type="exclusive")
+
+        dataset = Dataset.from_iterable(
+            [
+                DatasetItem(
+                    id=0,
+                    subset="train",
+                    media=Image(data=np.ones((10, 6, 3))),
+                    annotations=[
+                        Label(0, id=0),
+                        Label(1, id=1),
+                    ],
+                ),
+            ],
+            categories={
+                AnnotationType.label: label_categories,
+            },
+        )
+
+        with tempfile.TemporaryDirectory() as temp_home:
+            dataset.export(temp_home, format="datumaro")
+            dataset_imported = Dataset.import_from(temp_home, format="datumaro")
+
+        label_groups = dataset_imported.categories()[AnnotationType.label].label_groups
+        self.assertEqual(len(label_groups), 1)
+        self.assertEqual(label_groups[0].name, "manmade")
+        self.assertEqual(label_groups[0].group_type, "exclusive")
+        # The exporter writes each group's labels sorted, so compare order-insensitively.
+        self.assertEqual(sorted(label_groups[0].labels), ["bicycle", "car"])