From 8eb370892c7363910c60cd7604b97cb018319b96 Mon Sep 17 00:00:00 2001 From: bonhun koo Date: Thu, 20 Oct 2022 02:58:50 +0900 Subject: [PATCH] hierarchical labeling --- CHANGELOG.md | 2 + datumaro/components/annotation.py | 24 +++- datumaro/plugins/datumaro_format/converter.py | 14 +++ datumaro/plugins/datumaro_format/extractor.py | 9 +- .../datumaro_dataset/annotations/test.json | 77 +++++++++++++ .../datumaro_dataset/annotations/train.json | 103 ++++++++++++++++++ .../annotations/validation.json | 54 +++++++++ .../assets/datumaro_dataset/images/test/c.jpg | Bin 0 -> 631 bytes .../datumaro_dataset/images/train/a.jpg | Bin 0 -> 631 bytes .../datumaro_dataset/images/train/b.jpg | Bin 0 -> 631 bytes .../datumaro_dataset/images/validation/d.png | Bin 0 -> 70 bytes tests/test_labeling.py | 85 +++++++++++++++ 12 files changed, 366 insertions(+), 2 deletions(-) create mode 100644 tests/assets/datumaro_dataset/annotations/test.json create mode 100644 tests/assets/datumaro_dataset/annotations/train.json create mode 100644 tests/assets/datumaro_dataset/annotations/validation.json create mode 100644 tests/assets/datumaro_dataset/images/test/c.jpg create mode 100644 tests/assets/datumaro_dataset/images/train/a.jpg create mode 100644 tests/assets/datumaro_dataset/images/train/b.jpg create mode 100644 tests/assets/datumaro_dataset/images/validation/d.png create mode 100644 tests/test_labeling.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 84c35ac044..afe26adeb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Add jupyter sample introducing how to merge datasets () +- Support for exclusive labels with LabelGroup + () ## 06/09/2022 - Release v0.3.1 ### Added diff --git a/datumaro/components/annotation.py b/datumaro/components/annotation.py index baa5376dd1..2ebec612b9 100644 --- a/datumaro/components/annotation.py +++ b/datumaro/components/annotation.py @@ -98,7 +98,14
@@ class Category: parent: str = field(default="", validator=default_if_none(str)) attributes: Set[str] = field(factory=set, validator=default_if_none(set)) + @attrs(slots=True, order=False) + class LabelGroup: + name: str = field(converter=str, validator=not_empty) + labels: List[str] = field(factory=list, validator=default_if_none(list)) + group_type: str = field(default="exclusive", validator=default_if_none(str)) + items: List[str] = field(factory=list, validator=default_if_none(list)) + label_groups: List[LabelGroup] = field(factory=list, validator=default_if_none(list)) _indices: Dict[str, int] = field(factory=dict, init=False, eq=False) @classmethod @@ -146,7 +153,10 @@ def _reindex(self): self._indices = indices def add( - self, name: str, parent: Optional[str] = None, attributes: Optional[Set[str]] = None + self, + name: str, + parent: Optional[str] = None, + attributes: Optional[Set[str]] = None, ) -> int: assert name assert name not in self._indices, name @@ -156,6 +166,18 @@ def add( self._indices[name] = index return index + def add_label_group( + self, + name: str, + labels: List[str], + group_type: str, + ) -> int: + assert name + + index = len(self.label_groups) + self.label_groups.append(self.LabelGroup(name, labels, group_type)) + return index + def find(self, name: str) -> Tuple[Optional[int], Optional[Category]]: index = self._indices.get(name) if index is not None: diff --git a/datumaro/plugins/datumaro_format/converter.py b/datumaro/plugins/datumaro_format/converter.py index dce8ca38b8..251c5bc802 100644 --- a/datumaro/plugins/datumaro_format/converter.py +++ b/datumaro/plugins/datumaro_format/converter.py @@ -7,12 +7,14 @@ import os import os.path as osp import shutil +from collections import defaultdict import numpy as np import pycocotools.mask as mask_utils from datumaro.components.annotation import ( Annotation, + AnnotationType, Bbox, Caption, Cuboid3d, @@ -268,9 +270,13 @@ def _convert_cuboid_3d_object(self, obj): def
_convert_attribute_categories(self, attributes): return sorted(attributes) + def _convert_labels_label_groups(self, labels): + return sorted(labels) + def _convert_label_categories(self, obj): converted = { "labels": [], + "label_groups": [], "attributes": self._convert_attribute_categories(obj.attributes), } for label in obj.items: @@ -281,6 +287,14 @@ def _convert_label_categories(self, obj): "attributes": self._convert_attribute_categories(label.attributes), } ) + for label_group in obj.label_groups: + converted["label_groups"].append( + { + "name": cast(label_group.name, str), + "group_type": cast(label_group.group_type, str), + "labels": self._convert_labels_label_groups(label_group.labels), + } + ) return converted def _convert_mask_categories(self, obj): diff --git a/datumaro/plugins/datumaro_format/extractor.py b/datumaro/plugins/datumaro_format/extractor.py index 62f781169d..a47085d5cc 100644 --- a/datumaro/plugins/datumaro_format/extractor.py +++ b/datumaro/plugins/datumaro_format/extractor.py @@ -65,7 +65,14 @@ def _load_categories(parsed): label_categories = LabelCategories(attributes=parsed_label_cat.get("attributes", [])) for item in parsed_label_cat["labels"]: label_categories.add( - item["name"], parent=item["parent"], attributes=item.get("attributes", []) + item["name"], + parent=item["parent"], + attributes=item.get("attributes", []), + ) + + for item in parsed_label_cat.get("label_groups", []): + label_categories.add_label_group( + item["name"], labels=item["labels"], group_type=item["group_type"] ) categories[AnnotationType.label] = label_categories diff --git a/tests/assets/datumaro_dataset/annotations/test.json b/tests/assets/datumaro_dataset/annotations/test.json new file mode 100644 index 0000000000..12aabb1088 --- /dev/null +++ b/tests/assets/datumaro_dataset/annotations/test.json @@ -0,0 +1,77 @@ +{ + "info": {}, + "categories": { + "label": { + "label_groups": [ + { + "name": "manmade", + "group_type": "exclusive", + "labels": ["car", "bicycle"]
+ }, + { + "name": "empty_group", + "group_type": "empty", + "labels": ["tom", "mary"] + } + ], + "labels": [ + { + "name": "car", + "parent": "", + "attributes": [] + }, + { + "name": "bicycle", + "parent": "", + "attributes": [] + }, + { + "name": "tom", + "parent": "", + "attributes": [] + }, + { + "name": "mary", + "parent": "", + "attributes": [] + } + ], + "attributes": [] + } + }, + "items": [ + { + "id": "c", + "annotations": [ + { + "id": 0, + "type": "label", + "attributes": { + "score": 1.0 + }, + "group": 0, + "label_id": 1 + }, + { + "id": 0, + "type": "label", + "attributes": { + "score": 1.0 + }, + "group": 0, + "label_id": 3 + } + ], + "image": { + "path": "../tests/assets/datumaro_dataset/images/test/c.jpg", + "size": [ + 10, + 5 + ] + }, + "media": { + "path": "../tests/assets/datumaro_dataset/images/test/c.jpg" + } + } + ] +} \ No newline at end of file diff --git a/tests/assets/datumaro_dataset/annotations/train.json b/tests/assets/datumaro_dataset/annotations/train.json new file mode 100644 index 0000000000..b24a77ab78 --- /dev/null +++ b/tests/assets/datumaro_dataset/annotations/train.json @@ -0,0 +1,103 @@ +{ + "info": {}, + "categories": { + "label": { + "label_groups": [ + { + "name": "manmade", + "group_type": "exclusive", + "labels": ["car", "bicycle"] + }, + { + "name": "empty_group", + "group_type": "empty", + "labels": ["tom", "mary"] + } + ], + "labels": [ + { + "name": "car", + "parent": "", + "attributes": [] + }, + { + "name": "bicycle", + "parent": "", + "attributes": [] + }, + { + "name": "tom", + "parent": "", + "attributes": [] + }, + { + "name": "mary", + "parent": "", + "attributes": [] + } + ], + "attributes": [] + } + }, + "items": [ + { + "id": "a", + "annotations": [ + { + "id": 0, + "type": "label", + "attributes": { + "score": 1.0 + }, + "group": 0, + "label_id": 0 + } + ], + "image": { + "path": "../tests/assets/datumaro_dataset/images/train/a.jpg" + }, + "media": { + "path": 
"../tests/assets/datumaro_dataset/images/train/a.jpg" + } + }, + { + "id": "b", + "annotations": [ + { + "id": 0, + "type": "label", + "group": 0, + "label_id": 0 + }, + { + "id": 1, + "type": "label", + "group": 0, + "label_id": 1 + }, + { + "id": 2, + "type": "label", + "group": 0, + "label_id": 2 + }, + { + "id": 3, + "type": "label", + "group": 0, + "label_id": 5 + } + ], + "image": { + "path": "../tests/assets/datumaro_dataset/images/train/b.jpg", + "size": [ + 2, + 8 + ] + }, + "media": { + "path": "../tests/assets/datumaro_dataset/images/train/b.jpg" + } + } + ] +} \ No newline at end of file diff --git a/tests/assets/datumaro_dataset/annotations/validation.json b/tests/assets/datumaro_dataset/annotations/validation.json new file mode 100644 index 0000000000..4367b0af57 --- /dev/null +++ b/tests/assets/datumaro_dataset/annotations/validation.json @@ -0,0 +1,54 @@ +{ + "info": {}, + "categories": { + "label": { + "label_groups": [ + { + "name": "manmade", + "group_type": "exclusive", + "labels": ["car", "bicycle"] + }, + { + "name": "empty_group", + "group_type": "empty", + "labels": ["tom", "mary"] + } + ], + "labels": [ + { + "name": "car", + "parent": "", + "attributes": [] + }, + { + "name": "bicycle", + "parent": "", + "attributes": [] + }, + { + "name": "tom", + "parent": "", + "attributes": [] + }, + { + "name": "mary", + "parent": "", + "attributes": [] + } + ], + "attributes": [] + } + }, + "items": [ + { + "id": "d", + "annotations": [], + "image": { + "path": "../tests/assets/datumaro_dataset/images/validation/d.png" + }, + "media": { + "path": "../tests/assets/datumaro_dataset/images/validation/d.png" + } + } + ] +} \ No newline at end of file diff --git a/tests/assets/datumaro_dataset/images/test/c.jpg b/tests/assets/datumaro_dataset/images/test/c.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8bce84d3bf50bd756621338e0da944a42428fb06 GIT binary patch literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<c1}I=;VrF4wW9Q)H;sz?% 
zD!{d!pzFb!U9xX3zTPI5o8roG<0MW4oqZMDikqloVbuf*=gfJ(V&YTRE(2~ znmD<{#3dx9RMpfqG__1j&CD$#!v`*nMGf}^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<c1}I=;VrF4wW9Q)H;sz?% zD!{d!pzFb!U9xX3zTPI5o8roG<0MW4oqZMDikqloVbuf*=gfJ(V&YTRE(2~ znmD<{#3dx9RMpfqG__1j&CD$#!_R+R8VmUU-vj{uw9ah+ literal 0 HcmV?d00001 diff --git a/tests/assets/datumaro_dataset/images/train/b.jpg b/tests/assets/datumaro_dataset/images/train/b.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0ab7dbe4a41973063285ddadd8f7a2d10ca91c45 GIT binary patch literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<c1}I=;VrF4wW9Q)H;sz?% zD!{d!pzFb!U9xX3zTPI5o8roG<0MW4oqZMDikqloVbuf*=gfJ(V&YTRE(2~ znmD<{#3dx9RMpfqG__1j&CD$#!_R+R8VmUU-vj{nWX@>- literal 0 HcmV?d00001 diff --git a/tests/assets/datumaro_dataset/images/validation/d.png b/tests/assets/datumaro_dataset/images/validation/d.png new file mode 100644 index 0000000000000000000000000000000000000000..528f10546704be6b339cfe1f577ca4b10ef4f472 GIT binary patch literal 70 zcmeAS@N?(olHy`uVBq!ia0vp^tU%1j!2~2{&iT9qEaBo9!XcZ?!o;QmFVdQ&MBb@0GX=|x&QzG literal 0 HcmV?d00001 diff --git a/tests/test_labeling.py b/tests/test_labeling.py new file mode 100644 index 0000000000..708fe0a829 --- /dev/null +++ b/tests/test_labeling.py @@ -0,0 +1,85 @@ +# Copyright (C) 2019-2022 Intel Corporation +# +# SPDX-License-Identifier: MIT +import tempfile +from unittest.case import TestCase + +import numpy as np + +from datumaro.components.annotation import AnnotationType, Label, LabelCategories +from datumaro.components.extractor import DatasetItem +from datumaro.components.media import Image +from datumaro.components.project import Dataset + +from .requirements import Requirements, mark_requirement + + +class LabelingTest(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_multiple_label(self): + label_categories = LabelCategories() + label_categories.add("car", parent="") + label_categories.add("bicycle", parent="") + + dataset = Dataset.from_iterable( + [ + DatasetItem( 
+ id=0, + subset="train", + media=Image(data=np.ones((10, 6, 3))), + annotations=[ + Label( + 0, + id=0, + ), + Label( + 1, + id=1, + ), + ], + ), + ], + categories={ + AnnotationType.label: label_categories, + }, + ) + + for item in dataset: + self.assertEqual(len(item.annotations), 2) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_label_group(self): + label_categories = LabelCategories() + label_categories.add("car", parent="") + label_categories.add("bicycle", parent="") + + label_categories.add_label_group("manmade", ["car", "bicycle"], group_type="exclusive") + + dataset = Dataset.from_iterable( + [ + DatasetItem( + id=0, + subset="train", + media=Image(data=np.ones((10, 6, 3))), + annotations=[ + Label( + 0, + id=0, + ), + Label( + 1, + id=1, + ), + ], + ), + ], + categories={ + AnnotationType.label: label_categories, + }, + ) + + with tempfile.TemporaryDirectory() as temp_home: + dataset.export(temp_home, format="datumaro") + dataset_imported = Dataset.import_from(temp_home, format="datumaro") + + self.assertEqual(len(dataset_imported.categories()[AnnotationType.label].label_groups), 1)