Skip to content

Commit

Permalink
hierarchical labeling
Browse files Browse the repository at this point in the history
  • Loading branch information
bonhunko committed Oct 18, 2022
1 parent 5ff089d commit 4ca506e
Show file tree
Hide file tree
Showing 12 changed files with 413 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Add jupyter sample introducing how to merge datasets
(<https://github.com/openvinotoolkit/datumaro/pull/738>)
- Support for hierarchical labeling with single-selection
(<https://github.com/openvinotoolkit/datumaro/pull/742>)

## 06/09/2022 - Release v0.3.1
### Added
Expand Down
9 changes: 7 additions & 2 deletions datumaro/components/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ class Category:
name: str = field(converter=str, validator=not_empty)
parent: str = field(default="", validator=default_if_none(str))
attributes: Set[str] = field(factory=set, validator=default_if_none(set))
single_selection: bool = field(default=False, validator=default_if_none(bool))

items: List[str] = field(factory=list, validator=default_if_none(list))
_indices: Dict[str, int] = field(factory=dict, init=False, eq=False)
Expand Down Expand Up @@ -146,13 +147,17 @@ def _reindex(self):
self._indices = indices

def add(
self, name: str, parent: Optional[str] = None, attributes: Optional[Set[str]] = None
self,
name: str,
parent: Optional[str] = None,
attributes: Optional[Set[str]] = None,
single_selection: Optional[bool] = False,
) -> int:
assert name
assert name not in self._indices, name

index = len(self.items)
self.items.append(self.Category(name, parent, attributes))
self.items.append(self.Category(name, parent, attributes, single_selection))
self._indices[name] = index
return index

Expand Down
37 changes: 37 additions & 0 deletions datumaro/plugins/datumaro_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from datumaro.components.annotation import (
Annotation,
AnnotationType,
Bbox,
Caption,
Cuboid3d,
Expand Down Expand Up @@ -279,6 +280,7 @@ def _convert_label_categories(self, obj):
"name": cast(label.name, str),
"parent": cast(label.parent, str),
"attributes": self._convert_attribute_categories(label.attributes),
"single_selection": cast(label.single_selection, bool),
}
)
return converted
Expand Down Expand Up @@ -336,6 +338,7 @@ def apply(self):

for item in self._extractor:
subset = item.subset or DEFAULT_SUBSET_NAME
item = self._filterout_for_single_selection(item, self._extractor.categories())
writers[subset].add_item(item)

for subset, writer in writers.items():
Expand Down Expand Up @@ -383,3 +386,37 @@ def patch(cls, dataset, patch, save_dir, **kwargs):
)
if osp.isdir(related_images_path):
shutil.rmtree(related_images_path)

@staticmethod
def _filterout_for_single_selection(item, categories):
    """Remove label annotations that conflict under the single-selection rule.

    Label annotations of ``item`` are grouped by the ``parent`` of their
    label category. When two or more grouped annotations share the same
    parent, every annotation of that group is deleted (none is kept).
    Annotations of other types, and labels without a parent, are never
    touched.

    Args:
        item: Dataset item whose ``annotations`` list is filtered in place.
        categories: Mapping from annotation type to categories; the entry
            under ``AnnotationType.label`` is indexed by ``annotation.label``
            and must yield objects with ``parent`` and ``single_selection``.

    Returns:
        The same ``item`` object, with conflicting label annotations removed.
    """
    # A dataset without label categories has nothing to filter; indexing
    # the mapping unconditionally would raise KeyError for such datasets.
    if AnnotationType.label not in categories:
        return item
    label_categories = categories[AnnotationType.label]

    # Collect, per parent, the positions of the label annotations whose
    # labels are children of that parent.
    parent2ss_indices = {}
    for i, annotation in enumerate(item.annotations):
        if annotation._type != AnnotationType.label:
            continue

        label_category = label_categories[annotation.label]
        parent = label_category.parent
        if not parent:
            # Top-level labels have no siblings to conflict with.
            continue

        # NOTE(review): this tests the child label's own flag, not the
        # parent's, so siblings are grouped whenever their own
        # ``single_selection`` is False. Kept as-is; confirm whether the
        # parent's flag was intended.
        if not label_category.single_selection:
            parent2ss_indices.setdefault(parent, []).append(i)

    # Gather every offending position first. Deleting group by group would
    # shift the list and invalidate the indices stored for later groups.
    indices_to_remove = set()
    for indices in parent2ss_indices.values():
        if len(indices) > 1:
            indices_to_remove.update(indices)

    # Delete from the back so the remaining indices stay valid.
    for i in sorted(indices_to_remove, reverse=True):
        del item.annotations[i]

    return item
5 changes: 4 additions & 1 deletion datumaro/plugins/datumaro_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ def _load_categories(parsed):
label_categories = LabelCategories(attributes=parsed_label_cat.get("attributes", []))
for item in parsed_label_cat["labels"]:
label_categories.add(
item["name"], parent=item["parent"], attributes=item.get("attributes", [])
item["name"],
parent=item["parent"],
attributes=item.get("attributes", []),
single_selection=item.get("single_selection", False),
)

categories[AnnotationType.label] = label_categories
Expand Down
81 changes: 81 additions & 0 deletions tests/assets/datumaro_dataset/annotations/test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
{
"info": {},
"categories": {
"label": {
"labels": [
{
"name": "manmade",
"parent": "",
"single_selection": true,
"attributes": []
},
{
"name": "car",
"parent": "manmade",
"single_selection": false,
"attributes": []
},
{
"name": "bicycle",
"parent": "manmade",
"single_selection": false,
"attributes": []
},
{
"name": "accesory",
"parent": "",
"single_selection": false,
"attributes": []
},
{
"name": "glasses",
"parent": "accesory",
"single_selection": false,
"attributes": []
},
{
"name": "glove",
"parent": "accesory",
"single_selection": false,
"attributes": []
}
],
"attributes": []
}
},
"items": [
{
"id": "c",
"annotations": [
{
"id": 0,
"type": "label",
"attributes": {
"score": 1.0
},
"group": 0,
"label_id": 1
},
{
"id": 0,
"type": "label",
"attributes": {
"score": 1.0
},
"group": 0,
"label_id": 3
}
],
"image": {
"path": "../tests/assets/datumaro_dataset/images/test/c.jpg",
"size": [
10,
5
]
},
"media": {
"path": "../tests/assets/datumaro_dataset/images/test/c.jpg"
}
}
]
}
107 changes: 107 additions & 0 deletions tests/assets/datumaro_dataset/annotations/train.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
{
"info": {},
"categories": {
"label": {
"labels": [
{
"name": "manmade",
"parent": "",
"single_selection": true,
"attributes": []
},
{
"name": "car",
"parent": "manmade",
"single_selection": false,
"attributes": []
},
{
"name": "bicycle",
"parent": "manmade",
"single_selection": false,
"attributes": []
},
{
"name": "accesory",
"parent": "",
"single_selection": false,
"attributes": []
},
{
"name": "glasses",
"parent": "accesory",
"single_selection": false,
"attributes": []
},
{
"name": "glove",
"parent": "accesory",
"single_selection": false,
"attributes": []
}
],
"attributes": []
}
},
"items": [
{
"id": "a",
"annotations": [
{
"id": 0,
"type": "label",
"attributes": {
"score": 1.0
},
"group": 0,
"label_id": 0
}
],
"image": {
"path": "../tests/assets/datumaro_dataset/images/train/a.jpg"
},
"media": {
"path": "../tests/assets/datumaro_dataset/images/train/a.jpg"
}
},
{
"id": "b",
"annotations": [
{
"id": 0,
"type": "label",
"group": 0,
"label_id": 0
},
{
"id": 1,
"type": "label",
"group": 0,
"label_id": 1
},
{
"id": 2,
"type": "label",
"group": 0,
"label_id": 2
},
{
"id": 3,
"type": "label",
"group": 0,
"label_id": 5
}
],
"image": {
"path": "../tests/assets/datumaro_dataset/images/train/b.jpg",
"size": [
2,
8
]
},
"media": {
"path": "../tests/assets/datumaro_dataset/images/train/b.jpg"
}
}
]
}
58 changes: 58 additions & 0 deletions tests/assets/datumaro_dataset/annotations/validation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"info": {},
"categories": {
"label": {
"labels": [
{
"name": "manmade",
"parent": "",
"single_selection": true,
"attributes": []
},
{
"name": "car",
"parent": "manmade",
"single_selection": false,
"attributes": []
},
{
"name": "bicycle",
"parent": "manmade",
"single_selection": false,
"attributes": []
},
{
"name": "accesory",
"parent": "",
"single_selection": false,
"attributes": []
},
{
"name": "glasses",
"parent": "accesory",
"single_selection": false,
"attributes": []
},
{
"name": "glove",
"parent": "accesory",
"single_selection": false,
"attributes": []
}
],
"attributes": []
}
},
"items": [
{
"id": "d",
"annotations": [],
"image": {
"path": "../tests/assets/datumaro_dataset/images/validation/d.png"
},
"media": {
"path": "../tests/assets/datumaro_dataset/images/validation/d.png"
}
}
]
}
Binary file added tests/assets/datumaro_dataset/images/test/c.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/assets/datumaro_dataset/images/train/a.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/assets/datumaro_dataset/images/train/b.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 4ca506e

Please sign in to comment.