Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add label support in WiderFace dataset format #90

Merged
merged 4 commits into from
Jan 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]
### Added
- `WiderFace` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/65>)
- `WiderFace` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/65>, <https://github.com/openvinotoolkit/datumaro/pull/90>)
- Function to transform annotations to labels (<https://github.com/openvinotoolkit/datumaro/pull/66>)
- Task-specific Splitter (<https://github.com/openvinotoolkit/datumaro/pull/68>, <https://github.com/openvinotoolkit/datumaro/pull/81>)
- `VGGFace2` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/69>, <https://github.com/openvinotoolkit/datumaro/pull/82>)
Expand Down
71 changes: 62 additions & 9 deletions datumaro/plugins/widerface_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@

from datumaro.components.converter import Converter
from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem,
Importer, SourceExtractor)
Importer, Label, LabelCategories, SourceExtractor)


class WiderFacePath:
    """Names of the files and directories that make up a WiderFace dataset."""

    # Extension appended to item ids to form image file names.
    IMAGE_EXT = '.jpg'

    # Directory names used inside the dataset root.
    ANNOTATIONS_DIR = 'wider_face_split'
    IMAGES_DIR = 'images'
    # Prefix of a subset directory; the subset name is appended to it.
    SUBSET_DIR = 'WIDER_'

    # File in the dataset root listing label names, one per line.
    LABELS_FILE = 'labels.txt'
    # Image sub-directory used for items that carry no label.
    IMAGES_DIR_NO_LABEL = 'no_label'

    # Names of the per-bbox attributes, in the order they are listed here.
    BBOX_ATTRIBUTES = [
        'blur',
        'expression',
        'illumination',
        'occluded',
        'pose',
        'invalid',
    ]

Expand All @@ -33,8 +35,31 @@ def __init__(self, path):
subset = subset.split('_')[2]
super().__init__(subset=subset)

self._categories = self._load_categories()
self._items = list(self._load_items(path).values())

def _load_categories(self):
    """Build the label categories for this dataset.

    Label names are read, one per line, from ``labels.txt`` in the dataset
    root when that file exists. Otherwise they are derived from the names
    of the image sub-directories of the current subset
    (``WIDER_<subset>/images/<dir>``), visited in sorted order. The
    ``no_label`` directory is skipped, and for a directory name that
    contains ``--`` only the segment following the first ``--`` is used.

    Returns:
        dict: ``{AnnotationType.label: LabelCategories}``; the categories
        are empty when neither the labels file nor the subset image
        directory exists.
    """
    # Work on a local object only: the caller assigns the returned dict to
    # ``self._categories``, so nothing is written to that attribute here.
    label_cat = LabelCategories()

    labels_path = osp.join(self._dataset_dir, WiderFacePath.LABELS_FILE)
    if osp.isfile(labels_path):
        with open(labels_path, encoding='utf-8') as labels_file:
            for line in labels_file:
                label_name = line.strip()
                # Blank lines (e.g. a trailing newline at the end of the
                # file) are not labels and must not become an empty entry.
                if label_name:
                    label_cat.add(label_name)
        return { AnnotationType.label: label_cat }

    subset_path = osp.join(self._dataset_dir,
        WiderFacePath.SUBSET_DIR + self._subset,
        WiderFacePath.IMAGES_DIR)
    if osp.isdir(subset_path):
        for images_dir in sorted(os.listdir(subset_path)):
            if images_dir == WiderFacePath.IMAGES_DIR_NO_LABEL:
                continue
            if not osp.isdir(osp.join(subset_path, images_dir)):
                continue

            label_name = images_dir
            if '--' in images_dir:
                # Directory names look like "<prefix>--<name>".
                label_name = images_dir.split('--')[1]
            label_cat.add(label_name)

    return { AnnotationType.label: label_cat }

def _load_items(self, path):
items = {}
with open(path, 'r') as f:
Expand All @@ -48,10 +73,19 @@ def _load_items(self, path):
image_path = osp.join(self._dataset_dir, WiderFacePath.SUBSET_DIR
+ self._subset, WiderFacePath.IMAGES_DIR, image[:-1])
item_id = image[:-(len(WiderFacePath.IMAGE_EXT) + 1)]
annotations = []
if '/' in item_id:
label_name = item_id.split('/')[0]
if '--' in label_name:
label_name = label_name.split('--')[1]
if label_name != WiderFacePath.IMAGES_DIR_NO_LABEL:
label = \
self._categories[AnnotationType.label].find(label_name)[0]
annotations.append(Label(label=label))
item_id = item_id[len(item_id.split('/')[0]) + 1:]

bbox_count = lines[image_id + 1]
bbox_lines = lines[image_id + 2 : image_id + int(bbox_count) + 2]
annotations = []
for bbox in bbox_lines:
bbox_list = bbox.split()
if len(bbox_list) >= 4:
Expand All @@ -63,8 +97,8 @@ def _load_items(self, path):
attributes[attr] = int(bbox_list[i])
i += 1
annotations.append(Bbox(
int(bbox_list[0]), int(bbox_list[1]),
int(bbox_list[2]), int(bbox_list[3]),
float(bbox_list[0]), float(bbox_list[1]),
float(bbox_list[2]), float(bbox_list[3]),
attributes = attributes
))

Expand All @@ -83,18 +117,37 @@ class WiderFaceConverter(Converter):

def apply(self):
save_dir = self._save_dir

os.makedirs(save_dir, exist_ok=True)

label_categories = self._extractor.categories()[AnnotationType.label]

labels_path = osp.join(save_dir, WiderFacePath.LABELS_FILE)
with open(labels_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(label.name for label in label_categories))

for subset_name, subset in self._extractor.subsets().items():
subset_dir = osp.join(save_dir, WiderFacePath.SUBSET_DIR + subset_name)

wider_annotation = ''
for item in subset:
wider_annotation += '%s\n' % (item.id + WiderFacePath.IMAGE_EXT)
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
WiderFacePath.IMAGES_DIR, item.id + WiderFacePath.IMAGE_EXT))
labels = [a.label for a in item.annotations
if a.type == AnnotationType.label]
if labels:
wider_annotation += '%s\n' % (str(labels[0]) + '--' \
+ label_categories[labels[0]].name + '/' \
+ item.id + WiderFacePath.IMAGE_EXT)
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
WiderFacePath.IMAGES_DIR, str(labels[0]) + '--' \
+ label_categories[labels[0]].name + '/' + item.id \
+ WiderFacePath.IMAGE_EXT))
else:
wider_annotation += '%s\n' % (WiderFacePath.IMAGES_DIR_NO_LABEL \
+ '/' + item.id + WiderFacePath.IMAGE_EXT)
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
WiderFacePath.IMAGES_DIR, WiderFacePath.IMAGES_DIR_NO_LABEL \
+ '/' + item.id + WiderFacePath.IMAGE_EXT))

bboxes = [a for a in item.annotations
if a.type == AnnotationType.bbox]
Expand Down
2 changes: 2 additions & 0 deletions tests/assets/widerface_dataset/labels.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Parade
Handshaking
33 changes: 21 additions & 12 deletions tests/test_widerface_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from unittest import TestCase

import numpy as np
from datumaro.components.extractor import Bbox, DatasetItem
from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem,
Label, LabelCategories)
from datumaro.components.dataset import Dataset
from datumaro.plugins.widerface_format import WiderFaceConverter, WiderFaceImporter
from datumaro.util.test_utils import TestDir, compare_datasets
Expand All @@ -17,6 +18,7 @@ def test_can_save_and_load(self):
Bbox(0, 1, 2, 3, attributes = {
'blur': 2, 'expression': 0, 'illumination': 0,
'occluded': 0, 'pose': 2, 'invalid': 0}),
Label(0),
]
),
DatasetItem(id='2', subset='train', image=np.ones((10, 10, 3)),
Expand All @@ -30,6 +32,7 @@ def test_can_save_and_load(self):
Bbox(2, 1, 2, 3, attributes = {
'blur': 2, 'expression': 0, 'illumination': 0,
'occluded': 0, 'pose': 0, 'invalid': 1}),
Label(1),
]
),

Expand All @@ -47,11 +50,14 @@ def test_can_save_and_load(self):
),

DatasetItem(id='4', subset='val', image=np.ones((8, 8, 3))),
])
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(i) for i in range(3)),
})

with TestDir() as test_dir:
WiderFaceConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = WiderFaceImporter()(test_dir).make_dataset()
parsed_dataset = Dataset.import_from(test_dir, 'wider_face')

compare_datasets(self, source_dataset, parsed_dataset)

Expand All @@ -65,11 +71,11 @@ def test_can_save_dataset_with_no_subsets(self):
'occluded': 0, 'pose': 2, 'invalid': 0}),
]
),
])
], categories=[])

with TestDir() as test_dir:
WiderFaceConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = WiderFaceImporter()(test_dir).make_dataset()
parsed_dataset = Dataset.import_from(test_dir, 'wider_face')

compare_datasets(self, source_dataset, parsed_dataset)

Expand All @@ -85,7 +91,7 @@ def test_can_save_dataset_with_non_widerface_attributes(self):
'non-widerface attribute': 0}),
]
),
])
], categories=[])

target_dataset = Dataset.from_iterable([
DatasetItem(id='a/b/1', image=np.ones((8, 8, 3)),
Expand All @@ -96,11 +102,11 @@ def test_can_save_dataset_with_non_widerface_attributes(self):
Bbox(1, 1, 2, 2),
]
),
])
], categories=[])

with TestDir() as test_dir:
WiderFaceConverter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = WiderFaceImporter()(test_dir).make_dataset()
parsed_dataset = Dataset.import_from(test_dir, 'wider_face')

compare_datasets(self, target_dataset, parsed_dataset)

Expand All @@ -112,15 +118,16 @@ def test_can_detect(self):

def test_can_import(self):
expected_dataset = Dataset.from_iterable([
DatasetItem(id='0--Parade/0_Parade_image_01', subset='train',
DatasetItem(id='0_Parade_image_01', subset='train',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(1, 2, 2, 2, attributes = {
'blur': 0, 'expression': 0, 'illumination': 0,
'occluded': 0, 'pose': 0, 'invalid': 0}),
Label(0),
]
),
DatasetItem(id='1--Handshaking/1_Handshaking_image_02', subset='train',
DatasetItem(id='1_Handshaking_image_02', subset='train',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(1, 1, 2, 2, attributes = {
Expand All @@ -129,9 +136,10 @@ def test_can_import(self):
Bbox(5, 1, 2, 2, attributes = {
'blur': 0, 'expression': 0, 'illumination': 1,
'occluded': 0, 'pose': 0, 'invalid': 0}),
Label(1),
]
),
DatasetItem(id='0--Parade/0_Parade_image_03', subset='val',
DatasetItem(id='0_Parade_image_03', subset='val',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(0, 0, 1, 1, attributes = {
Expand All @@ -143,9 +151,10 @@ def test_can_import(self):
Bbox(5, 6, 1, 1, attributes = {
'blur': 2, 'expression': 0, 'illumination': 0,
'occluded': 0, 'pose': 2, 'invalid': 0}),
Label(0),
]
),
])
], categories= ['Parade', 'Handshaking'])

dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'wider_face')

Expand Down