forked from cvat-ai/cvat
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support for LFW dataset format (cvat-ai#110)
* Add support for LFW dataset format
* Update documentation
* Update Changelog

Co-authored-by: Maxim Zhiltsov <maxim.zhiltsov@intel.com>
- Loading branch information
1 parent
1325eef
commit dad5c05
Showing
10 changed files
with
295 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
# Copyright (C) 2020 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: MIT | ||
|
||
import os | ||
import os.path as osp | ||
import re | ||
|
||
from datumaro.components.converter import Converter | ||
from datumaro.components.extractor import (AnnotationType, DatasetItem, | ||
Importer, Points, SourceExtractor) | ||
|
||
|
||
class LfwPath:
    """File and directory names that make up one subset of an LFW dataset."""

    # Directory (inside the subset directory) that holds the image files.
    IMAGES_DIR = 'images'
    # Optional per-image landmark coordinates, one tab-separated line per image.
    LANDMARKS_FILE = 'landmarks.txt'
    # List of matched / mismatched image pairs.
    PAIRS_FILE = 'pairs.txt'
    # Extension appended to an item id to obtain its image file name.
    IMAGE_EXT = '.jpg'
    # Splits "<person>_<number>" into its two parts (searched, not anchored).
    PATTERN = re.compile(r'([\w]+)_([-\d]+)')
|
||
class LfwExtractor(SourceExtractor):
    """Reads one subset of an LFW-style dataset.

    The expected layout is ``<dataset>/<subset>/pairs.txt`` with images under
    ``<dataset>/<subset>/images/`` and an optional
    ``<dataset>/<subset>/landmarks.txt``. The subset name is taken from the
    directory that contains the pairs file.
    """

    def __init__(self, path):
        """Load all items described by the pairs file at ``path``.

        Raises:
            NotADirectoryError: if ``path`` is not an existing file.
        """
        if not osp.isfile(path):
            # NOTE(review): the exception type does not match the check (a
            # missing *file*); it is kept unchanged for backward compatibility.
            raise NotADirectoryError("Can't read annotation file '%s'" % path)
        super().__init__(subset=osp.basename(osp.dirname(path)))
        self._dataset_dir = osp.dirname(osp.dirname(path))
        self._items = list(self._load_items(path).values())

    def _ensure_item(self, items, item_id, image_path):
        """Return ``items[item_id]``, creating an empty item first if needed."""
        if item_id not in items:
            items[item_id] = DatasetItem(id=item_id, subset=self._subset,
                image=image_path,
                attributes={'positive_pairs': [], 'negative_pairs': []})
        return items[item_id]

    def _load_items(self, path):
        """Parse the pairs file and the optional landmarks file.

        Pairs-file lines with three fields (``person n1 n2``) are positive
        pairs, lines with four fields (``person1 n1 person2 n2``) are negative
        pairs; any other line (for example a header) is ignored. The pair is
        recorded in the attributes of the first image only.

        Returns:
            dict mapping item id to ``DatasetItem``, in order of first
            appearance.
        """
        items = {}
        images_dir = osp.join(self._dataset_dir, self._subset, LfwPath.IMAGES_DIR)

        with open(path, encoding='utf-8') as f:
            for line in f:
                fields = line.split()
                if len(fields) == 3:
                    first = self.get_image_name(fields[0], fields[1])
                    second = self.get_image_name(fields[0], fields[2])
                    kind = 'positive_pairs'
                elif len(fields) == 4:
                    first = self.get_image_name(fields[0], fields[1])
                    second = self.get_image_name(fields[2], fields[3])
                    kind = 'negative_pairs'
                else:
                    continue

                # Both images become items, but only the first one stores
                # the relation.
                for item_id in (first, second):
                    self._ensure_item(items, item_id,
                        osp.join(images_dir, item_id + LfwPath.IMAGE_EXT))
                items[first].attributes[kind].append(second)

        landmarks_file = osp.join(self._dataset_dir, self._subset,
            LfwPath.LANDMARKS_FILE)
        if osp.isfile(landmarks_file):
            with open(landmarks_file, encoding='utf-8') as f:
                for line in f:
                    # Strip the line terminator so it cannot leak into the
                    # item id, and skip blank lines instead of turning them
                    # into bogus items.
                    line = line.strip()
                    if not line:
                        continue
                    fields = line.split('\t')

                    image_name = fields[0]
                    item_id = image_name
                    if item_id.endswith(LfwPath.IMAGE_EXT):
                        item_id = item_id[:-len(LfwPath.IMAGE_EXT)]

                    item = self._ensure_item(items, item_id,
                        osp.join(images_dir, image_name))
                    item.annotations.append(
                        Points([float(p) for p in fields[1:]]))
        return items

    @staticmethod
    def get_image_name(person, image_id):
        """Build the item id ``<person>/<person>_<NNNN>`` (number zero-padded to 4)."""
        return '{}/{}_{:04d}'.format(person, person, int(image_id))
|
||
class LfwImporter(Importer):
    """Locates LFW subsets by looking for their pairs file."""

    @classmethod
    def find_sources(cls, path):
        """Search ``path`` for pairs files and report them as 'lfw' sources.

        The search itself is delegated to the base-class helper
        ``_find_sources_recursive``.
        """
        marker_file = LfwPath.PAIRS_FILE
        format_name = 'lfw'
        return cls._find_sources_recursive(path, marker_file, format_name)
|
||
class LfwConverter(Converter):
    """Writes a dataset in the LFW layout.

    For every subset this produces ``<save_dir>/<subset>/pairs.txt``,
    optionally the images under ``<save_dir>/<subset>/images/`` and, when any
    item has point annotations, ``<save_dir>/<subset>/landmarks.txt``.
    """

    DEFAULT_IMAGE_EXT = '.jpg'

    @staticmethod
    def _split_id(item_id):
        """Split an id containing ``<person>_<number>`` into ``(person, int)``.

        Raises:
            ValueError: if ``item_id`` does not contain that pattern.
        """
        match = LfwPath.PATTERN.search(item_id)
        if match is None:
            raise ValueError("Item id '%s' does not follow the LFW naming "
                "scheme '<person>_<number>'" % item_id)
        person, number = match.groups()
        return person, int(number)

    def apply(self):
        """Export every subset of the wrapped extractor to ``self._save_dir``.

        Raises:
            ValueError: if an item that has pairs, or an id referenced in a
                pair, does not follow the ``<person>_<number>`` naming scheme.
        """
        for subset_name, subset in self._extractor.subsets().items():
            positive_pairs = []
            negative_pairs = []
            landmarks = []
            for item in subset:
                if item.has_image and self._save_images:
                    self._save_image(item, osp.join(self._save_dir, subset_name,
                        LfwPath.IMAGES_DIR, item.id + LfwPath.IMAGE_EXT))

                attrs = item.attributes
                positive = attrs['positive_pairs'] \
                    if 'positive_pairs' in attrs else ()
                negative = attrs['negative_pairs'] \
                    if 'negative_pairs' in attrs else ()

                # Parse the id only when it is needed: an item without pairs
                # must not fail just because its id is not LFW-shaped.
                if positive or negative:
                    person1, num1 = self._split_id(item.id)
                    for pair in positive:
                        # A positive pair shares the person, so only the
                        # second image number is written.
                        _, num2 = self._split_id(pair)
                        positive_pairs.append('%s\t%s\t%s' %
                            (person1, num1, num2))
                    for pair in negative:
                        person2, num2 = self._split_id(pair)
                        negative_pairs.append('%s\t%s\t%s\t%s' %
                            (person1, num1, person2, num2))

                for landmark in item.annotations:
                    if landmark.type != AnnotationType.points:
                        continue
                    landmarks.append('%s\t%s' % (item.id + LfwPath.IMAGE_EXT,
                        '\t'.join(str(p) for p in landmark.points)))

            pairs_file = osp.join(self._save_dir, subset_name, LfwPath.PAIRS_FILE)
            os.makedirs(osp.dirname(pairs_file), exist_ok=True)
            with open(pairs_file, 'w', encoding='utf-8') as f:
                # Positive pairs are written first, then negative pairs.
                f.writelines('%s\n' % pair for pair in positive_pairs)
                f.writelines('%s\n' % pair for pair in negative_pairs)

            if landmarks:
                landmarks_file = osp.join(self._save_dir, subset_name,
                    LfwPath.LANDMARKS_FILE)
                with open(landmarks_file, 'w', encoding='utf-8') as f:
                    f.writelines('%s\n' % landmark for landmark in landmarks)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
name0/name0_0001.jpg 0 4 3 3 2 2 1 0 3 0 | ||
name1/name1_0001.jpg 1 6 4 6 3 3 2 1 4 1 | ||
name1/name1_0002.jpg 0 5 3 5 2 2 1 0 3 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
1 2 | ||
name1 1 2 | ||
name0 1 name1 1 | ||
name0 1 name1 2 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
import os.path as osp | ||
from unittest import TestCase | ||
|
||
import numpy as np | ||
from datumaro.components.dataset import Dataset | ||
from datumaro.components.extractor import DatasetItem, Points | ||
from datumaro.plugins.lfw_format import LfwConverter, LfwImporter | ||
from datumaro.util.test_utils import TestDir, compare_datasets | ||
|
||
|
||
class LfwFormatTest(TestCase):
    """Round-trip tests: export with LfwConverter, re-import, compare."""

    @staticmethod
    def _make_item(item_id, positive=(), negative=(), **extra):
        """Build a 2x5x3 all-ones image item with the given pair attributes."""
        return DatasetItem(id=item_id, image=np.ones((2, 5, 3)),
            attributes={
                'positive_pairs': list(positive),
                'negative_pairs': list(negative),
            },
            **extra)

    def _check_round_trip(self, source_dataset):
        """Save ``source_dataset`` as LFW, load it back and compare both."""
        with TestDir() as test_dir:
            LfwConverter.convert(source_dataset, test_dir, save_images=True)
            parsed_dataset = Dataset.import_from(test_dir, 'lfw')

            compare_datasets(self, source_dataset, parsed_dataset)

    def test_can_save_and_load(self):
        make = self._make_item
        source_dataset = Dataset.from_iterable([
            make('name0/name0_0001', subset='test',
                positive=['name0/name0_0002']),
            make('name0/name0_0002', subset='test',
                negative=['name1/name1_0001']),
            make('name1/name1_0001', subset='test',
                positive=['name1/name1_0002']),
            make('name1/name1_0002', subset='test',
                negative=['name0/name0_0001']),
        ])

        self._check_round_trip(source_dataset)

    def test_can_save_and_load_with_landmarks(self):
        make = self._make_item
        source_dataset = Dataset.from_iterable([
            make('name0/name0_0001', subset='test',
                positive=['name0/name0_0002'],
                annotations=[
                    Points([0, 4, 3, 3, 2, 2, 1, 0, 3, 0]),
                ]),
            make('name0/name0_0002', subset='test',
                annotations=[
                    Points([0, 5, 3, 5, 2, 2, 1, 0, 3, 0]),
                ]),
        ])

        self._check_round_trip(source_dataset)

    def test_can_save_and_load_with_no_subsets(self):
        make = self._make_item
        source_dataset = Dataset.from_iterable([
            make('name0/name0_0001', positive=['name0/name0_0002']),
            make('name0/name0_0002'),
        ])

        self._check_round_trip(source_dataset)
|
||
# Directory with the checked-in sample LFW dataset used by the import tests.
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'lfw_dataset')


class LfwImporterTest(TestCase):
    """Tests detection and import of the sample dataset in DUMMY_DATASET_DIR."""

    def test_can_detect(self):
        detected = LfwImporter.detect(DUMMY_DATASET_DIR)
        self.assertTrue(detected)

    def test_can_import(self):
        def image():
            # Every expected item is compared against a 2x5x3 all-ones image.
            return np.ones((2, 5, 3))

        expected_items = [
            DatasetItem(id='name0/name0_0001',
                subset='test', image=image(),
                attributes={
                    'positive_pairs': [],
                    'negative_pairs': ['name1/name1_0001',
                        'name1/name1_0002'],
                },
                annotations=[
                    Points([0, 4, 3, 3, 2, 2, 1, 0, 3, 0]),
                ]),
            DatasetItem(id='name1/name1_0001',
                subset='test', image=image(),
                attributes={
                    'positive_pairs': ['name1/name1_0002'],
                    'negative_pairs': [],
                },
                annotations=[
                    Points([1, 6, 4, 6, 3, 3, 2, 1, 4, 1]),
                ]),
            DatasetItem(id='name1/name1_0002',
                subset='test', image=image(),
                attributes={
                    'positive_pairs': [],
                    'negative_pairs': [],
                },
                annotations=[
                    Points([0, 5, 3, 5, 2, 2, 1, 0, 3, 0]),
                ]),
        ]
        expected_dataset = Dataset.from_iterable(expected_items)

        dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'lfw')

        compare_datasets(self, expected_dataset, dataset)