-
Notifications
You must be signed in to change notification settings - Fork 135
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #47 from openvinotoolkit/develop
Release v0.1.3
- Loading branch information
Showing
25 changed files
with
509 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
|
||
# Copyright (C) 2020 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: MIT | ||
|
||
from glob import glob | ||
import logging as log | ||
import os | ||
import os.path as osp | ||
|
||
from datumaro.components.extractor import (DatasetItem, Label, | ||
LabelCategories, AnnotationType, SourceExtractor, Importer | ||
) | ||
from datumaro.components.converter import Converter | ||
|
||
|
||
class ImagenetPath:
    """File-layout constants for the ImageNet directory format."""

    # Extension a file must have to be loaded as an image; the converter
    # also writes images with this extension.
    IMAGES_EXT = '.jpg'

    # Directory holding images that carry no label. It is never turned
    # into a label category.
    IMAGES_DIR_NO_LABEL = 'no_label'
|
||
|
||
class ImagenetExtractor(SourceExtractor):
    """Reads a dataset stored as ``<path>/<label>/<label>_<image id>.jpg``.

    Each subdirectory of ``path`` names a label, except for
    ``ImagenetPath.IMAGES_DIR_NO_LABEL``, whose images get no annotation.
    An image id found under several label directories produces a single
    item carrying one ``Label`` annotation per directory.
    """

    def __init__(self, path, subset=None):
        """Load categories and items from the dataset directory ``path``.

        ``subset`` is forwarded to the base extractor and assigned to
        every loaded item.
        """
        assert osp.isdir(path), path
        super().__init__(subset=subset)

        self._categories = self._load_categories(path)
        self._items = list(self._load_items(path).values())

    def _load_categories(self, path):
        """Build the label categories from the subdirectory names of ``path``.

        Names are added in sorted order so label indices are stable.
        """
        label_cat = LabelCategories()
        for dir_name in sorted(os.listdir(path)):
            if dir_name == ImagenetPath.IMAGES_DIR_NO_LABEL:
                continue
            # A stray file in the dataset root (README, archive, ...) is
            # not a class and must not become a label.
            if not osp.isdir(osp.join(path, dir_name)):
                continue
            label_cat.add(dir_name)
        return { AnnotationType.label: label_cat }

    def _load_items(self, path):
        """Collect dataset items from ``<path>/<label>/*.jpg``.

        Returns a dict mapping image id to its ``DatasetItem``.
        """
        items = {}
        # Sorted so that item order (and which file becomes the item's
        # image when an id occurs under several labels) is deterministic.
        for image_path in sorted(glob(osp.join(path, '*', '*'))):
            file_name, ext = osp.splitext(osp.basename(image_path))
            if ext != ImagenetPath.IMAGES_EXT:
                continue

            label = osp.basename(osp.dirname(image_path))
            # Files are expected to be named "<label>_<image id>". Strip
            # the prefix only when it is really there, otherwise the id
            # would be cut at an arbitrary position.
            prefix = label + '_'
            if file_name.startswith(prefix):
                image_name = file_name[len(prefix):]
            else:
                image_name = file_name

            item = items.get(image_name)
            if item is None:
                item = DatasetItem(id=image_name, subset=self._subset,
                    image=image_path)
                items[image_name] = item

            if label != ImagenetPath.IMAGES_DIR_NO_LABEL:
                label_idx = \
                    self._categories[AnnotationType.label].find(label)[0]
                item.annotations.append(Label(label=label_idx))
        return items
|
||
|
||
class ImagenetImporter(Importer):
    """Detects datasets in the ImageNet directory format."""

    @classmethod
    def find_sources(cls, path):
        """Return a single 'imagenet' source for ``path`` when it is a
        directory, and an empty list otherwise."""
        if osp.isdir(path):
            return [{ 'url': path, 'format': 'imagenet' }]
        return []
|
||
|
||
class ImagenetConverter(Converter):
    """Writes a dataset as ``<save dir>/<label>/<label>_<item id>.jpg``.

    An item is saved once per distinct label it carries. Items without
    any label are saved into ``ImagenetPath.IMAGES_DIR_NO_LABEL``.
    """

    DEFAULT_IMAGE_EXT = ImagenetPath.IMAGES_EXT

    def apply(self):
        """Export every item of the extractor into the save directory."""
        if 1 < len(self._extractor.subsets()):
            log.warning("ImageNet format supports exporting only a single "
                "subset, subset information will not be used.")

        subset_dir = self._save_dir
        extractor = self._extractor
        categories = extractor.categories()
        for item in extractor:
            image_name = item.id
            # Only classification labels define the target directory;
            # other annotation kinds must not be treated as class labels.
            labels = set(p.label for p in item.annotations
                if p.type == AnnotationType.label)

            for label in labels:
                label_name = categories[AnnotationType.label][label].name
                self._save_image(item, osp.join(subset_dir, label_name,
                    '%s_%s%s' % \
                        (label_name, image_name, ImagenetPath.IMAGES_EXT)
                ))

            if not labels:
                self._save_image(item, osp.join(subset_dir,
                    ImagenetPath.IMAGES_DIR_NO_LABEL,
                    ImagenetPath.IMAGES_DIR_NO_LABEL + '_' +
                    image_name + ImagenetPath.IMAGES_EXT))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
|
||
# Copyright (C) 2020 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: MIT | ||
|
||
from glob import glob | ||
import os | ||
import os.path as osp | ||
|
||
from datumaro.components.extractor import (DatasetItem, Label, | ||
LabelCategories, AnnotationType, SourceExtractor, Importer | ||
) | ||
from datumaro.components.converter import Converter | ||
|
||
|
||
class ImagenetTxtPath:
    """File-layout constants for the ImageNet text-list format."""

    # File listing the label names, one per line. It sits next to the
    # subset annotation files and is not itself treated as a subset.
    LABELS_FILE = 'synsets.txt'

    # Default image directory, relative to the annotation file location.
    IMAGE_DIR = 'images'
|
||
class ImagenetTxtExtractor(SourceExtractor):
    """Reads one subset of a dataset in the ImageNet text-list format.

    The annotation file holds one image per line:
    ``<item id> <label index> [<label index> ...]``.
    """

    def __init__(self, path, labels=None, image_dir=None):
        """Load the subset described by the annotation file ``path``.

        The subset name is the file name of ``path`` without extension.

        Parameters:
            path: annotation file of the subset.
            labels: list of label names; when None, the names are read
                from ``ImagenetTxtPath.LABELS_FILE`` located next to
                ``path``.
            image_dir: image directory relative to the directory of
                ``path``; defaults to ``ImagenetTxtPath.IMAGE_DIR``.
        """
        assert osp.isfile(path), path
        super().__init__(subset=osp.splitext(osp.basename(path))[0])

        if not image_dir:
            image_dir = ImagenetTxtPath.IMAGE_DIR
        self.image_dir = osp.join(osp.dirname(path), image_dir)

        if labels is None:
            labels = osp.join(osp.dirname(path), ImagenetTxtPath.LABELS_FILE)
            labels = self._parse_labels(labels)
        else:
            assert all(isinstance(e, str) for e in labels)

        self._categories = self._load_categories(labels)
        self._items = list(self._load_items(path).values())

    @staticmethod
    def _parse_labels(path):
        """Return the stripped lines of the labels file ``path``.

        The position of a line is the label index, so no line is dropped.
        """
        with open(path, encoding='utf-8') as labels_file:
            return [s.strip() for s in labels_file]

    def _load_categories(self, labels):
        """Wrap the label names into the categories mapping."""
        return { AnnotationType.label: LabelCategories().from_iterable(labels) }

    def _load_items(self, path):
        """Parse the annotation file ``path``.

        Returns a dict mapping item id to its ``DatasetItem``. A repeated
        id keeps only its last occurrence.
        """
        items = {}
        with open(path, encoding='utf-8') as f:
            for line in f:
                fields = line.split()
                # Blank lines (e.g. a trailing empty line) carry no item;
                # indexing them would fail with IndexError.
                if not fields:
                    continue

                item_id = fields[0]
                label_ids = [int(label_id) for label_id in fields[1:]]
                anno = []
                for label in label_ids:
                    assert 0 <= label and \
                        label < len(self._categories[AnnotationType.label]), \
                        "Image '%s': unknown label id '%s'" % (item_id, label)
                    anno.append(Label(label))
                items[item_id] = DatasetItem(id=item_id, subset=self._subset,
                    image=osp.join(self.image_dir, item_id + '.jpg'),
                    annotations=anno)
        return items
|
||
|
||
class ImagenetTxtImporter(Importer):
    """Detects datasets in the ImageNet text-list format."""

    @classmethod
    def find_sources(cls, path):
        """Collect 'imagenet_txt' sources for the ``*.txt`` files directly
        inside ``path``, leaving out the labels file."""
        sources = []
        for candidate in glob(osp.join(path, '*.txt')):
            # The labels file shares the extension but is not a subset.
            if osp.basename(candidate) == ImagenetTxtPath.LABELS_FILE:
                continue
            sources.extend(cls._find_sources_recursive(
                candidate, '.txt', 'imagenet_txt'))
        return sources
|
||
|
||
class ImagenetTxtConverter(Converter):
    """Writes a dataset in the ImageNet text-list format.

    Produces one ``<subset>.txt`` annotation file per subset, a shared
    labels file and, when image saving is enabled, the images themselves.
    """

    DEFAULT_IMAGE_EXT = '.jpg'

    def apply(self):
        """Export all subsets and the label list into the save directory."""
        save_dir = self._save_dir
        os.makedirs(save_dir, exist_ok=True)

        extractor = self._extractor
        for subset_name, subset in extractor.subsets().items():
            # item id -> label indices rendered as strings
            labels_by_id = {}
            for item in subset:
                labels_by_id[item.id] = [
                    str(ann.label) for ann in item.annotations
                    if ann.type == AnnotationType.label
                ]

                if self._save_images and item.has_image:
                    image_path = osp.join(self._save_dir,
                        ImagenetTxtPath.IMAGE_DIR,
                        self._make_image_filename(item))
                    self._save_image(item, image_path)

            annotation_file = osp.join(save_dir, '%s.txt' % subset_name)
            with open(annotation_file, 'w', encoding='utf-8') as f:
                f.writelines([
                    '%s %s\n' % (item_id, ' '.join(item_labels))
                    for item_id, item_labels in labels_by_id.items()
                ])

        # The label list is shared by all subsets, so it is written once.
        labels_file = osp.join(save_dir, ImagenetTxtPath.LABELS_FILE)
        with open(labels_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(
                l.name for l in extractor.categories()[AnnotationType.label]
            ))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
VERSION = '0.1.0' | ||
VERSION = '0.1.3' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
label_0 | ||
label_1 | ||
label_2 | ||
label_3 | ||
label_4 | ||
label_5 | ||
label_6 | ||
label_7 | ||
label_8 | ||
label_9 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
1 0 | ||
2 5 | ||
3 3 | ||
4 5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.