Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataset meta file for some formats #569

Merged
merged 21 commits into from
Nov 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/547>)
- Import for SYNTHIA dataset format
(<https://github.com/openvinotoolkit/datumaro/pull/532>)
- Support for Accuracy Checker dataset meta files in formats
(<https://github.com/openvinotoolkit/datumaro/pull/553>)
- Support of `score` attribute in KITTI detection
(<https://github.com/openvinotoolkit/datumaro/pull/571>)
- Support for Accuracy Checker dataset meta files in formats
(<https://github.com/openvinotoolkit/datumaro/pull/553>,
<https://github.com/openvinotoolkit/datumaro/pull/569>)

### Changed
- The following formats can now be detected unambiguously:
Expand Down
9 changes: 8 additions & 1 deletion datumaro/plugins/ade20k2017_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from datumaro.util.image import (
IMAGE_EXTENSIONS, find_images, lazy_image, load_image,
)
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class Ade20k2017Path:
Expand All @@ -31,7 +32,9 @@ def __init__(self, path):
if not osp.isdir(path):
raise FileNotFoundError("Can't read dataset directory '%s'" % path)

subsets = os.listdir(path)
# exclude dataset meta file
subsets = [subset for subset in os.listdir(path)
if osp.splitext(subset)[-1] != '.json']
if len(subsets) < 1:
raise FileNotFoundError("Can't read subsets in directory '%s'" % path)

Expand All @@ -41,6 +44,10 @@ def __init__(self, path):
self._items = []
self._categories = {}

if has_meta_file(self._path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._path).keys())) }

for subset in self._subsets:
self._load_items(subset)

Expand Down
9 changes: 8 additions & 1 deletion datumaro/plugins/ade20k2020_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from datumaro.util.image import (
IMAGE_EXTENSIONS, find_images, lazy_image, load_image,
)
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class Ade20k2020Path:
Expand All @@ -33,7 +34,9 @@ def __init__(self, path):
if not osp.isdir(path):
raise FileNotFoundError("Can't read dataset directory '%s'" % path)

subsets = os.listdir(path)
# exclude dataset meta file
subsets = [subset for subset in os.listdir(path)
if osp.splitext(subset)[-1] != '.json']
if len(subsets) < 1:
raise FileNotFoundError("Can't read subsets in directory '%s'" % path)

Expand All @@ -43,6 +46,10 @@ def __init__(self, path):
self._items = []
self._categories = {}

if has_meta_file(self._path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._path).keys())) }

for subset in self._subsets:
self._load_items(subset)

Expand Down
5 changes: 5 additions & 0 deletions datumaro/plugins/align_celeba_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from datumaro.components.errors import DatasetImportError
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util.image import find_images
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class AlignCelebaPath:
Expand All @@ -32,6 +33,10 @@ def __init__(self, path):
self._anno_dir = osp.dirname(path)

self._categories = { AnnotationType.label: LabelCategories() }
if has_meta_file(path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }

self._items = list(self._load_items(path).values())

def _load_items(self, root_dir):
Expand Down
5 changes: 5 additions & 0 deletions datumaro/plugins/celeba_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from datumaro.components.errors import DatasetImportError
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util.image import find_images
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class CelebaPath:
Expand All @@ -30,6 +31,10 @@ def __init__(self, path):
super().__init__()

self._categories = { AnnotationType.label: LabelCategories() }
if has_meta_file(path):
self._categories = { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }

self._items = list(self._load_items(path).values())

def _load_items(self, root_dir):
Expand Down
8 changes: 8 additions & 0 deletions datumaro/plugins/cifar_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from datumaro.components.dataset import ItemStatus
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util import cast
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class RestrictedUnpickler(pickle.Unpickler):
Expand Down Expand Up @@ -65,6 +66,10 @@ def __init__(self, path, subset=None):
self._items = list(self._load_items(path).values())

def _load_categories(self, path):
if has_meta_file(path):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }

label_cat = LabelCategories()

meta_file = osp.join(path, CifarPath.META_10_FILE)
Expand Down Expand Up @@ -175,6 +180,9 @@ class CifarConverter(Converter):
def apply(self):
os.makedirs(self._save_dir, exist_ok=True)

if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

label_categories = self._extractor.categories()[AnnotationType.label]
label_names = []
coarse_label_names = []
Expand Down
13 changes: 10 additions & 3 deletions datumaro/plugins/kitti_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from datumaro.util.annotation_util import make_label_id_mapping
from datumaro.util.image import save_image
from datumaro.util.mask_tools import paint_mask
from datumaro.util.meta_file_util import is_meta_file, parse_meta_file

from .format import (
KittiLabelMap, KittiPath, KittiTask, make_kitti_categories, parse_label_map,
Expand Down Expand Up @@ -156,8 +157,11 @@ def get_label(self, label_id):
categories()[AnnotationType.label].items[label_id].name

def save_label_map(self):
path = osp.join(self._save_dir, KittiPath.LABELMAP_FILE)
write_label_map(path, self._label_map)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)
else:
path = osp.join(self._save_dir, KittiPath.LABELMAP_FILE)
write_label_map(path, self._label_map)

def _load_categories(self, label_map_source):
if label_map_source == LabelmapType.kitti.name:
Expand Down Expand Up @@ -188,7 +192,10 @@ def _load_categories(self, label_map_source):
sorted(label_map_source.items(), key=lambda e: e[0]))

elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
label_map = parse_label_map(label_map_source)
if is_meta_file(label_map_source):
label_map = parse_meta_file(label_map_source)
else:
label_map = parse_label_map(label_map_source)

else:
raise Exception("Wrong labelmap specified, "
Expand Down
17 changes: 13 additions & 4 deletions datumaro/plugins/kitti_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
AnnotationType, DatasetItem, SourceExtractor,
)
from datumaro.util.image import find_images, load_image
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file

from .format import (
KittiLabelMap, KittiPath, KittiTask, make_kitti_categories, parse_label_map,
Expand All @@ -36,15 +37,23 @@ def _load_categories(self, path):
if self._task == KittiTask.segmentation:
return self._load_categories_segmentation(path)
elif self._task == KittiTask.detection:
if has_meta_file(path):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(path).keys())) }

return {AnnotationType.label: LabelCategories()}

def _load_categories_segmentation(self, path):
label_map = None
label_map_path = osp.join(path, KittiPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
if has_meta_file(path):
label_map = parse_meta_file(path)
else:
label_map = KittiLabelMap
label_map_path = osp.join(path, KittiPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
else:
label_map = KittiLabelMap

self._labels = [label for label in label_map]
return make_kitti_categories(label_map)

Expand Down
9 changes: 9 additions & 0 deletions datumaro/plugins/lfw_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.util.image import find_images
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class LfwPath:
Expand Down Expand Up @@ -44,6 +45,10 @@ def __init__(self, path, subset=None):
self._items = list(self._load_items(path).values())

def _load_categories(self, path):
if has_meta_file(self._dataset_dir):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._dataset_dir).keys())) }

label_cat = LabelCategories()
if osp.isfile(path):
with open(path, encoding='utf-8') as labels_file:
Expand Down Expand Up @@ -188,6 +193,10 @@ class LfwConverter(Converter):
DEFAULT_IMAGE_EXT = LfwPath.IMAGE_EXT

def apply(self):
os.makedirs(self._save_dir, exist_ok=True)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

for subset_name, subset in self._extractor.subsets().items():
label_categories = self._extractor.categories()[AnnotationType.label]
labels = {label.name: 0 for label in label_categories}
Expand Down
8 changes: 8 additions & 0 deletions datumaro/plugins/mnist_csv_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
)
from datumaro.components.converter import Converter
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class MnistCsvPath:
Expand All @@ -35,6 +36,10 @@ def __init__(self, path, subset=None):
self._items = list(self._load_items(path).values())

def _load_categories(self):
if has_meta_file(self._dataset_dir):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._dataset_dir).keys())) }

label_cat = LabelCategories()

labels_file = osp.join(self._dataset_dir, 'labels.txt')
Expand Down Expand Up @@ -100,6 +105,9 @@ class MnistCsvConverter(Converter):

def apply(self):
os.makedirs(self._save_dir, exist_ok=True)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

for subset_name, subset in self._extractor.subsets().items():
data = []
item_ids = {}
Expand Down
8 changes: 8 additions & 0 deletions datumaro/plugins/mnist_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)
from datumaro.components.converter import Converter
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class MnistPath:
Expand Down Expand Up @@ -43,6 +44,10 @@ def __init__(self, path, subset=None):
self._items = list(self._load_items(path).values())

def _load_categories(self):
if has_meta_file(self._dataset_dir):
return { AnnotationType.label: LabelCategories().
from_iterable(list(parse_meta_file(self._dataset_dir).keys())) }

label_cat = LabelCategories()

labels_file = osp.join(self._dataset_dir, 'labels.txt')
Expand Down Expand Up @@ -116,6 +121,9 @@ class MnistConverter(Converter):

def apply(self):
os.makedirs(self._save_dir, exist_ok=True)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

for subset_name, subset in self._extractor.subsets().items():
labels = []
images = np.array([])
Expand Down
3 changes: 3 additions & 0 deletions datumaro/plugins/synthia_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.util.image import find_images, load_image
from datumaro.util.mask_tools import generate_colormap, lazy_mask
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class SynthiaPath:
Expand Down Expand Up @@ -98,6 +99,8 @@ def __init__(self, path):
self._items = list(self._load_items(path).values())

def _load_categories(self, path):
if has_meta_file(path):
return make_categories(parse_meta_file(path))
label_map_path = osp.join(path, SynthiaPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
Expand Down
3 changes: 3 additions & 0 deletions datumaro/plugins/yolo_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ def apply(self):

os.makedirs(save_dir, exist_ok=True)

if self._save_dataset_meta:
self._save_meta_file(self._save_dir)

label_categories = extractor.categories()[AnnotationType.label]
label_ids = {label.name: idx
for idx, label in enumerate(label_categories.items)}
Expand Down
5 changes: 5 additions & 0 deletions datumaro/plugins/yolo_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from datumaro.util.image import (
DEFAULT_IMAGE_META_FILE_NAME, load_image_meta_file,
)
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file
from datumaro.util.os_util import split_path

from .format import YoloPath
Expand Down Expand Up @@ -171,6 +172,10 @@ def _parse_annotations(anno_path, image):

@staticmethod
def _load_categories(names_path):
if has_meta_file(osp.dirname(names_path)):
return LabelCategories().from_iterable(
list(parse_meta_file(osp.dirname(names_path)).keys()))

label_categories = LabelCategories()

with open(names_path, 'r', encoding='utf-8') as f:
Expand Down
3 changes: 3 additions & 0 deletions site/content/en/docs/formats/ade20k2017.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ ADE20K dataset directory should have the following structure:
<!--lint disable fenced-code-flag-->
```
dataset/
├── dataset_meta.json # a list of non-format labels (optional)
├── subset1/
│ └── super_label_1/
│ ├── img1.jpg
Expand Down Expand Up @@ -82,6 +83,8 @@ image. Each line in the text file contains:
Each column is separated by a `#`. See an example of the dataset
[here](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/ade20k2017_dataset).

To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file).

## Export to other formats

Datumaro can convert an ADE20K dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/).
Expand Down
3 changes: 3 additions & 0 deletions site/content/en/docs/formats/ade20k2020.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ ADE20K dataset directory should have the following structure:
<!--lint disable fenced-code-flag-->
```
dataset/
├── dataset_meta.json # a list of non-format labels (optional)
├── subset1/
│ ├── img1/ # directory with instance masks for img1
│ | ├── instance_001_img1.png
Expand Down Expand Up @@ -101,6 +102,8 @@ See our [tests asset](https://github.com/openvinotoolkit/datumaro/tree/develop/t
for example of this file,
or check [ADE20K toolkit](https://github.com/CSAILVision/ADE20K) for it.

To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file).

## Export to other formats

Datumaro can convert an ADE20K dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/).
Expand Down
3 changes: 3 additions & 0 deletions site/content/en/docs/formats/align_celeba.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Align CelebA dataset directory should have the following structure:
<!--lint disable fenced-code-flag-->
```
dataset/
├── dataset_meta.json # a list of non-format labels (optional)
├── Anno/
│   ├── identity_CelebA.txt
│   ├── list_attr_celeba.txt
Expand All @@ -69,6 +70,8 @@ landmarks and subsets respectively (optional).
The original CelebA dataset stores images in a .7z archive. The archive
needs to be unpacked before importing.

To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file).

## Export to other formats

Datumaro can convert an align CelebA dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/).
Expand Down
Loading