diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c87c68660..510dc94255 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - Import for SYNTHIA dataset format () -- Support for Accuracy Checker dataset meta files in formats - () - Support of `score` attribute in KITTI detetion () +- Support for Accuracy Checker dataset meta files in formats + (, + ) ### Changed - The following formats can now be detected unambiguously: diff --git a/datumaro/plugins/ade20k2017_format.py b/datumaro/plugins/ade20k2017_format.py index 25ab8f177e..0bcffa27f4 100644 --- a/datumaro/plugins/ade20k2017_format.py +++ b/datumaro/plugins/ade20k2017_format.py @@ -18,6 +18,7 @@ from datumaro.util.image import ( IMAGE_EXTENSIONS, find_images, lazy_image, load_image, ) +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file class Ade20k2017Path: @@ -31,7 +32,9 @@ def __init__(self, path): if not osp.isdir(path): raise FileNotFoundError("Can't read dataset directory '%s'" % path) - subsets = os.listdir(path) + # exclude dataset meta file + subsets = [subset for subset in os.listdir(path) + if osp.splitext(subset)[-1] != '.json'] if len(subsets) < 1: raise FileNotFoundError("Can't read subsets in directory '%s'" % path) @@ -41,6 +44,10 @@ def __init__(self, path): self._items = [] self._categories = {} + if has_meta_file(self._path): + self._categories = { AnnotationType.label: LabelCategories(). + from_iterable(list(parse_meta_file(self._path).keys())) } + for subset in self._subsets: self._load_items(subset) diff --git a/datumaro/plugins/ade20k2020_format.py b/datumaro/plugins/ade20k2020_format.py index fa20c48dd5..38c4b5317d 100644 --- a/datumaro/plugins/ade20k2020_format.py +++ b/datumaro/plugins/ade20k2020_format.py @@ -19,6 +19,7 @@ from datumaro.util.image import ( IMAGE_EXTENSIONS, find_images, lazy_image, load_image, ) +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file class Ade20k2020Path: @@ -33,7 +34,9 @@ def __init__(self, path): if not osp.isdir(path): raise FileNotFoundError("Can't read dataset directory '%s'" % path) - subsets = os.listdir(path) + # exclude dataset meta file + subsets = [subset for subset in os.listdir(path) + if osp.splitext(subset)[-1] != '.json'] if len(subsets) < 1: raise FileNotFoundError("Can't read subsets in directory '%s'" % path) @@ -43,6 +46,10 @@ def __init__(self, path): self._items = [] self._categories = {} + if has_meta_file(self._path): + self._categories = { AnnotationType.label: LabelCategories(). + from_iterable(list(parse_meta_file(self._path).keys())) } + for subset in self._subsets: self._load_items(subset) diff --git a/datumaro/plugins/align_celeba_format.py b/datumaro/plugins/align_celeba_format.py index 1e3a6f0ebc..6a51aae885 100644 --- a/datumaro/plugins/align_celeba_format.py +++ b/datumaro/plugins/align_celeba_format.py @@ -10,6 +10,7 @@ from datumaro.components.errors import DatasetImportError from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor from datumaro.util.image import find_images +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file class AlignCelebaPath: @@ -32,6 +33,10 @@ def __init__(self, path): self._anno_dir = osp.dirname(path) self._categories = { AnnotationType.label: LabelCategories() } + if has_meta_file(path): + self._categories = { AnnotationType.label: LabelCategories(). 
+ from_iterable(list(parse_meta_file(path).keys())) } + self._items = list(self._load_items(path).values()) def _load_items(self, root_dir): diff --git a/datumaro/plugins/celeba_format.py b/datumaro/plugins/celeba_format.py index d7624b5dee..639a7c7b5a 100644 --- a/datumaro/plugins/celeba_format.py +++ b/datumaro/plugins/celeba_format.py @@ -10,6 +10,7 @@ from datumaro.components.errors import DatasetImportError from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor from datumaro.util.image import find_images +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file class CelebaPath: @@ -30,6 +31,10 @@ def __init__(self, path): super().__init__() self._categories = { AnnotationType.label: LabelCategories() } + if has_meta_file(path): + self._categories = { AnnotationType.label: LabelCategories(). + from_iterable(list(parse_meta_file(path).keys())) } + self._items = list(self._load_items(path).values()) def _load_items(self, root_dir): diff --git a/datumaro/plugins/cifar_format.py b/datumaro/plugins/cifar_format.py index 8ed8ead22c..f372f1304a 100644 --- a/datumaro/plugins/cifar_format.py +++ b/datumaro/plugins/cifar_format.py @@ -17,6 +17,7 @@ from datumaro.components.dataset import ItemStatus from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor from datumaro.util import cast +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file class RestrictedUnpickler(pickle.Unpickler): @@ -65,6 +66,10 @@ def __init__(self, path, subset=None): self._items = list(self._load_items(path).values()) def _load_categories(self, path): + if has_meta_file(path): + return { AnnotationType.label: LabelCategories(). + from_iterable(list(parse_meta_file(path).keys())) } + label_cat = LabelCategories() meta_file = osp.join(path, CifarPath.META_10_FILE) @@ -175,6 +180,9 @@ class CifarConverter(Converter): def apply(self): os.makedirs(self._save_dir, exist_ok=True) + if self._save_dataset_meta: + self._save_meta_file(self._save_dir) + label_categories = self._extractor.categories()[AnnotationType.label] label_names = [] coarse_label_names = [] diff --git a/datumaro/plugins/kitti_format/converter.py b/datumaro/plugins/kitti_format/converter.py index ae7b0e821e..df187a8c6c 100644 --- a/datumaro/plugins/kitti_format/converter.py +++ b/datumaro/plugins/kitti_format/converter.py @@ -18,6 +18,7 @@ from datumaro.util.annotation_util import make_label_id_mapping from datumaro.util.image import save_image from datumaro.util.mask_tools import paint_mask +from datumaro.util.meta_file_util import is_meta_file, parse_meta_file from .format import ( KittiLabelMap, KittiPath, KittiTask, make_kitti_categories, parse_label_map, @@ -156,8 +157,11 @@ def get_label(self, label_id): categories()[AnnotationType.label].items[label_id].name def save_label_map(self): - path = osp.join(self._save_dir, KittiPath.LABELMAP_FILE) - write_label_map(path, self._label_map) + if self._save_dataset_meta: + self._save_meta_file(self._save_dir) + else: + path = osp.join(self._save_dir, KittiPath.LABELMAP_FILE) + write_label_map(path, self._label_map) def _load_categories(self, label_map_source): if label_map_source == LabelmapType.kitti.name: @@ -188,7 +192,10 @@ def _load_categories(self, label_map_source): sorted(label_map_source.items(), key=lambda e: e[0])) elif isinstance(label_map_source, str) and osp.isfile(label_map_source): - label_map = parse_label_map(label_map_source) + if is_meta_file(label_map_source): + label_map = parse_meta_file(label_map_source) + 
else: + label_map = parse_label_map(label_map_source) else: raise Exception("Wrong labelmap specified, " diff --git a/datumaro/plugins/kitti_format/extractor.py b/datumaro/plugins/kitti_format/extractor.py index 7d216f4ffc..558e718b2f 100644 --- a/datumaro/plugins/kitti_format/extractor.py +++ b/datumaro/plugins/kitti_format/extractor.py @@ -12,6 +12,7 @@ AnnotationType, DatasetItem, SourceExtractor, ) from datumaro.util.image import find_images, load_image +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file from .format import ( KittiLabelMap, KittiPath, KittiTask, make_kitti_categories, parse_label_map, @@ -36,15 +37,23 @@ def _load_categories(self, path): if self._task == KittiTask.segmentation: return self._load_categories_segmentation(path) elif self._task == KittiTask.detection: + if has_meta_file(path): + return { AnnotationType.label: LabelCategories(). + from_iterable(list(parse_meta_file(path).keys())) } + return {AnnotationType.label: LabelCategories()} def _load_categories_segmentation(self, path): label_map = None - label_map_path = osp.join(path, KittiPath.LABELMAP_FILE) - if osp.isfile(label_map_path): - label_map = parse_label_map(label_map_path) + if has_meta_file(path): + label_map = parse_meta_file(path) else: - label_map = KittiLabelMap + label_map_path = osp.join(path, KittiPath.LABELMAP_FILE) + if osp.isfile(label_map_path): + label_map = parse_label_map(label_map_path) + else: + label_map = KittiLabelMap + self._labels = [label for label in label_map] return make_kitti_categories(label_map) diff --git a/datumaro/plugins/lfw_format.py b/datumaro/plugins/lfw_format.py index 63ab3148ee..4bd3724cce 100644 --- a/datumaro/plugins/lfw_format.py +++ b/datumaro/plugins/lfw_format.py @@ -13,6 +13,7 @@ from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor from datumaro.components.format_detection import FormatDetectionContext from datumaro.util.image import find_images +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file class LfwPath: @@ -44,6 +45,10 @@ def __init__(self, path, subset=None): self._items = list(self._load_items(path).values()) def _load_categories(self, path): + if has_meta_file(self._dataset_dir): + return { AnnotationType.label: LabelCategories(). + from_iterable(list(parse_meta_file(self._dataset_dir).keys())) } + label_cat = LabelCategories() if osp.isfile(path): with open(path, encoding='utf-8') as labels_file: @@ -188,6 +193,10 @@ class LfwConverter(Converter): DEFAULT_IMAGE_EXT = LfwPath.IMAGE_EXT def apply(self): + os.makedirs(self._save_dir, exist_ok=True) + if self._save_dataset_meta: + self._save_meta_file(self._save_dir) + for subset_name, subset in self._extractor.subsets().items(): label_categories = self._extractor.categories()[AnnotationType.label] labels = {label.name: 0 for label in label_categories} diff --git a/datumaro/plugins/mnist_csv_format.py b/datumaro/plugins/mnist_csv_format.py index d79525e347..e6ec6c1fb5 100644 --- a/datumaro/plugins/mnist_csv_format.py +++ b/datumaro/plugins/mnist_csv_format.py @@ -12,6 +12,7 @@ ) from datumaro.components.converter import Converter from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file class MnistCsvPath: @@ -35,6 +36,10 @@ def __init__(self, path, subset=None): self._items = list(self._load_items(path).values()) def _load_categories(self): + if has_meta_file(self._dataset_dir): + return { AnnotationType.label: LabelCategories(). 
+ from_iterable(list(parse_meta_file(self._dataset_dir).keys())) } + label_cat = LabelCategories() labels_file = osp.join(self._dataset_dir, 'labels.txt') @@ -100,6 +105,9 @@ class MnistCsvConverter(Converter): def apply(self): os.makedirs(self._save_dir, exist_ok=True) + if self._save_dataset_meta: + self._save_meta_file(self._save_dir) + for subset_name, subset in self._extractor.subsets().items(): data = [] item_ids = {} diff --git a/datumaro/plugins/mnist_format.py b/datumaro/plugins/mnist_format.py index fe8ca91f85..4c89857c68 100644 --- a/datumaro/plugins/mnist_format.py +++ b/datumaro/plugins/mnist_format.py @@ -13,6 +13,7 @@ ) from datumaro.components.converter import Converter from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file class MnistPath: @@ -43,6 +44,10 @@ def __init__(self, path, subset=None): self._items = list(self._load_items(path).values()) def _load_categories(self): + if has_meta_file(self._dataset_dir): + return { AnnotationType.label: LabelCategories(). + from_iterable(list(parse_meta_file(self._dataset_dir).keys())) } + label_cat = LabelCategories() labels_file = osp.join(self._dataset_dir, 'labels.txt') @@ -116,6 +121,9 @@ class MnistConverter(Converter): def apply(self): os.makedirs(self._save_dir, exist_ok=True) + if self._save_dataset_meta: + self._save_meta_file(self._save_dir) + for subset_name, subset in self._extractor.subsets().items(): labels = [] images = np.array([]) diff --git a/datumaro/plugins/synthia_format.py b/datumaro/plugins/synthia_format.py index a318d97103..5754d6bb6e 100644 --- a/datumaro/plugins/synthia_format.py +++ b/datumaro/plugins/synthia_format.py @@ -14,6 +14,7 @@ from datumaro.components.format_detection import FormatDetectionContext from datumaro.util.image import find_images, load_image from datumaro.util.mask_tools import generate_colormap, lazy_mask +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file class SynthiaPath: @@ -98,6 +99,8 @@ def __init__(self, path): self._items = list(self._load_items(path).values()) def _load_categories(self, path): + if has_meta_file(path): + return make_categories(parse_meta_file(path)) label_map_path = osp.join(path, SynthiaPath.LABELMAP_FILE) if osp.isfile(label_map_path): label_map = parse_label_map(label_map_path) diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py index 99896fa888..7c7be42777 100644 --- a/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/plugins/yolo_format/converter.py @@ -35,6 +35,9 @@ def apply(self): os.makedirs(save_dir, exist_ok=True) + if self._save_dataset_meta: + self._save_meta_file(self._save_dir) + label_categories = extractor.categories()[AnnotationType.label] label_ids = {label.name: idx for idx, label in enumerate(label_categories.items)} diff --git a/datumaro/plugins/yolo_format/extractor.py b/datumaro/plugins/yolo_format/extractor.py index ac4f9076cc..0fbae06a33 100644 --- a/datumaro/plugins/yolo_format/extractor.py +++ b/datumaro/plugins/yolo_format/extractor.py @@ -15,6 +15,7 @@ from datumaro.util.image import ( DEFAULT_IMAGE_META_FILE_NAME, load_image_meta_file, ) +from datumaro.util.meta_file_util import has_meta_file, parse_meta_file from datumaro.util.os_util import split_path from .format import YoloPath @@ -171,6 +172,10 @@ def _parse_annotations(anno_path, image): @staticmethod def _load_categories(names_path): + if has_meta_file(osp.dirname(names_path)): + return 
LabelCategories().from_iterable( + list(parse_meta_file(osp.dirname(names_path)).keys())) + label_categories = LabelCategories() with open(names_path, 'r', encoding='utf-8') as f: diff --git a/site/content/en/docs/formats/ade20k2017.md b/site/content/en/docs/formats/ade20k2017.md index a3c523cd3d..6c3fc6bc52 100644 --- a/site/content/en/docs/formats/ade20k2017.md +++ b/site/content/en/docs/formats/ade20k2017.md @@ -43,6 +43,7 @@ ADE20K dataset directory should have the following structure: ``` dataset/ +├── dataset_meta.json # a list of non-format labels (optional) ├── subset1/ │ └── super_label_1/ │ ├── img1.jpg @@ -82,6 +83,8 @@ image. Each line in the text file contains: Each column is separated by a `#`. See example of dataset [here](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/ade20k2017_dataset). +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). + ## Export to other formats Datumaro can convert an ADE20K dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/). diff --git a/site/content/en/docs/formats/ade20k2020.md b/site/content/en/docs/formats/ade20k2020.md index af4b912e24..fe79939291 100644 --- a/site/content/en/docs/formats/ade20k2020.md +++ b/site/content/en/docs/formats/ade20k2020.md @@ -43,6 +43,7 @@ ADE20K dataset directory should have the following structure: ``` dataset/ +├── dataset_meta.json # a list of non-format labels (optional) ├── subset1/ │ ├── img1/ # directory with instance masks for img1 │ | ├── instance_001_img1.png @@ -101,6 +102,8 @@ See our [tests asset](https://github.com/openvinotoolkit/datumaro/tree/develop/t for example of this file, or check [ADE20K toolkit](https://github.com/CSAILVision/ADE20K) for it. +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). + ## Export to other formats Datumaro can convert an ADE20K dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/). diff --git a/site/content/en/docs/formats/align_celeba.md b/site/content/en/docs/formats/align_celeba.md index 59bb79c3b9..dca8ad5ff0 100644 --- a/site/content/en/docs/formats/align_celeba.md +++ b/site/content/en/docs/formats/align_celeba.md @@ -48,6 +48,7 @@ Align CelebA dataset directory should have the following structure: ``` dataset/ +├── dataset_meta.json # a list of non-format labels (optional) ├── Anno/ │   ├── identity_CelebA.txt │   ├── list_attr_celeba.txt @@ -69,6 +70,8 @@ landmarks and subsets respectively (optional). The original CelebA dataset stores images in a .7z archive. The archive needs to be unpacked before importing. +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). + ## Export to other formats Datumaro can convert an align CelebA dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/). 
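The same categories-loading pattern recurs in every extractor touched by this patch: when a `dataset_meta.json` is found next to the data, its label names take precedence over the format's own label files. The sketch below is illustrative only and is not part of the patch; `has_meta_file`, `parse_meta_file`, `AnnotationType.label`, and `LabelCategories.from_iterable` are taken from the hunks above, while the `datumaro.components.annotation` import path and the `load_categories` helper name are assumptions made to keep the example self-contained.

```python
from datumaro.components.annotation import AnnotationType, LabelCategories
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


def load_categories(dataset_dir):
    # dataset_meta.json can hold either a "label_map" (id -> name) or a plain
    # "labels" list, as in the test assets added below; parse_meta_file()
    # exposes the label names as the keys of the mapping it returns.
    if has_meta_file(dataset_dir):
        return {AnnotationType.label:
            LabelCategories.from_iterable(parse_meta_file(dataset_dir).keys())}
    # Otherwise fall back to the format's native label source
    # (labels.txt, obj.names, label_colors.txt, ...).
    return {AnnotationType.label: LabelCategories()}
```

On the export side, the converters mirror this by calling `self._save_meta_file(self._save_dir)` when `self._save_dataset_meta` is set, which is what the `--save-dataset-meta` option documented in these doc changes toggles.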
diff --git a/site/content/en/docs/formats/celeba.md b/site/content/en/docs/formats/celeba.md index 7bf40fbf9c..e975a1eedd 100644 --- a/site/content/en/docs/formats/celeba.md +++ b/site/content/en/docs/formats/celeba.md @@ -48,6 +48,7 @@ CelebA dataset directory should have the following structure: ``` dataset/ +├── dataset_meta.json # a list of non-format labels (optional) ├── Anno/ │   ├── identity_CelebA.txt │   ├── list_attr_celeba.txt @@ -71,6 +72,8 @@ attributes, bounding boxes, landmarks and subsets respectively The original CelebA dataset stores images in a .7z archive. The archive needs to be unpacked before importing. +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). + ## Export to other formats Datumaro can convert a CelebA dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/). diff --git a/site/content/en/docs/formats/cifar.md b/site/content/en/docs/formats/cifar.md index 8f69aaf94e..b21177f016 100644 --- a/site/content/en/docs/formats/cifar.md +++ b/site/content/en/docs/formats/cifar.md @@ -46,6 +46,7 @@ CIFAR-10 dataset directory should have the following structure: ``` └─ Dataset/ + ├── dataset_meta.json # a list of non-format labels (optional) ├── batches.meta ├── ├── @@ -57,6 +58,7 @@ CIFAR-100 dataset directory should have the following structure: ``` └─ Dataset/ + ├── dataset_meta.json # a list of non-format labels (optional) ├── meta ├── ├── @@ -100,6 +102,8 @@ CIFAR-100: 'coarse_labels': list of integers ``` +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). + ## Export to other formats Datumaro can convert a CIFAR dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats). @@ -147,6 +151,8 @@ Extra options for exporting to CIFAR format: (by default `False`) - `--image-ext ` allow to specify image extension for exporting the dataset (by default `.png`) +- `--save-dataset-meta` - allow to export dataset with saving dataset meta + file (by default `False`) The format (CIFAR-10 or CIFAR-100) in which the dataset will be exported depends on the presence of superclasses in the `LabelCategories`. diff --git a/site/content/en/docs/formats/cityscapes.md b/site/content/en/docs/formats/cityscapes.md index d37f6838cd..e110dac60c 100644 --- a/site/content/en/docs/formats/cityscapes.md +++ b/site/content/en/docs/formats/cityscapes.md @@ -35,8 +35,8 @@ Cityscapes dataset directory should have the following structure: ``` └─ Dataset/ - ├── dataset_meta.json # a list of non-Pascal labels (optional) - ├── label_colors.txt # a list of non-Pascal labels in other format (optional) + ├── dataset_meta.json # a list of non-Cityscapes labels (optional) + ├── label_colors.txt # a list of non-Cityscapes labels in other format (optional) ├── imgsFine/ │ ├── leftImg8bit │ │ ├── @@ -68,12 +68,11 @@ Annotated files description: is the instance ID. If a certain annotation describes multiple instances, then the pixels have the regular ID of that class -To add unformatted classes, you can use `dataset_meta.json` and `label_colors.txt`. +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +and `label_colors.txt`. If the `dataset_meta.json` is not represented in the dataset, then `label_colors.txt` will be imported if possible. -Information about `dataset_meta.json` can be found [here](/docs/user_manual/supported_formats/#dataset-meta-file). 
- In `label_colors.txt` you can define custom color map and non-cityscapes labels, for example: @@ -130,7 +129,7 @@ Extra options for exporting to Cityscapes format: (by default `False`) - `--image-ext IMAGE_EXT` allow to specify image extension for exporting dataset (by default - keep original or use `.png`, if none) -- `--save-meta-file` - allow to export dataset with saving dataset meta +- `--save-dataset-meta` - allow to export dataset with saving dataset meta file (by default `False`) - `--label_map` allow to define a custom colormap. Example: diff --git a/site/content/en/docs/formats/kitti.md b/site/content/en/docs/formats/kitti.md index 178d48bed2..fde450c3d7 100644 --- a/site/content/en/docs/formats/kitti.md +++ b/site/content/en/docs/formats/kitti.md @@ -70,6 +70,7 @@ KITTI segmentation dataset directory should have the following structure: ``` └─ Dataset/ + ├── dataset_meta.json # a list of non-format labels (optional) ├── label_colors.txt # optional, color map for non-original segmentation labels ├── testing/ │ └── image_2/ @@ -99,6 +100,11 @@ KITTI segmentation dataset directory should have the following structure: └── ... ``` +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +and `label_colors.txt`. +If the `dataset_meta.json` is not represented in the dataset, then +`label_colors.txt` will be imported if possible. + You can import a dataset for specific tasks of KITTI dataset instead of the whole dataset, for example: @@ -156,6 +162,8 @@ Extra options for exporting to KITTI format: (by default `False`) - `--image-ext IMAGE_EXT` allow to specify image extension for exporting dataset (by default - keep original or use `.png`, if none) +- `--save-dataset-meta` - allow to export dataset with saving dataset meta + file (by default `False`) - `--apply-colormap APPLY_COLORMAP` allow to use colormap for class masks (in folder `semantic_rgb`, by default `True`) - `--label_map` allow to define a custom colormap. Example: diff --git a/site/content/en/docs/formats/mnist.md b/site/content/en/docs/formats/mnist.md index 1744fea115..26628dfa21 100644 --- a/site/content/en/docs/formats/mnist.md +++ b/site/content/en/docs/formats/mnist.md @@ -68,7 +68,8 @@ MNIST dataset directory should have the following structure: ``` └─ Dataset/ - ├── labels.txt # list of non-digit labels (optional) + ├── dataset_meta.json # a list of non-format labels (optional) + ├── labels.txt # a list of non-digit labels in other format (optional) ├── t10k-images-idx3-ubyte.gz ├── t10k-labels-idx1-ubyte.gz ├── train-images-idx3-ubyte.gz @@ -80,14 +81,18 @@ MNIST in CSV dataset directory should have the following structure: ``` └─ Dataset/ - ├── labels.txt # list of non-digit labels (optional) + ├── dataset_meta.json # a list of non-format labels (optional) + ├── labels.txt # a list of non-digit labels in other format (optional) ├── mnist_test.csv └── mnist_train.csv ``` -If the dataset needs non-digit labels, you need to add the `labels.txt` -to the dataset folder. For example, `labels.txt` for Fashion MNIST the -following contents: +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +and `labels.txt`. +If the `dataset_meta.json` is not represented in the dataset, then +`labels.txt` will be imported if possible. 
+ +For example, `labels.txt` for Fashion MNIST the following contents: ``` @@ -150,6 +155,8 @@ Extra options for exporting to MNIST format: (by default `False`) - `--image-ext ` allow to specify image extension for exporting dataset (by default `.png`) +- `--save-dataset-meta` - allow to export dataset with saving dataset meta + file (by default `False`) These commands also work for MNIST in CSV if you use `mnist_csv` instead of `mnist`. diff --git a/site/content/en/docs/formats/pascal_voc.md b/site/content/en/docs/formats/pascal_voc.md index b279ceb64a..4c3fe2713b 100644 --- a/site/content/en/docs/formats/pascal_voc.md +++ b/site/content/en/docs/formats/pascal_voc.md @@ -108,12 +108,11 @@ These directories contain `.txt` files with a list of images in a subset, the subset name is the same as the `.txt` file name. Subset names can be arbitrary. -To add unformatted classes, you can use `dataset_meta.json` and `label_map.txt`. +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +and `label_map.txt`. If the `dataset_meta.json` is not represented in the dataset, then `label_map.txt` will be imported if possible. -Information about `dataset_meta.json` can be found [here](/docs/user_manual/supported_formats/#dataset-meta-file). - In `label_map.txt` you can define custom color map and non-pascal labels, for example: @@ -202,7 +201,7 @@ Extra options for exporting to Pascal VOC format: (by default `False`) - `--image-ext IMAGE_EXT` - allow to specify image extension for exporting dataset (by default use original or `.jpg` if none) -- `--save-meta-file` - allow to export dataset with saving dataset meta +- `--save-dataset-meta` - allow to export dataset with saving dataset meta file (by default `False`) - `--apply-colormap APPLY_COLORMAP` - allow to use colormap for class and instance masks (by default `True`) diff --git a/site/content/en/docs/formats/synthia.md b/site/content/en/docs/formats/synthia.md index 2b230c5730..6397cd67cf 100644 --- a/site/content/en/docs/formats/synthia.md +++ b/site/content/en/docs/formats/synthia.md @@ -40,6 +40,7 @@ SYNTHIA dataset directory should have the following structure: ``` dataset/ +├── dataset_meta.json # a list of non-format labels (optional) ├── GT/ │   ├── COLOR/ │   │   ├── Stereo_Left/ @@ -97,6 +98,7 @@ If it is missing, `GT/COLOR` folder will be used. The original dataset also contains depth information, but Datumaro does not currently support it. +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). ## Export to other formats diff --git a/site/content/en/docs/formats/yolo.md b/site/content/en/docs/formats/yolo.md index 3bf44248e1..33e50ea2f3 100644 --- a/site/content/en/docs/formats/yolo.md +++ b/site/content/en/docs/formats/yolo.md @@ -35,6 +35,7 @@ YOLO dataset directory should have the following structure: ``` └─ yolo_dataset/ │ + ├── dataset_meta.json # a list of non-format labels (optional) ├── obj.names # file with list of classes ├── obj.data # file with dataset information ├── train.txt # list of image paths in train subset @@ -97,6 +98,8 @@ Here `x_center`, `y_center`, `width`, and `height` are relative to the image's width and height. The `x_center` and `y_center` are center of rectangle (are not top-left corner). +To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). 
+ ## Export to other formats Datumaro can convert YOLO dataset into any other format diff --git a/tests/assets/ade20k2017_dataset/training/street/1.jpg b/tests/assets/ade20k2017_dataset/dataset/training/street/1.jpg similarity index 100% rename from tests/assets/ade20k2017_dataset/training/street/1.jpg rename to tests/assets/ade20k2017_dataset/dataset/training/street/1.jpg diff --git a/tests/assets/ade20k2017_dataset/training/street/1_atr.txt b/tests/assets/ade20k2017_dataset/dataset/training/street/1_atr.txt similarity index 100% rename from tests/assets/ade20k2017_dataset/training/street/1_atr.txt rename to tests/assets/ade20k2017_dataset/dataset/training/street/1_atr.txt diff --git a/tests/assets/ade20k2017_dataset/training/street/1_parts_1.png b/tests/assets/ade20k2017_dataset/dataset/training/street/1_parts_1.png similarity index 100% rename from tests/assets/ade20k2017_dataset/training/street/1_parts_1.png rename to tests/assets/ade20k2017_dataset/dataset/training/street/1_parts_1.png diff --git a/tests/assets/ade20k2017_dataset/training/street/1_seg.png b/tests/assets/ade20k2017_dataset/dataset/training/street/1_seg.png similarity index 100% rename from tests/assets/ade20k2017_dataset/training/street/1_seg.png rename to tests/assets/ade20k2017_dataset/dataset/training/street/1_seg.png diff --git a/tests/assets/ade20k2017_dataset/validation/2.jpg b/tests/assets/ade20k2017_dataset/dataset/validation/2.jpg similarity index 100% rename from tests/assets/ade20k2017_dataset/validation/2.jpg rename to tests/assets/ade20k2017_dataset/dataset/validation/2.jpg diff --git a/tests/assets/ade20k2017_dataset/validation/2_atr.txt b/tests/assets/ade20k2017_dataset/dataset/validation/2_atr.txt similarity index 100% rename from tests/assets/ade20k2017_dataset/validation/2_atr.txt rename to tests/assets/ade20k2017_dataset/dataset/validation/2_atr.txt diff --git a/tests/assets/ade20k2017_dataset/validation/2_parts_1.png b/tests/assets/ade20k2017_dataset/dataset/validation/2_parts_1.png similarity index 100% rename from tests/assets/ade20k2017_dataset/validation/2_parts_1.png rename to tests/assets/ade20k2017_dataset/dataset/validation/2_parts_1.png diff --git a/tests/assets/ade20k2017_dataset/validation/2_parts_2.png b/tests/assets/ade20k2017_dataset/dataset/validation/2_parts_2.png similarity index 100% rename from tests/assets/ade20k2017_dataset/validation/2_parts_2.png rename to tests/assets/ade20k2017_dataset/dataset/validation/2_parts_2.png diff --git a/tests/assets/ade20k2017_dataset/validation/2_seg.png b/tests/assets/ade20k2017_dataset/dataset/validation/2_seg.png similarity index 100% rename from tests/assets/ade20k2017_dataset/validation/2_seg.png rename to tests/assets/ade20k2017_dataset/dataset/validation/2_seg.png diff --git a/tests/assets/ade20k2017_dataset/dataset_with_meta_file/dataset_meta.json b/tests/assets/ade20k2017_dataset/dataset_with_meta_file/dataset_meta.json new file mode 100644 index 0000000000..6fbf31a524 --- /dev/null +++ b/tests/assets/ade20k2017_dataset/dataset_with_meta_file/dataset_meta.json @@ -0,0 +1,3 @@ +{ +"label_map": {"0": "sky", "1": "person", "2": "license plate", "3": "rim"} +} diff --git a/tests/assets/align_celeba_dataset/Img/img_align_celeba/000001.jpg b/tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1.jpg similarity index 100% rename from tests/assets/align_celeba_dataset/Img/img_align_celeba/000001.jpg rename to tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1.jpg diff --git 
a/tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1_atr.txt b/tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1_atr.txt new file mode 100644 index 0000000000..4189f2130c --- /dev/null +++ b/tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1_atr.txt @@ -0,0 +1,3 @@ +001 # 0 # 1 # sky # sky # "" +002 # 0 # 0 # person, individual, someone, somebody, mortal, soul # person # "walkin" +001 # 1 # 0 # license plate # license plate # "" diff --git a/tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1_parts_1.png b/tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1_parts_1.png new file mode 100644 index 0000000000..7658479fb2 Binary files /dev/null and b/tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1_parts_1.png differ diff --git a/tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1_seg.png b/tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1_seg.png new file mode 100644 index 0000000000..7badc3ae0d Binary files /dev/null and b/tests/assets/ade20k2017_dataset/dataset_with_meta_file/training/street/1_seg.png differ diff --git a/tests/assets/ade20k2020_dataset/training/street/1.jpg b/tests/assets/ade20k2020_dataset/dataset/training/street/1.jpg similarity index 100% rename from tests/assets/ade20k2020_dataset/training/street/1.jpg rename to tests/assets/ade20k2020_dataset/dataset/training/street/1.jpg diff --git a/tests/assets/ade20k2020_dataset/training/street/1.json b/tests/assets/ade20k2020_dataset/dataset/training/street/1.json similarity index 100% rename from tests/assets/ade20k2020_dataset/training/street/1.json rename to tests/assets/ade20k2020_dataset/dataset/training/street/1.json diff --git a/tests/assets/ade20k2020_dataset/training/street/1/instance_000_ADE_train_1.png b/tests/assets/ade20k2020_dataset/dataset/training/street/1/instance_000_ADE_train_1.png similarity index 100% rename from tests/assets/ade20k2020_dataset/training/street/1/instance_000_ADE_train_1.png rename to tests/assets/ade20k2020_dataset/dataset/training/street/1/instance_000_ADE_train_1.png diff --git a/tests/assets/ade20k2020_dataset/training/street/1/instance_001_ADE_train_1.png b/tests/assets/ade20k2020_dataset/dataset/training/street/1/instance_001_ADE_train_1.png similarity index 100% rename from tests/assets/ade20k2020_dataset/training/street/1/instance_001_ADE_train_1.png rename to tests/assets/ade20k2020_dataset/dataset/training/street/1/instance_001_ADE_train_1.png diff --git a/tests/assets/ade20k2020_dataset/training/street/1/instance_002_ADE_train_1.png b/tests/assets/ade20k2020_dataset/dataset/training/street/1/instance_002_ADE_train_1.png similarity index 100% rename from tests/assets/ade20k2020_dataset/training/street/1/instance_002_ADE_train_1.png rename to tests/assets/ade20k2020_dataset/dataset/training/street/1/instance_002_ADE_train_1.png diff --git a/tests/assets/ade20k2020_dataset/training/street/1_parts_1.png b/tests/assets/ade20k2020_dataset/dataset/training/street/1_parts_1.png similarity index 100% rename from tests/assets/ade20k2020_dataset/training/street/1_parts_1.png rename to tests/assets/ade20k2020_dataset/dataset/training/street/1_parts_1.png diff --git a/tests/assets/ade20k2020_dataset/training/street/1_seg.png b/tests/assets/ade20k2020_dataset/dataset/training/street/1_seg.png similarity index 100% rename from tests/assets/ade20k2020_dataset/training/street/1_seg.png rename to 
tests/assets/ade20k2020_dataset/dataset/training/street/1_seg.png diff --git a/tests/assets/ade20k2020_dataset/validation/2.jpg b/tests/assets/ade20k2020_dataset/dataset/validation/2.jpg similarity index 100% rename from tests/assets/ade20k2020_dataset/validation/2.jpg rename to tests/assets/ade20k2020_dataset/dataset/validation/2.jpg diff --git a/tests/assets/ade20k2020_dataset/validation/2.json b/tests/assets/ade20k2020_dataset/dataset/validation/2.json similarity index 100% rename from tests/assets/ade20k2020_dataset/validation/2.json rename to tests/assets/ade20k2020_dataset/dataset/validation/2.json diff --git a/tests/assets/ade20k2020_dataset/validation/2/instance_000_ADE_val_2.png b/tests/assets/ade20k2020_dataset/dataset/validation/2/instance_000_ADE_val_2.png similarity index 100% rename from tests/assets/ade20k2020_dataset/validation/2/instance_000_ADE_val_2.png rename to tests/assets/ade20k2020_dataset/dataset/validation/2/instance_000_ADE_val_2.png diff --git a/tests/assets/ade20k2020_dataset/validation/2/instance_001_ADE_val_2.png b/tests/assets/ade20k2020_dataset/dataset/validation/2/instance_001_ADE_val_2.png similarity index 100% rename from tests/assets/ade20k2020_dataset/validation/2/instance_001_ADE_val_2.png rename to tests/assets/ade20k2020_dataset/dataset/validation/2/instance_001_ADE_val_2.png diff --git a/tests/assets/ade20k2020_dataset/validation/2/instance_002_ADE_val_2.png b/tests/assets/ade20k2020_dataset/dataset/validation/2/instance_002_ADE_val_2.png similarity index 100% rename from tests/assets/ade20k2020_dataset/validation/2/instance_002_ADE_val_2.png rename to tests/assets/ade20k2020_dataset/dataset/validation/2/instance_002_ADE_val_2.png diff --git a/tests/assets/ade20k2020_dataset/validation/2/instance_003_ADE_val_2.png b/tests/assets/ade20k2020_dataset/dataset/validation/2/instance_003_ADE_val_2.png similarity index 100% rename from tests/assets/ade20k2020_dataset/validation/2/instance_003_ADE_val_2.png rename to tests/assets/ade20k2020_dataset/dataset/validation/2/instance_003_ADE_val_2.png diff --git a/tests/assets/ade20k2020_dataset/validation/2_parts_1.png b/tests/assets/ade20k2020_dataset/dataset/validation/2_parts_1.png similarity index 100% rename from tests/assets/ade20k2020_dataset/validation/2_parts_1.png rename to tests/assets/ade20k2020_dataset/dataset/validation/2_parts_1.png diff --git a/tests/assets/ade20k2020_dataset/validation/2_parts_2.png b/tests/assets/ade20k2020_dataset/dataset/validation/2_parts_2.png similarity index 100% rename from tests/assets/ade20k2020_dataset/validation/2_parts_2.png rename to tests/assets/ade20k2020_dataset/dataset/validation/2_parts_2.png diff --git a/tests/assets/ade20k2020_dataset/validation/2_seg.png b/tests/assets/ade20k2020_dataset/dataset/validation/2_seg.png similarity index 100% rename from tests/assets/ade20k2020_dataset/validation/2_seg.png rename to tests/assets/ade20k2020_dataset/dataset/validation/2_seg.png diff --git a/tests/assets/ade20k2020_dataset/dataset_with_meta_file/dataset_meta.json b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/dataset_meta.json new file mode 100644 index 0000000000..773ca0b192 --- /dev/null +++ b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/dataset_meta.json @@ -0,0 +1,3 @@ +{ +"label_map": {"0": "car", "1": "person", "2": "door", "3": "rim"} +} diff --git a/tests/assets/celeba_dataset/Img/img_celeba/000001.jpg b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1.jpg similarity index 100% rename from 
tests/assets/celeba_dataset/Img/img_celeba/000001.jpg rename to tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1.jpg diff --git a/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1.json b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1.json new file mode 100644 index 0000000000..5531564711 --- /dev/null +++ b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1.json @@ -0,0 +1,110 @@ +{ + "annotation": { + "filename": "1.jpg", + "folder": "training/street", + "imsize": [ + 5, + 5, + 3 + ], + "source": { + "folder": "", + "filename": "", + "origin": "" + }, + "scene": [ + "outdoor", + "urban", + "street" + ], + "object": [ + { + "id": 0, + "name": "car, auto, automobile, machine, motorcar", + "name_ndx": 401, + "hypernym": [ + "car, auto, automobile, machine, motorcar", + "motor vehicle, automotive vehicle", + "self-propelled vehicle", + "wheeled vehicle", + "vehicle", + "conveyance, transport", + "instrumentality, instrumentation", + "artifact, artefact", + "whole, unit", + "object, physical object", + "physical entity", + "entity" + ], + "raw_name": "car", + "attributes": [], + "depth_ordering_rank": 2, + "occluded": [], + "crop": 0, + "parts": { + "hasparts": [], + "ispartof": [], + "part_level": 0 + }, + "instance_mask": "1/instance_000_ADE_train_1.png", + "polygon": { + "x": [], + "y": [] + }, + "saved_date": "1-Jan-1970 00:00:00" + }, + { + "id": 1, + "name": "person, individual, someone, somebody, mortal, soul", + "name_ndx": 1831, + "hypernym": [ + "person, individual, someone, somebody, mortal, soul", + "organism, being", + "living thing, animate thing", + "whole, unit", + "object, physical object", + "physical entity", + "entity" + ], + "raw_name": "person", + "attributes": ["walkin"], + "depth_ordering_rank": 1, + "occluded": "no", + "crop": 0, + "parts": { + "hasparts": [], + "ispartof": [], + "part_level": 0 + }, + "instance_mask": "1/instance_001_ADE_train_1.png", + "polygon": { + "x": [1, 1, 1, 1, 1], + "y": [0, 1, 2, 3, 4] + }, + "saved_date": "1-Jan-1970 00:00:00" + }, + { + "id": 2, + "name": "door", + "name_ndx": 774, + "hypernym": ["door", "doors", "garage door"], + "raw_name": "door", + "attributes": [], + "depth_ordering_rank": 3, + "occluded": "yes", + "crop": 0, + "parts": { + "hasparts": [], + "ispartof": 0, + "part_level": 1 + }, + "instance_mask": "1/instance_002_ADE_train_1.png", + "polygon": { + "x": [], + "y": [] + }, + "saved_date": "1-Jan-1970 00:00:00" + } + ] + } +} diff --git a/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1/instance_000_ADE_train_1.png b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1/instance_000_ADE_train_1.png new file mode 100644 index 0000000000..00c12479f0 Binary files /dev/null and b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1/instance_000_ADE_train_1.png differ diff --git a/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1/instance_001_ADE_train_1.png b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1/instance_001_ADE_train_1.png new file mode 100644 index 0000000000..7054099dff Binary files /dev/null and b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1/instance_001_ADE_train_1.png differ diff --git a/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1/instance_002_ADE_train_1.png 
b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1/instance_002_ADE_train_1.png new file mode 100644 index 0000000000..1a54e43a1b Binary files /dev/null and b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1/instance_002_ADE_train_1.png differ diff --git a/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1_parts_1.png b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1_parts_1.png new file mode 100644 index 0000000000..dd50d05d87 Binary files /dev/null and b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1_parts_1.png differ diff --git a/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1_seg.png b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1_seg.png new file mode 100644 index 0000000000..abbc546cd2 Binary files /dev/null and b/tests/assets/ade20k2020_dataset/dataset_with_meta_file/training/street/1_seg.png differ diff --git a/tests/assets/align_celeba_dataset/Anno/identity_CelebA.txt b/tests/assets/align_celeba_dataset/dataset/Anno/identity_CelebA.txt similarity index 100% rename from tests/assets/align_celeba_dataset/Anno/identity_CelebA.txt rename to tests/assets/align_celeba_dataset/dataset/Anno/identity_CelebA.txt diff --git a/tests/assets/align_celeba_dataset/Anno/list_attr_celeba.txt b/tests/assets/align_celeba_dataset/dataset/Anno/list_attr_celeba.txt similarity index 100% rename from tests/assets/align_celeba_dataset/Anno/list_attr_celeba.txt rename to tests/assets/align_celeba_dataset/dataset/Anno/list_attr_celeba.txt diff --git a/tests/assets/align_celeba_dataset/Anno/list_landmarks_align_celeba.txt b/tests/assets/align_celeba_dataset/dataset/Anno/list_landmarks_align_celeba.txt similarity index 100% rename from tests/assets/align_celeba_dataset/Anno/list_landmarks_align_celeba.txt rename to tests/assets/align_celeba_dataset/dataset/Anno/list_landmarks_align_celeba.txt diff --git a/tests/assets/align_celeba_dataset/Eval/list_eval_partition.txt b/tests/assets/align_celeba_dataset/dataset/Eval/list_eval_partition.txt similarity index 100% rename from tests/assets/align_celeba_dataset/Eval/list_eval_partition.txt rename to tests/assets/align_celeba_dataset/dataset/Eval/list_eval_partition.txt diff --git a/tests/assets/align_celeba_dataset/Img/img_align_celeba/000002.jpg b/tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000001.jpg similarity index 100% rename from tests/assets/align_celeba_dataset/Img/img_align_celeba/000002.jpg rename to tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000001.jpg diff --git a/tests/assets/align_celeba_dataset/Img/img_align_celeba/000003.jpg b/tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000002.jpg similarity index 100% rename from tests/assets/align_celeba_dataset/Img/img_align_celeba/000003.jpg rename to tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000002.jpg diff --git a/tests/assets/align_celeba_dataset/Img/img_align_celeba/000004.jpg b/tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000003.jpg similarity index 100% rename from tests/assets/align_celeba_dataset/Img/img_align_celeba/000004.jpg rename to tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000003.jpg diff --git a/tests/assets/align_celeba_dataset/Img/img_align_celeba/000005.jpg b/tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000004.jpg similarity index 100% rename from 
tests/assets/align_celeba_dataset/Img/img_align_celeba/000005.jpg rename to tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000004.jpg diff --git a/tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000005.jpg b/tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000005.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/align_celeba_dataset/dataset/Img/img_align_celeba/000005.jpg differ diff --git a/tests/assets/align_celeba_dataset/dataset_with_meta_file/Anno/identity_CelebA.txt b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Anno/identity_CelebA.txt new file mode 100644 index 0000000000..1e4441ce56 --- /dev/null +++ b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Anno/identity_CelebA.txt @@ -0,0 +1,5 @@ +000001.jpg 1 +000002.jpg 3 +000003.jpg 0 +000004.jpg 2 +000005.jpg 6 diff --git a/tests/assets/celeba_dataset/Eval/list_eval_partition.txt b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Eval/list_eval_partition.txt similarity index 100% rename from tests/assets/celeba_dataset/Eval/list_eval_partition.txt rename to tests/assets/align_celeba_dataset/dataset_with_meta_file/Eval/list_eval_partition.txt diff --git a/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000001.jpg b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000001.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000001.jpg differ diff --git a/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000002.jpg b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000002.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000002.jpg differ diff --git a/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000003.jpg b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000003.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000003.jpg differ diff --git a/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000004.jpg b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000004.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000004.jpg differ diff --git a/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000005.jpg b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000005.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/align_celeba_dataset/dataset_with_meta_file/Img/img_align_celeba/000005.jpg differ diff --git a/tests/assets/align_celeba_dataset/dataset_with_meta_file/dataset_meta.json b/tests/assets/align_celeba_dataset/dataset_with_meta_file/dataset_meta.json new file mode 100644 index 0000000000..b63f44812e --- /dev/null +++ b/tests/assets/align_celeba_dataset/dataset_with_meta_file/dataset_meta.json @@ -0,0 +1,3 @@ +{ +"labels": ["class-0", "class-1", "class-2", "class-3", "class-4", "class-5", "class-6"] +} diff --git 
a/tests/assets/celeba_dataset/Anno/identity_CelebA.txt b/tests/assets/celeba_dataset/dataset/Anno/identity_CelebA.txt similarity index 100% rename from tests/assets/celeba_dataset/Anno/identity_CelebA.txt rename to tests/assets/celeba_dataset/dataset/Anno/identity_CelebA.txt diff --git a/tests/assets/celeba_dataset/Anno/list_attr_celeba.txt b/tests/assets/celeba_dataset/dataset/Anno/list_attr_celeba.txt similarity index 100% rename from tests/assets/celeba_dataset/Anno/list_attr_celeba.txt rename to tests/assets/celeba_dataset/dataset/Anno/list_attr_celeba.txt diff --git a/tests/assets/celeba_dataset/Anno/list_bbox_celeba.txt b/tests/assets/celeba_dataset/dataset/Anno/list_bbox_celeba.txt similarity index 100% rename from tests/assets/celeba_dataset/Anno/list_bbox_celeba.txt rename to tests/assets/celeba_dataset/dataset/Anno/list_bbox_celeba.txt diff --git a/tests/assets/celeba_dataset/Anno/list_landmarks_celeba.txt b/tests/assets/celeba_dataset/dataset/Anno/list_landmarks_celeba.txt similarity index 100% rename from tests/assets/celeba_dataset/Anno/list_landmarks_celeba.txt rename to tests/assets/celeba_dataset/dataset/Anno/list_landmarks_celeba.txt diff --git a/tests/assets/celeba_dataset/dataset/Eval/list_eval_partition.txt b/tests/assets/celeba_dataset/dataset/Eval/list_eval_partition.txt new file mode 100644 index 0000000000..620bc5cf82 --- /dev/null +++ b/tests/assets/celeba_dataset/dataset/Eval/list_eval_partition.txt @@ -0,0 +1,5 @@ +000001.jpg 0 +000002.jpg 0 +000003.jpg 1 +000004.jpg 2 +000005.jpg 2 diff --git a/tests/assets/celeba_dataset/Img/img_celeba/000002.jpg b/tests/assets/celeba_dataset/dataset/Img/img_celeba/000001.jpg similarity index 100% rename from tests/assets/celeba_dataset/Img/img_celeba/000002.jpg rename to tests/assets/celeba_dataset/dataset/Img/img_celeba/000001.jpg diff --git a/tests/assets/celeba_dataset/Img/img_celeba/000003.jpg b/tests/assets/celeba_dataset/dataset/Img/img_celeba/000002.jpg similarity index 100% rename from tests/assets/celeba_dataset/Img/img_celeba/000003.jpg rename to tests/assets/celeba_dataset/dataset/Img/img_celeba/000002.jpg diff --git a/tests/assets/celeba_dataset/Img/img_celeba/000004.jpg b/tests/assets/celeba_dataset/dataset/Img/img_celeba/000003.jpg similarity index 100% rename from tests/assets/celeba_dataset/Img/img_celeba/000004.jpg rename to tests/assets/celeba_dataset/dataset/Img/img_celeba/000003.jpg diff --git a/tests/assets/celeba_dataset/Img/img_celeba/000005.jpg b/tests/assets/celeba_dataset/dataset/Img/img_celeba/000004.jpg similarity index 100% rename from tests/assets/celeba_dataset/Img/img_celeba/000005.jpg rename to tests/assets/celeba_dataset/dataset/Img/img_celeba/000004.jpg diff --git a/tests/assets/celeba_dataset/dataset/Img/img_celeba/000005.jpg b/tests/assets/celeba_dataset/dataset/Img/img_celeba/000005.jpg new file mode 100644 index 0000000000..da0b2003e6 Binary files /dev/null and b/tests/assets/celeba_dataset/dataset/Img/img_celeba/000005.jpg differ diff --git a/tests/assets/celeba_dataset/dataset_with_meta_file/Anno/identity_CelebA.txt b/tests/assets/celeba_dataset/dataset_with_meta_file/Anno/identity_CelebA.txt new file mode 100644 index 0000000000..1e4441ce56 --- /dev/null +++ b/tests/assets/celeba_dataset/dataset_with_meta_file/Anno/identity_CelebA.txt @@ -0,0 +1,5 @@ +000001.jpg 1 +000002.jpg 3 +000003.jpg 0 +000004.jpg 2 +000005.jpg 6 diff --git a/tests/assets/celeba_dataset/dataset_with_meta_file/Eval/list_eval_partition.txt 
b/tests/assets/celeba_dataset/dataset_with_meta_file/Eval/list_eval_partition.txt new file mode 100644 index 0000000000..620bc5cf82 --- /dev/null +++ b/tests/assets/celeba_dataset/dataset_with_meta_file/Eval/list_eval_partition.txt @@ -0,0 +1,5 @@ +000001.jpg 0 +000002.jpg 0 +000003.jpg 1 +000004.jpg 2 +000005.jpg 2 diff --git a/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000001.jpg b/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000001.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000001.jpg differ diff --git a/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000002.jpg b/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000002.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000002.jpg differ diff --git a/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000003.jpg b/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000003.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000003.jpg differ diff --git a/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000004.jpg b/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000004.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000004.jpg differ diff --git a/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000005.jpg b/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000005.jpg new file mode 100644 index 0000000000..3bd4d24c49 Binary files /dev/null and b/tests/assets/celeba_dataset/dataset_with_meta_file/Img/img_celeba/000005.jpg differ diff --git a/tests/assets/celeba_dataset/dataset_with_meta_file/dataset_meta.json b/tests/assets/celeba_dataset/dataset_with_meta_file/dataset_meta.json new file mode 100644 index 0000000000..b63f44812e --- /dev/null +++ b/tests/assets/celeba_dataset/dataset_with_meta_file/dataset_meta.json @@ -0,0 +1,3 @@ +{ +"labels": ["class-0", "class-1", "class-2", "class-3", "class-4", "class-5", "class-6"] +} diff --git a/tests/assets/synthia_dataset/dataset_with_meta_file/GT/COLOR/Stereo_Left/Omni_F/000000.png b/tests/assets/synthia_dataset/dataset_with_meta_file/GT/COLOR/Stereo_Left/Omni_F/000000.png new file mode 100644 index 0000000000..04c48c8602 Binary files /dev/null and b/tests/assets/synthia_dataset/dataset_with_meta_file/GT/COLOR/Stereo_Left/Omni_F/000000.png differ diff --git a/tests/assets/synthia_dataset/dataset_with_meta_file/GT/COLOR/Stereo_Left/Omni_F/000001.png b/tests/assets/synthia_dataset/dataset_with_meta_file/GT/COLOR/Stereo_Left/Omni_F/000001.png new file mode 100644 index 0000000000..425cf55488 Binary files /dev/null and b/tests/assets/synthia_dataset/dataset_with_meta_file/GT/COLOR/Stereo_Left/Omni_F/000001.png differ diff --git a/tests/assets/synthia_dataset/dataset_with_meta_file/RGB/Stereo_Left/Omni_F/000000.png b/tests/assets/synthia_dataset/dataset_with_meta_file/RGB/Stereo_Left/Omni_F/000000.png new file mode 100644 index 0000000000..528f105467 Binary files /dev/null and b/tests/assets/synthia_dataset/dataset_with_meta_file/RGB/Stereo_Left/Omni_F/000000.png differ diff --git 
a/tests/assets/synthia_dataset/dataset_with_meta_file/RGB/Stereo_Left/Omni_F/000001.png b/tests/assets/synthia_dataset/dataset_with_meta_file/RGB/Stereo_Left/Omni_F/000001.png new file mode 100644 index 0000000000..528f105467 Binary files /dev/null and b/tests/assets/synthia_dataset/dataset_with_meta_file/RGB/Stereo_Left/Omni_F/000001.png differ diff --git a/tests/assets/synthia_dataset/dataset_with_meta_file/dataset_meta.json b/tests/assets/synthia_dataset/dataset_with_meta_file/dataset_meta.json new file mode 100644 index 0000000000..2357ef2623 --- /dev/null +++ b/tests/assets/synthia_dataset/dataset_with_meta_file/dataset_meta.json @@ -0,0 +1,5 @@ +{ +"label_map": {"0": "background", "1": "sky", "2": "building", "3": "person", "4": "road"}, +"background_label": "0", +"segmentation_colors": [[0, 0, 0], [0, 0, 64], [0, 128, 128], [128, 0, 64], [0, 192, 128]] +} diff --git a/tests/cli/test_revpath.py b/tests/cli/test_revpath.py index 681154eafc..f32a1f39fb 100644 --- a/tests/cli/test_revpath.py +++ b/tests/cli/test_revpath.py @@ -138,8 +138,8 @@ def test_ambiguous_format(self): assets_dir = osp.join(osp.dirname(__file__), '../assets') os.makedirs(annotation_dir) for asset in [ - 'ade20k2017_dataset/training/street/1_atr.txt', - 'ade20k2020_dataset/training/street/1.json', + 'ade20k2017_dataset/dataset/training/street/1_atr.txt', + 'ade20k2020_dataset/dataset/training/street/1.json', ]: shutil.copy(osp.join(assets_dir, asset), annotation_dir) diff --git a/tests/test_ade20k2017_format.py b/tests/test_ade20k2017_format.py index 3b057be516..5d41f375d7 100644 --- a/tests/test_ade20k2017_format.py +++ b/tests/test_ade20k2017_format.py @@ -15,7 +15,11 @@ from tests.requirements import Requirements, mark_requirement -DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'ade20k2017_dataset') +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'ade20k2017_dataset', + 'dataset') + +DUMMY_DATASET_DIR_META_FILE = osp.join(osp.dirname(__file__), 'assets', 'ade20k2017_dataset', + 'dataset_with_meta_file') class Ade20k2017ImporterTest(TestCase): @mark_requirement(Requirements.DATUM_399) @@ -58,3 +62,27 @@ def test_can_import(self): imported_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'ade20k2017') compare_datasets(self, expected_dataset, imported_dataset, require_images=True) + + @mark_requirement(Requirements.DATUM_399) + def test_can_import_with_meta_file(self): + expected_dataset = Dataset.from_iterable( + [ + DatasetItem(id='street/1', subset='training', + image=np.ones((3, 4, 3)), + annotations=[ + Mask(image=np.array([[0, 1, 0, 0]] * 3), label=0, + group=1, z_order=0, id=1), + Mask(image=np.array([[0, 0, 0, 1]] * 3), label=2, + group=1, z_order=1, id=1), + Mask(image=np.array([[0, 0, 1, 1]] * 3), + group=2, label=1, z_order=0, id=2, + attributes={'walkin': True}) + ]) + ], categories={AnnotationType.label: LabelCategories.from_iterable([ + 'sky', 'person', 'license plate', 'rim']) + } + ) + + imported_dataset = Dataset.import_from(DUMMY_DATASET_DIR_META_FILE, 'ade20k2017') + compare_datasets(self, expected_dataset, imported_dataset, + require_images=True) diff --git a/tests/test_ade20k2020_format.py b/tests/test_ade20k2020_format.py index 894a1355bc..d0412eabd2 100644 --- a/tests/test_ade20k2020_format.py +++ b/tests/test_ade20k2020_format.py @@ -17,7 +17,11 @@ from tests.requirements import Requirements, mark_requirement -DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'ade20k2020_dataset') +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 
+DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'ade20k2020_dataset',
+    'dataset')
+
+DUMMY_DATASET_DIR_META_FILE = osp.join(osp.dirname(__file__), 'assets', 'ade20k2020_dataset',
+    'dataset_with_meta_file')
 
 class Ade20k2020ImporterTest(TestCase):
     @mark_requirement(Requirements.DATUM_399)
@@ -78,3 +82,36 @@ def test_can_import(self):
         imported_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'ade20k2020')
         compare_datasets(self, expected_dataset, imported_dataset,
             require_images=True)
+
+    @mark_requirement(Requirements.DATUM_399)
+    def test_can_import_with_meta_file(self):
+        expected_dataset = Dataset.from_iterable(
+            [
+                DatasetItem(id='street/1', subset='training',
+                    image=np.ones((5, 5, 3)),
+                    annotations=[
+                        Polygon([1, 0, 1, 1, 1, 2, 1, 3, 1, 4],
+                            group=1, z_order=0, id=1, label=1,
+                            attributes={'walkin': True}),
+                        Mask(image=np.array([[0, 0, 1, 1, 1]] * 5), label=0,
+                            group=401, z_order=0, id=401),
+                        Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), label=1,
+                            group=1831, z_order=0, id=1831),
+                        Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), label=2,
+                            id=774, group=774, z_order=1),
+                        Mask(image=np.array([[0, 0, 1, 1, 1]] * 5), label=0,
+                            group=0, z_order=0, id=0),
+                        Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), label=1,
+                            group=1, z_order=0, id=1,
+                            attributes={'walkin': True}),
+                        Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), label=2,
+                            group=2, z_order=1, id=2),
+                    ])
+            ], categories={AnnotationType.label: LabelCategories.from_iterable([
+                'car', 'person', 'door', 'rim'])
+            }
+        )
+
+        imported_dataset = Dataset.import_from(DUMMY_DATASET_DIR_META_FILE, 'ade20k2020')
+        compare_datasets(self, expected_dataset, imported_dataset,
+            require_images=True)
diff --git a/tests/test_align_celeba_format.py b/tests/test_align_celeba_format.py
index 9eefcc0daa..a12f143832 100644
--- a/tests/test_align_celeba_format.py
+++ b/tests/test_align_celeba_format.py
@@ -15,9 +15,12 @@
 from .requirements import Requirements, mark_requirement
 
 DUMMY_ALIGN_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets',
-    'align_celeba_dataset',)
+    'align_celeba_dataset', 'dataset')
+DUMMY_ALIGN_DATASET_DIR_WITH_META_FILE = osp.join(osp.dirname(__file__),
+    'assets', 'align_celeba_dataset', 'dataset_with_meta_file')
 
 class AlignCelebaImporterTest(TestCase):
+    @mark_requirement(Requirements.DATUM_475)
     def test_can_import(self):
         expected_dataset = Dataset.from_iterable([
             DatasetItem(id='000001', subset='train',
@@ -79,6 +82,36 @@ def test_can_import(self):
 
         compare_datasets(self, expected_dataset, dataset, require_images=True)
 
+    @mark_requirement(Requirements.DATUM_475)
+    def test_can_import_with_meta_file(self):
+        expected_dataset = Dataset.from_iterable([
+            DatasetItem(id='000001', subset='train',
+                image=np.ones((3, 4, 3)),
+                annotations=[Label(1)]
+            ),
+            DatasetItem(id='000002', subset='train',
+                image=np.ones((3, 4, 3)),
+                annotations=[Label(3)]
+            ),
+            DatasetItem(id='000003', subset='val',
+                image=np.ones((3, 4, 3)),
+                annotations=[Label(0)]
+            ),
+            DatasetItem(id='000004', subset='test',
+                image=np.ones((3, 4, 3)),
+                annotations=[Label(2)]
+            ),
+            DatasetItem(id='000005', subset='test',
+                image=np.ones((3, 4, 3)),
+                annotations=[Label(6)]
+            )
+        ], categories=[f'class-{i}' for i in range(7)])
+
+        dataset = Dataset.import_from(DUMMY_ALIGN_DATASET_DIR_WITH_META_FILE,
+            'align_celeba')
+
+        compare_datasets(self, expected_dataset, dataset, require_images=True)
+
     @mark_requirement(Requirements.DATUM_475)
     def test_can_detect_align_dataset(self):
         detected_formats = Environment().detect_dataset(DUMMY_ALIGN_DATASET_DIR)
diff --git a/tests/test_celeba_format.py b/tests/test_celeba_format.py
index a9985fd93b..d793900718 100644
--- a/tests/test_celeba_format.py
+++ b/tests/test_celeba_format.py
@@ -15,7 +15,9 @@
 from .requirements import Requirements, mark_requirement
 
 DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets',
-    'celeba_dataset')
+    'celeba_dataset', 'dataset')
+DUMMY_DATASET_DIR_WITH_META_FILE = osp.join(osp.dirname(__file__),
+    'assets', 'celeba_dataset', 'dataset_with_meta_file')
 
 class CelebaImporterTest(TestCase):
     @mark_requirement(Requirements.DATUM_475)
@@ -85,6 +87,36 @@ def test_can_import(self):
 
         compare_datasets(self, expected_dataset, dataset, require_images=True)
 
+    @mark_requirement(Requirements.DATUM_475)
+    def test_can_import_with_meta_file(self):
+        expected_dataset = Dataset.from_iterable([
+            DatasetItem(id='000001', subset='train',
+                image=np.ones((3, 4, 3)),
+                annotations=[Label(1)]
+            ),
+            DatasetItem(id='000002', subset='train',
+                image=np.ones((3, 4, 3)),
+                annotations=[Label(3)]
+            ),
+            DatasetItem(id='000003', subset='val',
+                image=np.ones((3, 4, 3)),
+                annotations=[Label(0)]
+            ),
+            DatasetItem(id='000004', subset='test',
+                image=np.ones((3, 4, 3)),
+                annotations=[Label(2)]
+            ),
+            DatasetItem(id='000005', subset='test',
+                image=np.ones((3, 4, 3)),
+                annotations=[Label(6)]
+            )
+        ], categories=[f'class-{i}' for i in range(7)])
+
+        dataset = Dataset.import_from(DUMMY_DATASET_DIR_WITH_META_FILE,
+            'celeba')
+
+        compare_datasets(self, expected_dataset, dataset, require_images=True)
+
     @mark_requirement(Requirements.DATUM_475)
     def test_can_detect(self):
         detected_formats = Environment().detect_dataset(DUMMY_DATASET_DIR)
diff --git a/tests/test_cifar_format.py b/tests/test_cifar_format.py
index 1f0d414473..6140a9c33e 100644
--- a/tests/test_cifar_format.py
+++ b/tests/test_cifar_format.py
@@ -216,6 +216,30 @@ def test_can_catch_pickle_exception(self):
             with self.assertRaisesRegex(pickle.UnpicklingError, "Global"):
                 Dataset.import_from(test_dir, 'cifar')
 
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_save_and_load_with_meta_file(self):
+        source_dataset = Dataset.from_iterable([
+            DatasetItem(id='image_2', subset='test',
+                image=np.ones((32, 32, 3)),
+                annotations=[Label(0)]
+            ),
+            DatasetItem(id='image_3', subset='test',
+                image=np.ones((32, 32, 3))
+            ),
+            DatasetItem(id='image_4', subset='test',
+                image=np.ones((32, 32, 3)),
+                annotations=[Label(1)]
+            )
+        ], categories=['label_0', 'label_1'])
+
+        with TestDir() as test_dir:
+            CifarConverter.convert(source_dataset, test_dir, save_images=True,
+                save_dataset_meta=True)
+            parsed_dataset = Dataset.import_from(test_dir, 'cifar')
+
+            self.assertTrue(osp.isfile(osp.join(test_dir, 'dataset_meta.json')))
+            compare_datasets(self, source_dataset, parsed_dataset,
+                require_images=True)
 
 DUMMY_10_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets',
     'cifar10_dataset')
diff --git a/tests/test_kitti_format.py b/tests/test_kitti_format.py
index 68e18085d6..460d588667 100644
--- a/tests/test_kitti_format.py
+++ b/tests/test_kitti_format.py
@@ -20,6 +20,7 @@
 from datumaro.plugins.kitti_format.importer import (
     KittiDetectionImporter, KittiImporter, KittiSegmentationImporter,
 )
+from datumaro.util.meta_file_util import parse_meta_file
 from datumaro.util.test_utils import (
     TestDir, check_save_and_load, compare_datasets,
 )
@@ -43,6 +44,20 @@ def test_can_write_and_parse_labelmap(self):
 
         self.assertEqual(src_label_map, dst_label_map)
 
+    @mark_requirement(Requirements.DATUM_280)
+    def test_can_write_and_parse_dataset_meta_file(self):
+        src_label_map = KittiLabelMap
+
+        with TestDir() as test_dir:
+            source_dataset = Dataset.from_iterable([],
+                categories=make_kitti_categories(src_label_map))
+
+            KittiConverter.convert(source_dataset, test_dir,
+                save_dataset_meta=True)
+            dst_label_map = parse_meta_file(test_dir)
+
+        self.assertEqual(src_label_map, dst_label_map)
+
 class KittiImportTest(TestCase):
     @mark_requirement(Requirements.DATUM_280)
     def test_can_import_segmentation(self):
@@ -546,3 +561,72 @@ def test_can_save_detection_with_score_attribute(self):
             self._test_save_and_load(source_dataset,
                 partial(KittiConverter.convert, save_images=True,
                     tasks=KittiTask.detection), test_dir)
+
+    @mark_requirement(Requirements.DATUM_280)
+    def test_can_save_detection_with_meta_file(self):
+        source_dataset = Dataset.from_iterable([
+            DatasetItem(id='1_2', subset='test',
+                image=np.ones((10, 10, 3)), annotations=[
+                    Bbox(0, 1, 2, 2, label=0, id=0,
+                        attributes={'truncated': False, 'occluded': False,
+                            'score': 1.0}),
+                ]),
+            DatasetItem(id='1_3', subset='test',
+                image=np.ones((10, 10, 3)), annotations=[
+                    Bbox(0, 0, 2, 2, label=1, id=0,
+                        attributes={'truncated': True, 'occluded': False,
+                            'score': 1.0}),
+                    Bbox(6, 2, 3, 4, label=1, id=1,
+                        attributes={'truncated': False, 'occluded': True,
+                            'score': 1.0}),
+                ]),
+        ], categories=['label_0', 'label_1'])
+
+        with TestDir() as test_dir:
+            self._test_save_and_load(source_dataset,
+                partial(KittiConverter.convert,
+                    save_images=True, save_dataset_meta=True,
+                    tasks=KittiTask.detection), test_dir)
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_save_segmentation_with_meta_file(self):
+        class SrcExtractor(TestExtractorBase):
+            def __iter__(self):
+                yield DatasetItem(id=1, image=np.ones((1, 5, 3)), annotations=[
+                    Mask(image=np.array([[1, 0, 0, 1, 1]]), label=1, id=1,
+                        attributes={'is_crowd': False}),
+                    Mask(image=np.array([[0, 1, 1, 0, 0]]), label=2, id=2,
+                        attributes={'is_crowd': False}),
+                ])
+
+            def categories(self):
+                label_map = OrderedDict()
+                label_map['background'] = (0, 0, 0)
+                label_map['label_1'] = (1, 2, 3)
+                label_map['label_2'] = (3, 2, 1)
+                return make_kitti_categories(label_map)
+
+        class DstExtractor(TestExtractorBase):
+            def __iter__(self):
+                yield DatasetItem(id=1, image=np.ones((1, 5, 3)), annotations=[
+                    Mask(image=np.array([[1, 0, 0, 1, 1]]),
+                        attributes={'is_crowd': False}, id=1,
+                        label=self._label('label_1')),
+                    Mask(image=np.array([[0, 1, 1, 0, 0]]),
+                        attributes={'is_crowd': False}, id=2,
+                        label=self._label('label_2')),
+                ])
+
+            def categories(self):
+                label_map = OrderedDict()
+                label_map['background'] = (0, 0, 0)
+                label_map['label_1'] = (1, 2, 3)
+                label_map['label_2'] = (3, 2, 1)
+                return make_kitti_categories(label_map)
+
+        with TestDir() as test_dir:
+            self._test_save_and_load(SrcExtractor(),
+                partial(KittiConverter.convert, label_map='source',
+                    save_images=True, save_dataset_meta=True), test_dir,
+                target_dataset=DstExtractor())
+            self.assertTrue(osp.isfile(osp.join(test_dir, 'dataset_meta.json')))
diff --git a/tests/test_lfw_format.py b/tests/test_lfw_format.py
index 1173315d1f..0182ec84c3 100644
--- a/tests/test_lfw_format.py
+++ b/tests/test_lfw_format.py
@@ -201,6 +201,46 @@ def test_can_save_and_load_image_with_arbitrary_extension(self):
 
             compare_datasets(self, dataset, parsed_dataset,
                 require_images=True)
 
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_save_and_load_with_meta_file(self):
+        source_dataset = Dataset.from_iterable([
+            DatasetItem(id='name0_0001', subset='test',
+                image=np.ones((2, 5, 3)),
+                annotations=[Label(0, attributes={
+                    'positive_pairs': ['name0/name0_0002']
+                })]
+            ),
+            DatasetItem(id='name0_0002', subset='test',
+                image=np.ones((2, 5, 3)),
+                annotations=[Label(0, attributes={
+                    'positive_pairs': ['name0/name0_0001'],
+                    'negative_pairs': ['name1/name1_0001']
+                })]
+            ),
+            DatasetItem(id='name1_0001', subset='test',
+                image=np.ones((2, 5, 3)),
+                annotations=[Label(1, attributes={
+                    'positive_pairs': ['name1/name1_0002']
+                })]
+            ),
+            DatasetItem(id='name1_0002', subset='test',
+                image=np.ones((2, 5, 3)),
+                annotations=[Label(1, attributes={
+                    'positive_pairs': ['name1/name1_0002'],
+                    'negative_pairs': ['name0/name0_0001']
+                })]
+            ),
+        ], categories=['name0', 'name1'])
+
+        with TestDir() as test_dir:
+            LfwConverter.convert(source_dataset, test_dir,
+                save_images=True, save_dataset_meta=True)
+            parsed_dataset = Dataset.import_from(test_dir, 'lfw')
+
+            self.assertTrue(osp.isfile(osp.join(test_dir, 'dataset_meta.json')))
+            compare_datasets(self, source_dataset, parsed_dataset,
+                require_images=True)
+
 DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'lfw_dataset')
 
 class LfwImporterTest(TestCase):
diff --git a/tests/test_mnist_csv_format.py b/tests/test_mnist_csv_format.py
index a2e97cb192..f660763db8 100644
--- a/tests/test_mnist_csv_format.py
+++ b/tests/test_mnist_csv_format.py
@@ -161,6 +161,34 @@ def test_can_save_and_load_with_other_labels(self):
 
             compare_datasets(self, dataset, parsed_dataset,
                 require_images=True)
 
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_save_and_load_with_meta_file(self):
+        source_dataset = Dataset.from_iterable([
+            DatasetItem(id=0, subset='test',
+                image=np.ones((28, 28)),
+                annotations=[Label(0)]
+            ),
+            DatasetItem(id=1, subset='test',
+                image=np.ones((28, 28))
+            ),
+            DatasetItem(id=2, subset='test',
+                image=np.ones((28, 28)),
+                annotations=[Label(1)]
+            )
+        ], categories={
+            AnnotationType.label: LabelCategories.from_iterable(
+                str(label) for label in range(10)),
+        })
+
+        with TestDir() as test_dir:
+            MnistCsvConverter.convert(source_dataset, test_dir, save_images=True,
+                save_dataset_meta=True)
+            parsed_dataset = Dataset.import_from(test_dir, 'mnist_csv')
+
+            self.assertTrue(osp.isfile(osp.join(test_dir, 'dataset_meta.json')))
+            compare_datasets(self, source_dataset, parsed_dataset,
+                require_images=True)
+
 DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets',
     'mnist_csv_dataset')
 
 class MnistCsvImporterTest(TestCase):
diff --git a/tests/test_mnist_format.py b/tests/test_mnist_format.py
index c576e8e351..61860a1d07 100644
--- a/tests/test_mnist_format.py
+++ b/tests/test_mnist_format.py
@@ -159,6 +159,34 @@ def test_can_save_and_load_with_other_labels(self):
 
             compare_datasets(self, dataset, parsed_dataset,
                 require_images=True)
 
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_save_and_load_with_meta_file(self):
+        source_dataset = Dataset.from_iterable([
+            DatasetItem(id=0, subset='test',
+                image=np.ones((28, 28)),
+                annotations=[Label(0)]
+            ),
+            DatasetItem(id=1, subset='test',
+                image=np.ones((28, 28))
+            ),
+            DatasetItem(id=2, subset='test',
+                image=np.ones((28, 28)),
+                annotations=[Label(1)]
+            )
+        ], categories={
+            AnnotationType.label: LabelCategories.from_iterable(
+                str(label) for label in range(10)),
+        })
+
+        with TestDir() as test_dir:
+            MnistConverter.convert(source_dataset, test_dir, save_images=True,
+                save_dataset_meta=True)
+            parsed_dataset = Dataset.import_from(test_dir, 'mnist')
+
+            self.assertTrue(osp.isfile(osp.join(test_dir, 'dataset_meta.json')))
+            compare_datasets(self, source_dataset, parsed_dataset,
+                require_images=True)
+
 DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mnist_dataset')
 
 class MnistImporterTest(TestCase):
diff --git a/tests/test_synthia_format.py b/tests/test_synthia_format.py
index b092ba6bba..f28396c847 100644
--- a/tests/test_synthia_format.py
+++ b/tests/test_synthia_format.py
@@ -24,6 +24,9 @@
 DUMMY_DATASET_DIR_CUSTOM_LABELMAP = osp.join(osp.dirname(__file__),
     'assets', 'synthia_dataset', 'dataset_with_custom_labelmap')
 
+DUMMY_DATASET_DIR_META_FILE = osp.join(osp.dirname(__file__),
+    'assets', 'synthia_dataset', 'dataset_with_meta_file')
+
 class SynthiaImporterTest(TestCase):
     @mark_requirement(Requirements.DATUM_497)
     def test_can_detect(self):
@@ -148,3 +151,32 @@ def test_can_import_with_custom_labelmap(self):
         dataset = Dataset.import_from(DUMMY_DATASET_DIR_CUSTOM_LABELMAP, 'synthia')
 
         compare_datasets(self, expected_dataset, dataset, require_images=True)
+
+    @mark_requirement(Requirements.DATUM_497)
+    def test_can_import_with_meta_file(self):
+        expected_dataset = Dataset.from_iterable([
+            DatasetItem(id='Stereo_Left/Omni_F/000000',
+                image=np.ones((1, 5, 3)),
+                annotations=[
+                    Mask(np.array([[1, 1, 1, 0, 0]]), label=1),
+                    Mask(np.array([[0, 0, 0, 1, 1]]), label=4),
+                ],
+            ),
+            DatasetItem(id='Stereo_Left/Omni_F/000001',
+                image=np.ones((1, 5, 3)),
+                annotations=[
+                    Mask(np.array([[1, 1, 0, 0, 0]]), label=2),
+                    Mask(np.array([[0, 0, 1, 1, 0]]), label=3),
+                    Mask(np.array([[0, 0, 0, 0, 1]]), label=4),
+                ],
+            )
+        ], categories={
+            AnnotationType.label: LabelCategories.from_iterable(
+                ['background', 'sky', 'building', 'person', 'road']),
+            AnnotationType.mask: MaskCategories({0: (0, 0, 0), 1: (0, 0, 64),
+                2: (0, 128, 128), 3: (128, 0, 64), 4: (0, 192, 128)})
+        })
+
+        dataset = Dataset.import_from(DUMMY_DATASET_DIR_META_FILE, 'synthia')
+
+        compare_datasets(self, expected_dataset, dataset, require_images=True)
diff --git a/tests/test_yolo_format.py b/tests/test_yolo_format.py
index f15a0b9f93..7ffd2fd857 100644
--- a/tests/test_yolo_format.py
+++ b/tests/test_yolo_format.py
@@ -179,6 +179,41 @@ def test_inplace_save_writes_only_updated_data(self):
 
             compare_datasets(self, expected, Dataset.import_from(path, 'yolo'),
                 require_images=True)
 
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_save_and_load_with_meta_file(self):
+        source_dataset = Dataset.from_iterable([
+            DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)),
+                annotations=[
+                    Bbox(0, 2, 4, 2, label=2),
+                    Bbox(0, 1, 2, 3, label=4),
+                ]),
+            DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)),
+                annotations=[
+                    Bbox(0, 2, 4, 2, label=2),
+                    Bbox(3, 3, 2, 3, label=4),
+                    Bbox(2, 1, 2, 3, label=4),
+                ]),
+
+            DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)),
+                annotations=[
+                    Bbox(0, 1, 5, 2, label=2),
+                    Bbox(0, 2, 3, 2, label=5),
+                    Bbox(0, 2, 4, 2, label=6),
+                    Bbox(0, 7, 3, 2, label=7),
+                ]),
+        ], categories={
+            AnnotationType.label: LabelCategories.from_iterable(
+                'label_' + str(i) for i in range(10)),
+        })
+
+        with TestDir() as test_dir:
+            YoloConverter.convert(source_dataset, test_dir, save_images=True,
+                save_dataset_meta=True)
+            parsed_dataset = Dataset.import_from(test_dir, 'yolo')
+
+            self.assertTrue(osp.isfile(osp.join(test_dir, 'dataset_meta.json')))
+            compare_datasets(self, source_dataset, parsed_dataset)
+
 DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'yolo_dataset')
 
 class YoloImporterTest(TestCase):