diff --git a/CHANGELOG.md b/CHANGELOG.md index 53e42439729e..7b483fc06ebf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [Market-1501](https://www.aitribune.com/dataset/2018051063) format support () - Ability of upload manifest for dataset with images () - Annotations filters UI using react-awesome-query-builder (https://github.com/openvinotoolkit/cvat/issues/1418) +- [ICDAR](https://rrc.cvc.uab.es/?ch=2) format support () ### Changed diff --git a/README.md b/README.md index 0c62b9facbc9..e79deaf7dc67 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ For more information about supported formats look at the | [WIDER Face](http://shuoyang1213.me/WIDERFACE/) | X | X | | [VGGFace2](https://github.com/ox-vgg/vgg_face2) | X | X | | [Market-1501](https://www.aitribune.com/dataset/2018051063) | X | X | +| [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2) | X | X | ## Deep learning serverless functions for automatic labeling diff --git a/cvat/apps/dataset_manager/formats/README.md b/cvat/apps/dataset_manager/formats/README.md index 318b4cfe87e8..cee318541928 100644 --- a/cvat/apps/dataset_manager/formats/README.md +++ b/cvat/apps/dataset_manager/formats/README.md @@ -23,6 +23,7 @@ - [WIDER Face](#widerface) - [VGGFace2](#vggface2) - [Market-1501](#market1501) + - [ICDAR13/15](#icdar) ## How to add a new annotation format support @@ -817,9 +818,9 @@ Downloaded file: a zip archive of the following structure: ```bash # if we save images: taskname.zip/ -└── label1/ - ├── label1_image1.jpg - └── label1_image2.jpg +├── label1/ +| ├── label1_image1.jpg +| └── label1_image2.jpg └── label2/ ├── label2_image1.jpg ├── label2_image3.jpg @@ -827,7 +828,7 @@ taskname.zip/ # if we keep only annotation: taskname.zip/ -└── .txt +├── .txt └── synsets.txt ``` @@ -849,12 +850,12 @@ Downloaded file: a zip archive of the following structure: ```bash taskname.zip/ ├── labelmap.txt # optional, 
required for non-CamVid labels -└── / - ├── image1.png - └── image2.png -└── annot/ - ├── image1.png - └── image2.png +├── / +| ├── image1.png +| └── image2.png +├── annot/ +| ├── image1.png +| └── image2.png └── .txt # labelmap.txt @@ -974,3 +975,72 @@ s1 - sequence Uploaded file: a zip archive of the structure above - supported annotations: Label `market-1501` with atrributes (`query`, `person_id`, `camera_id`) + +### [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2) + +#### ICDAR13/15 Dumper + +Downloaded file: a zip archive of the following structure: + +```bash +# word recognition task +taskname.zip/ +└── word_recognition/ + └── / + ├── images + | ├── word1.png + | └── word2.png + └── gt.txt +# text localization task +taskname.zip/ +└── text_localization/ + └── / + ├── images + | ├── img_1.png + | └── img_2.png + ├── gt_img_1.txt + └── gt_img_2.txt +# text segmentation task +taskname.zip/ +└── text_segmentation/ + └── / + ├── images + | ├── 1.png + | └── 2.png + ├── 1_GT.bmp + ├── 1_GT.txt + ├── 2_GT.bmp + └── 2_GT.txt +``` + +**Word recognition task**: + +- supported annotations: Label `icdar` with attribute `text` + +**Text localization task**: + +- supported annotations: Rectangles and Polygons with label `icdar` + and attribute `text` + +**Text segmentation task**: + +- supported annotations: Rectangles and Polygons with label `icdar` + and attributes `index`, `text`, `color`, `center` + +#### ICDAR13/15 Loader + +Uploaded file: a zip archive of the structure above + +**Word recognition task**: + +- supported annotations: Label `icdar` with attribute `text` + +**Text localization task**: + +- supported annotations: Rectangles and Polygons with label `icdar` + and attribute `text` + +**Text segmentation task**: + +- supported annotations: Rectangles and Polygons with label `icdar` + and attributes `index`, `text`, `color`, `center` diff --git a/cvat/apps/dataset_manager/formats/icdar.py b/cvat/apps/dataset_manager/formats/icdar.py new file mode 100644 index
000000000000..8df8f3e40fd8 --- /dev/null +++ b/cvat/apps/dataset_manager/formats/icdar.py @@ -0,0 +1,131 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import zipfile +from tempfile import TemporaryDirectory + +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import (AnnotationType, Caption, Label, + LabelCategories, Transform) + +from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, + import_dm_annotations) +from cvat.apps.dataset_manager.util import make_zip_archive + +from .registry import dm_env, exporter, importer + + +class AddLabelToAnns(Transform): + def __init__(self, extractor, label): + super().__init__(extractor) + + assert isinstance(label, str) + self._categories = {} + label_cat = self._extractor.categories().get(AnnotationType.label) + if not label_cat: + label_cat = LabelCategories() + self._label = label_cat.add(label) + self._categories[AnnotationType.label] = label_cat + + def categories(self): + return self._categories + + def transform_item(self, item): + annotations = item.annotations + for ann in annotations: + if ann.type in [AnnotationType.polygon, + AnnotationType.bbox, AnnotationType.mask]: + ann.label = self._label + return item.wrap(annotations=annotations) + +class CaptionToLabel(Transform): + def __init__(self, extractor, label): + super().__init__(extractor) + + assert isinstance(label, str) + self._categories = {} + label_cat = self._extractor.categories().get(AnnotationType.label) + if not label_cat: + label_cat = LabelCategories() + self._label = label_cat.add(label) + self._categories[AnnotationType.label] = label_cat + + def categories(self): + return self._categories + + def transform_item(self, item): + annotations = item.annotations + captions = [ann for ann in annotations + if ann.type == AnnotationType.caption] + for ann in captions: + annotations.append(Label(self._label, + attributes={'text': ann.caption})) + annotations.remove(ann) + 
return item.wrap(annotations=annotations) + +class LabelToCaption(Transform): + def transform_item(self, item): + annotations = item.annotations + anns = [p for p in annotations + if 'text' in p.attributes] + for ann in anns: + annotations.append(Caption(ann.attributes['text'])) + annotations.remove(ann) + return item.wrap(annotations=annotations) + +@exporter(name='ICDAR Recognition', ext='ZIP', version='1.0') +def _export_recognition(dst_file, task_data, save_images=False): + dataset = Dataset.from_extractors(CvatTaskDataExtractor( + task_data, include_images=save_images), env=dm_env) + dataset.transform(LabelToCaption) + with TemporaryDirectory() as temp_dir: + dataset.export(temp_dir, 'icdar_word_recognition', save_images=save_images) + make_zip_archive(temp_dir, dst_file) + +@importer(name='ICDAR Recognition', ext='ZIP', version='1.0') +def _import(src_file, task_data): + with TemporaryDirectory() as tmp_dir: + zipfile.ZipFile(src_file).extractall(tmp_dir) + dataset = Dataset.import_from(tmp_dir, 'icdar_word_recognition', env=dm_env) + dataset.transform(CaptionToLabel, 'icdar') + import_dm_annotations(dataset, task_data) + + +@exporter(name='ICDAR Localization', ext='ZIP', version='1.0') +def _export_localization(dst_file, task_data, save_images=False): + dataset = Dataset.from_extractors(CvatTaskDataExtractor( + task_data, include_images=save_images), env=dm_env) + with TemporaryDirectory() as temp_dir: + dataset.export(temp_dir, 'icdar_text_localization', save_images=save_images) + make_zip_archive(temp_dir, dst_file) + +@importer(name='ICDAR Localization', ext='ZIP', version='1.0') +def _import(src_file, task_data): + with TemporaryDirectory() as tmp_dir: + zipfile.ZipFile(src_file).extractall(tmp_dir) + + dataset = Dataset.import_from(tmp_dir, 'icdar_text_localization', env=dm_env) + dataset.transform(AddLabelToAnns, 'icdar') + import_dm_annotations(dataset, task_data) + + +@exporter(name='ICDAR Segmentation', ext='ZIP', version='1.0') +def 
_export_segmentation(dst_file, task_data, save_images=False): + dataset = Dataset.from_extractors(CvatTaskDataExtractor( + task_data, include_images=save_images), env=dm_env) + with TemporaryDirectory() as temp_dir: + dataset.transform('polygons_to_masks') + dataset.transform('boxes_to_masks') + dataset.transform('merge_instance_segments') + dataset.export(temp_dir, 'icdar_text_segmentation', save_images=save_images) + make_zip_archive(temp_dir, dst_file) + +@importer(name='ICDAR Segmentation', ext='ZIP', version='1.0') +def _import(src_file, task_data): + with TemporaryDirectory() as tmp_dir: + zipfile.ZipFile(src_file).extractall(tmp_dir) + dataset = Dataset.import_from(tmp_dir, 'icdar_text_segmentation', env=dm_env) + dataset.transform(AddLabelToAnns, 'icdar') + dataset.transform('masks_to_polygons') + import_dm_annotations(dataset, task_data) diff --git a/cvat/apps/dataset_manager/formats/registry.py b/cvat/apps/dataset_manager/formats/registry.py index f46a988ff430..e662485498cf 100644 --- a/cvat/apps/dataset_manager/formats/registry.py +++ b/cvat/apps/dataset_manager/formats/registry.py @@ -98,3 +98,4 @@ def make_exporter(name): import cvat.apps.dataset_manager.formats.widerface import cvat.apps.dataset_manager.formats.vggface2 import cvat.apps.dataset_manager.formats.market1501 +import cvat.apps.dataset_manager.formats.icdar diff --git a/cvat/apps/dataset_manager/tests/test_formats.py b/cvat/apps/dataset_manager/tests/test_formats.py index faee5d182910..f4589feedc05 100644 --- a/cvat/apps/dataset_manager/tests/test_formats.py +++ b/cvat/apps/dataset_manager/tests/test_formats.py @@ -285,6 +285,9 @@ def test_export_formats_query(self): 'WiderFace 1.0', 'VGGFace2 1.0', 'Market-1501 1.0', + 'ICDAR Recognition 1.0', + 'ICDAR Localization 1.0', + 'ICDAR Segmentation 1.0', }) def test_import_formats_query(self): @@ -306,6 +309,9 @@ def test_import_formats_query(self): 'WiderFace 1.0', 'VGGFace2 1.0', 'Market-1501 1.0', + 'ICDAR Recognition 1.0', + 'ICDAR 
Localization 1.0', + 'ICDAR Segmentation 1.0', }) def test_exports(self): @@ -319,7 +325,7 @@ def check(file_path): format_name = f.DISPLAY_NAME if format_name == "VGGFace2 1.0": - self.skipTest("Format does not support multiple shapes for one item") + self.skipTest("Format is disabled") for save_images in { True, False }: images = self._generate_task_images(3) @@ -349,6 +355,9 @@ def test_empty_images_are_exported(self): ('WiderFace 1.0', 'wider_face'), ('VGGFace2 1.0', 'vgg_face2'), ('Market-1501 1.0', 'market1501'), + ('ICDAR Recognition 1.0', 'icdar_word_recognition'), + ('ICDAR Localization 1.0', 'icdar_text_localization'), + ('ICDAR Segmentation 1.0', 'icdar_text_segmentation'), ]: with self.subTest(format=format_name): if not dm.formats.registry.EXPORT_FORMATS[format_name].ENABLED: diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index 67fb41b5fa90..01b85641ecbc 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -2913,6 +2913,49 @@ def _create_task(self, owner, assignee, annotation_format=""): }, ] }] + elif annotation_format in ["ICDAR Recognition 1.0", + "ICDAR Localization 1.0"]: + data["labels"] = [{ + "name": "icdar", + "attributes": [ + { + "name": "text", + "mutable": False, + "input_type": "text", + "values": ["word_1", "word_2", "word_3"] + }, + ] + }] + elif annotation_format == "ICDAR Segmentation 1.0": + data["labels"] = [{ + "name": "icdar", + "attributes": [ + { + "name": "text", + "mutable": False, + "input_type": "text", + "values": ["word_1", "word_2", "word_3"] + }, + { + "name": "index", + "mutable": False, + "input_type": "number", + "values": ["0", "1", "2"] + }, + { + "name": "color", + "mutable": False, + "input_type": "text", + "values": ["100 110 240", "10 15 20", "120 128 64"] + }, + { + "name": "center", + "mutable": False, + "input_type": "text", + "values": ["1 2", "2 4", "10 45"] + }, + ] + }] with ForceLogin(owner, self.client): 
response = self.client.post('/api/v1/tasks', data=data, format="json") @@ -3892,7 +3935,8 @@ def _run_api_v1_tasks_id_annotations_dump_load(self, owner, assignee, annotator) HTTP_201_CREATED = status.HTTP_401_UNAUTHORIZED def _get_initial_annotation(annotation_format): - if annotation_format != "Market-1501 1.0": + if annotation_format not in ["Market-1501 1.0", "ICDAR Recognition 1.0", + "ICDAR Localization 1.0", "ICDAR Segmentation 1.0"]: rectangle_tracks_with_attrs = [{ "frame": 0, "label_id": task["labels"][0]["id"], @@ -4237,6 +4281,116 @@ def _get_initial_annotation(annotation_format): }] annotations["tags"] = tags_with_attrs + elif annotation_format == "ICDAR Recognition 1.0": + tags_with_attrs = [{ + "frame": 1, + "label_id": task["labels"][0]["id"], + "group": 0, + "source": "manual", + "attributes": [ + { + "spec_id": task["labels"][0]["attributes"][0]["id"], + "value": task["labels"][0]["attributes"][0]["values"][1] + } + ], + }] + + annotations["tags"] = tags_with_attrs + + elif annotation_format == "ICDAR Localization 1.0": + rectangle_shapes_with_attrs = [{ + "frame": 0, + "label_id": task["labels"][0]["id"], + "group": 0, + "source": "manual", + "attributes": [ + { + "spec_id": task["labels"][0]["attributes"][0]["id"], + "value": task["labels"][0]["attributes"][0]["values"][0] + }, + ], + "points": [1.0, 2.1, 10.6, 53.22], + "type": "rectangle", + "occluded": False, + }] + polygon_shapes_with_attrs = [{ + "frame": 0, + "label_id": task["labels"][0]["id"], + "group": 0, + "source": "manual", + "attributes": [ + { + "spec_id": task["labels"][0]["attributes"][0]["id"], + "value": task["labels"][0]["attributes"][0]["values"][1] + }, + ], + "points": [20.0, 0.1, 10, 3.22, 4, 7, 10, 30], + "type": "polygon", + "occluded": False, + }] + + annotations["shapes"] = rectangle_shapes_with_attrs \ + + polygon_shapes_with_attrs + + elif annotation_format == "ICDAR Segmentation 1.0": + rectangle_shapes_with_attrs = [{ + "frame": 0, + "label_id": 
task["labels"][0]["id"], + "group": 0, + "source": "manual", + "attributes": [ + { + "spec_id": task["labels"][0]["attributes"][0]["id"], + "value": task["labels"][0]["attributes"][0]["values"][0] + }, + { + "spec_id": task["labels"][0]["attributes"][1]["id"], + "value": task["labels"][0]["attributes"][1]["values"][0] + }, + { + "spec_id": task["labels"][0]["attributes"][2]["id"], + "value": task["labels"][0]["attributes"][2]["values"][1] + }, + { + "spec_id": task["labels"][0]["attributes"][3]["id"], + "value": task["labels"][0]["attributes"][3]["values"][2] + } + ], + "points": [1.0, 2.1, 10.6, 53.22], + "type": "rectangle", + "occluded": False, + }] + polygon_shapes_with_attrs = [{ + "frame": 0, + "label_id": task["labels"][0]["id"], + "group": 0, + "source": "manual", + "attributes": [ + { + "spec_id": task["labels"][0]["attributes"][0]["id"], + "value": task["labels"][0]["attributes"][0]["values"][1] + }, + { + "spec_id": task["labels"][0]["attributes"][1]["id"], + "value": task["labels"][0]["attributes"][1]["values"][1] + }, + { + "spec_id": task["labels"][0]["attributes"][2]["id"], + "value": task["labels"][0]["attributes"][2]["values"][0] + }, + { + "spec_id": task["labels"][0]["attributes"][3]["id"], + "value": task["labels"][0]["attributes"][3]["values"][1] + } + ], + "points": [20.0, 0.1, 10, 3.22, 4, 7, 10, 30], + "type": "polygon", + "occluded": False, + }] + + annotations["shapes"] = rectangle_shapes_with_attrs \ + + polygon_shapes_with_attrs + else: raise Exception("Unknown format {}".format(annotation_format)) @@ -4333,7 +4487,8 @@ def _get_initial_annotation(annotation_format): self.assertEqual(response.status_code, HTTP_201_CREATED) # 7. 
check annotation - if import_format in {"Segmentation mask 1.1", "MOTS PNG 1.0", "CamVid 1.0"}: + if export_format in {"Segmentation mask 1.1", "MOTS PNG 1.0", + "CamVid 1.0", "ICDAR Segmentation 1.0"}: continue # can't really predict the result to check response = self._get_api_v1_tasks_id_annotations(task["id"], annotator) self.assertEqual(response.status_code, HTTP_200_OK) diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index 25a1573a0f8f..2d5c8a1fdf9b 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -50,4 +50,4 @@ open3d==0.11.2 # --no-binary=pycocotools: workaround for binary incompatibility on numpy 1.20 # of pycocotools and tensorflow 2.4.1 # when pycocotools is installed by wheel in python 3.8+ -datumaro==0.1.6.1 --no-binary=datumaro --no-binary=pycocotools +datumaro==0.1.7 --no-binary=datumaro --no-binary=pycocotools