Fix Mapillary Vistas data format (#977)

### Summary

This PR fixes the Mapillary Vistas data format support:

- Added `format_version` ("v1.2" or "v2.0", default "v2.0") and `parse_polygon`
  options to the format base and importer, exposed on the CLI as
  `--format-version` and `--parse-polygon`.
- Raised `ImportError` when `parse_polygon=True` is combined with v1.2, which
  has no polygon annotations.
- Resolved the panoptic config file per version (`panoptic_2018.json` for v1.2,
  `panoptic_2020.json` for v2.0) and added `PANOPTIC_DIR` to the v1.2
  annotation directories.
- Parsed polygon annotations inline while loading panoptic and instance items,
  replacing the separate `_load_polygons()` pass, and built instance items by
  walking the images directory; polygon coordinates are now cast to `int`.
- Converted `%`-style error and warning messages to f-strings.
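
A minimal usage sketch of the new options through the Python API (hedged: the
`Dataset.import_from` entry point and the `mapillary_vistas` format name follow
Datumaro's usual conventions and are not part of this diff):

```python
# Sketch only: assumes the "mapillary_vistas" format name and the public
# Dataset.import_from() API; paths are placeholders.
from datumaro.components.dataset import Dataset

# v2.0 layout ships polygon JSON files, so parse_polygon may be enabled.
dataset = Dataset.import_from(
    "path/to/mapillary_vistas",
    "mapillary_vistas",
    format_version="v2.0",
    parse_polygon=True,
)

# v1.2 has no polygon annotations; requesting them raises ImportError.
dataset_v12 = Dataset.import_from(
    "path/to/mapillary_vistas",
    "mapillary_vistas",
    format_version="v1.2",
)
```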

### How to test
The changes are covered by unit tests (see the checklist below). For a manual
check against a local copy of the dataset, a hedged sketch:
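
```python
# Hedged manual check: the dataset path is a placeholder for a local copy of
# Mapillary Vistas v2.0; Polygon lives in datumaro.components.annotation.
from datumaro.components.annotation import Polygon
from datumaro.components.dataset import Dataset

dataset = Dataset.import_from(
    "path/to/mapillary_vistas",
    "mapillary_vistas",
    format_version="v2.0",
    parse_polygon=True,
)
assert any(
    isinstance(ann, Polygon) for item in dataset for ann in item.annotations
), "expected polygon annotations to be parsed for v2.0"
```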

### Checklist
- [x] I have added unit tests to cover my changes.
- [ ] I have added integration tests to cover my changes.
- [x] I have added the description of my changes into
[CHANGELOG](https://github.com/openvinotoolkit/datumaro/blob/develop/CHANGELOG.md).
- [x] I have updated the
[documentation](https://github.com/openvinotoolkit/datumaro/tree/develop/docs)
accordingly.

### License

- [ ] I submit _my code changes_ under the same [MIT
License](https://github.com/openvinotoolkit/datumaro/blob/develop/LICENSE)
that covers the project.
  Feel free to contact the maintainers if that's a concern.
- [ ] I have updated the license header for each file (see an example
below).

```python
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
```

---------

Co-authored-by: wonjuleee <wonju@intel.com>
wonjuleee authored Apr 27, 2023
1 parent 6f1c6dd commit ce714d2

Showing 53 changed files with 685 additions and 790 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
@@ -6,11 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## \[Unreleased\]

### New features
- Add CocoRoboflowImporter
(<https://github.com/openvinotoolkit/datumaro/pull/976>)

### Enhancements

### Bug fixes
- Fix Mapillary Vistas data format (<https://github.com/openvinotoolkit/datumaro/pull/977>)

## 20/04/2023 - Release 1.2.0
### New features
- Add Skill Up section to documentation
164 changes: 86 additions & 78 deletions datumaro/plugins/data_formats/mapillary_vistas/base.py
@@ -1,7 +1,6 @@
# Copyright (C) 2022 Intel Corporation
# Copyright (C) 2022-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
import glob
import logging as log
import os
import os.path as osp
@@ -34,8 +33,21 @@

class _MapillaryVistasBase(SubsetBase):
def __init__(
self, path, task, subset=None, use_original_config=False, keep_original_category_ids=False
self,
path,
task,
subset=None,
use_original_config=False,
keep_original_category_ids=False,
format_version="v2.0",
parse_polygon=False,
):
if format_version == "v1.2" and parse_polygon is True:
raise ImportError(
f"Format version {format_version} is not available for polygons. "
"Please try with v2.0 for parsing polygons."
)

assert osp.isdir(path), path
self._path = path
if subset is None:
@@ -45,24 +57,25 @@ def __init__(
annotations_dirs = [d for d in os.listdir(path) if d in MapillaryVistasPath.ANNOTATION_DIRS]

if len(annotations_dirs) == 0:
expected_dirs = ",".join(MapillaryVistasPath.ANNOTATION_DIRS[format_version])
raise NotADirectoryError(
"Can't find annotation directory at %s. "
"Expected one of these directories: %s"
% (path, ",".join(MapillaryVistasPath.ANNOTATIONS_DIR_PATTERNS))
f"Can't find annotation directory at {path}. "
f"Expected one of these directories: {expected_dirs}."
)
elif len(annotations_dirs) > 1:
skipped_dirs = ",".join(annotations_dirs[1:])
log.warning(
"Directory(-es): %s will be skipped, dataset should contain "
"only one annotation directory" % ",".join(annotations_dirs[1:])
f"Directory(-es): {skipped_dirs} will be skipped, dataset should "
"contain only one annotation directory"
)

self._use_original_config = use_original_config
self._format_version = annotations_dirs[0]
self._annotations_dir = osp.join(path, annotations_dirs[0])
self._format_version = format_version
self._parse_polygon = parse_polygon
self._annotations_dir = osp.join(path, format_version)
self._images_dir = osp.join(path, MapillaryVistasPath.IMAGES_DIR)
self._task = task

if self._task == MapillaryVistasTask.instances:
if task == MapillaryVistasTask.instances:
if has_meta_file(path):
self._categories = make_mapillary_instance_categories(parse_meta_file(path))
else:
@@ -75,16 +88,16 @@ def __init__(
)
self._items = self._load_panoptic_items(panoptic_config)

@staticmethod
def _load_panoptic_config(path):
def _load_panoptic_config(self, path):
panoptic_config_path = osp.join(
path, MapillaryVistasPath.PANOPTIC_DIR, MapillaryVistasPath.PANOPTIC_CONFIG
path,
MapillaryVistasPath.PANOPTIC_DIR,
MapillaryVistasPath.PANOPTIC_CONFIG[self._format_version],
)

if not osp.isfile(panoptic_config_path):
raise FileNotFoundError(
"Can't find panoptic config file: '%s' at '%s'"
% (MapillaryVistasPath.PANOPTIC_CONFIG, panoptic_config_path)
f"Can't find panoptic config file: {MapillaryVistasPath.PANOPTIC_CONFIG} at {panoptic_config_path}"
)

return parse_json_file(panoptic_config_path)
@@ -127,6 +140,8 @@ def _load_panoptic_items(self, config):
for img in config["images"]
}

polygon_dir = osp.join(self._annotations_dir, MapillaryVistasPath.POLYGON_DIR)

for item_ann in config["annotations"]:
item_id = item_ann["image_id"]
image = None
@@ -136,13 +151,13 @@ def _load_panoptic_items(self, config):
size=self._get_image_size(images_info[item_id]),
)

annotations = []
mask_path = osp.join(
self._annotations_dir, MapillaryVistasPath.PANOPTIC_DIR, item_ann["file_name"]
)
mask = lazy_image(mask_path, loader=self._load_pan_mask)
mask = CompiledMask(instance_mask=mask)

annotations = []
for segment_info in item_ann["segments_info"]:
cat_id = self._get_label_id(segment_info)
segment_id = segment_info["id"]
@@ -157,11 +172,24 @@
)
)

if self._parse_polygon:
polygon_path = osp.join(polygon_dir, item_id + ".json")
item_info = parse_json_file(polygon_path)

polygons = item_info["objects"]
for polygon in polygons:
label = polygon["label"]
label_id = self._categories[AnnotationType.label].find(label)[0]
if label_id is None:
label_id = self._categories[AnnotationType.label].add(label)

points = [int(coord) for point in polygon["polygon"] for coord in point]
annotations.append(Polygon(label=label_id, points=points))

items[item_id] = DatasetItem(
id=item_id, subset=self._subset, annotations=annotations, media=image
)

self._load_polygons(items)
return items.values()

def _load_instances_categories(self):
@@ -180,80 +208,60 @@ def _load_instances_categories(self):
def _load_instances_items(self):
items = {}

instances_dir = osp.join(self._annotations_dir, MapillaryVistasPath.INSTANCES_DIR)
for instance_path in find_images(instances_dir, recursive=True):
item_id = osp.splitext(osp.relpath(instance_path, instances_dir))[0]
# class_dir = osp.join(self._annotations_dir, MapillaryVistasPath.CLASS_DIR)
# for class_path in find_images(class_dir, recursive=True):
# item_id = osp.splitext(osp.relpath(class_path, class_dir))[0]
# if item_id in items:
# continue

mask = load_image(instance_path, dtype=np.uint32)
# from PIL import Image as PILImage

annotations = []
for uval in np.unique(mask):
label_id, instance_id = uval >> 8, uval & 255
annotations.append(
Mask(image=self._lazy_extract_mask(mask, uval), label=label_id, id=instance_id)
)
# class_mask = np.array(PILImage.open(class_path))
# classes = np.unique(class_mask)

items[item_id] = DatasetItem(id=item_id, subset=self._subset, annotations=annotations)
# annotations = []
# for label_id in classes:
# annotations.append(
# Mask(label=label_id, image=self._lazy_extract_mask(class_mask, label_id))
# )

class_dir = osp.join(self._annotations_dir, MapillaryVistasPath.CLASS_DIR)
for class_path in find_images(class_dir, recursive=True):
item_id = osp.splitext(osp.relpath(class_path, class_dir))[0]
if item_id in items:
continue
# items[item_id] = DatasetItem(id=item_id, subset=self._subset, annotations=annotations)

from PIL import Image as PILImage
instance_dir = osp.join(self._annotations_dir, MapillaryVistasPath.INSTANCES_DIR)
polygon_dir = osp.join(self._annotations_dir, MapillaryVistasPath.POLYGON_DIR)
for image_path in find_images(self._images_dir, recursive=True):
item_id = osp.splitext(osp.relpath(image_path, self._images_dir))[0]
image = Image.from_file(path=image_path)

class_mask = np.array(PILImage.open(class_path))
classes = np.unique(class_mask)
instance_path = osp.join(instance_dir, item_id + MapillaryVistasPath.MASK_EXT)
mask = load_image(instance_path, dtype=np.uint32)

annotations = []
for label_id in classes:
for uval in np.unique(mask):
label_id, instance_id = uval >> 8, uval & 255
annotations.append(
Mask(label=label_id, image=self._lazy_extract_mask(class_mask, label_id))
Mask(image=self._lazy_extract_mask(mask, uval), label=label_id, id=instance_id)
)

items[item_id] = DatasetItem(id=item_id, subset=self._subset, annotations=annotations)
if self._parse_polygon:
polygon_path = osp.join(polygon_dir, item_id + ".json")
item_info = parse_json_file(polygon_path)

for image_path in find_images(self._images_dir, recursive=True):
item_id = osp.splitext(osp.relpath(image_path, self._images_dir))[0]
image = Image.from_file(path=image_path)
if item_id in items:
items[item_id].media = image
else:
items[item_id] = DatasetItem(id=item_id, subset=self._subset, media=image)
polygons = item_info["objects"]
for polygon in polygons:
label = polygon["label"]
label_id = self._categories[AnnotationType.label].find(label)[0]
if label_id is None:
label_id = self._categories[AnnotationType.label].add(label)

self._load_polygons(items)
return items.values()

def _load_polygons(self, items):
polygons_dir = osp.join(self._annotations_dir, MapillaryVistasPath.POLYGON_DIR)
for item_path in glob.glob(osp.join(polygons_dir, "**", "*.json"), recursive=True):
item_id = osp.splitext(osp.relpath(item_path, polygons_dir))[0]
item = items.get(item_id)
item_info = {}
item_info = parse_json_file(item_path)
points = [int(coord) for point in polygon["polygon"] for coord in point]
annotations.append(Polygon(label=label_id, points=points))

image_size = self._get_image_size(item_info)
if image_size and item.has_image:
item.media = item.image.from_self(size=image_size)
items[item_id] = DatasetItem(
id=item_id, subset=self._subset, media=image, annotations=annotations
)

polygons = item_info["objects"]
annotations = []
for polygon in polygons:
label = polygon["label"]
label_id = self._categories[AnnotationType.label].find(label)[0]
if label_id is None:
label_id = self._categories[AnnotationType.label].add(label)

points = [coord for point in polygon["polygon"] for coord in point]
annotations.append(Polygon(label=label_id, points=points))

if item is None:
items[item_id] = DatasetItem(
id=item_id, subset=self._subset, annotations=annotations
)
else:
item.annotations.extend(annotations)
return items.values()

@staticmethod
def _get_image_size(image_info):
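Note for reviewers: two small decoding details in `base.py` are easy to miss.
The uint32 instance masks pack the class id in the high bits and the instance
index in the low byte, and each polygon's `[x, y]` point list is flattened to
ints for `Polygon(points=...)`. A standalone sketch with synthetic inputs (not
the loader itself; the label string below is made up):

```python
import numpy as np

# 1) Instance mask encoding, mirroring `uval >> 8` / `uval & 255`
#    in _load_instances_items(): value = (label_id << 8) | instance_id.
mask = np.array(
    [[(7 << 8) | 1, (7 << 8) | 1],
     [(7 << 8) | 2, (13 << 8) | 0]],
    dtype=np.uint32,
)
for uval in np.unique(mask):
    label_id, instance_id = uval >> 8, uval & 255
    print(f"label={label_id} instance={instance_id}")
# label=7 instance=1
# label=7 instance=2
# label=13 instance=0

# 2) Polygon flattening, mirroring the comprehension in the loaders:
#    a synthetic "objects" entry with [x, y] pairs becomes a flat int list.
polygon = {"label": "construction--fence", "polygon": [[10.0, 20.0], [30.5, 40.9]]}
points = [int(coord) for point in polygon["polygon"] for coord in point]
print(points)  # [10, 20, 30, 40]
```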
6 changes: 3 additions & 3 deletions datumaro/plugins/data_formats/mapillary_vistas/format.py
@@ -1,4 +1,4 @@
# Copyright (C) 2022 Intel Corporation
# Copyright (C) 2022-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

@@ -49,12 +49,12 @@ class MapillaryVistasPath:
MASK_EXT = ".png"

ANNOTATION_DIRS = {
"v1.2": [CLASS_DIR, INSTANCES_DIR],
"v1.2": [CLASS_DIR, INSTANCES_DIR, PANOPTIC_DIR],
"v2.0": [CLASS_DIR, INSTANCES_DIR, PANOPTIC_DIR, POLYGON_DIR],
}

CONFIG_FILES = {"v1.2": "config_v1.2.json", "v2.0": "config_v2.0.json"}
PANOPTIC_CONFIG = "panoptic_2020.json"
PANOPTIC_CONFIG = {"v1.2": "panoptic_2018.json", "v2.0": "panoptic_2020.json"}

CLASS_BY_DIR = {
INSTANCES_DIR: MapillaryVistasTask.instances,
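Note for reviewers: with `PANOPTIC_CONFIG` keyed by version, config resolution
becomes a plain dict lookup. A minimal sketch (the directory constant is a
placeholder; the real values live in `format.py`):

```python
import os.path as osp

PANOPTIC_DIR = "panoptic"  # placeholder; the actual constant is defined in format.py
PANOPTIC_CONFIG = {"v1.2": "panoptic_2018.json", "v2.0": "panoptic_2020.json"}

def panoptic_config_path(annotations_dir: str, format_version: str) -> str:
    # Mirrors the lookup added to _MapillaryVistasBase._load_panoptic_config().
    return osp.join(annotations_dir, PANOPTIC_DIR, PANOPTIC_CONFIG[format_version])

print(panoptic_config_path("dataset/v1.2", "v1.2"))
# dataset/v1.2/panoptic/panoptic_2018.json (with the placeholder above)
```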
27 changes: 20 additions & 7 deletions datumaro/plugins/data_formats/mapillary_vistas/importer.py
@@ -1,4 +1,4 @@
# Copyright (C) 2021 Intel Corporation
# Copyright (C) 2022-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
import glob
@@ -7,6 +7,7 @@

from datumaro.components.dataset_base import DEFAULT_SUBSET_NAME
from datumaro.components.importer import Importer
from datumaro.util import str_to_bool

from .base import MapillaryVistasInstancesBase, MapillaryVistasPanopticBase
from .format import MapillaryVistasPath, MapillaryVistasTask
@@ -21,6 +22,18 @@ class MapillaryVistasImporter:
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument(
"--format-version",
default="v2.0",
type=str,
help="Use original config*.json file for your version of dataset",
)
parser.add_argument(
"--parse-polygon",
type=str_to_bool,
default=False,
help="Use original config*.json file for your version of dataset",
)
parser.add_argument(
"--use-original-config",
action="store_true",
@@ -39,15 +52,15 @@ def __call__(self, path, **extra_params):
subsets = self.find_sources(path)

if len(subsets) == 0:
raise Exception("Failed to find Mapillary Vistas dataset at '%s'" % path)
raise Exception(f"Failed to find Mapillary Vistas dataset at {path}")

tasks = list(set(task for subset in subsets.values() for task in subset))
selected_task = tasks[0]
if 1 < len(tasks):
task_types = ",".join(task.name for task in tasks)
log.warning(
"Found potentially conflicting source types: %s"
"Only one one type will be used: %s"
% (",".join(task.name for task in tasks), selected_task.name)
f"Found potentially conflicting source types: {task_types}"
f"Only one one type will be used: {selected_task.name}"
)

if selected_task == MapillaryVistasTask.instances:
@@ -60,8 +73,8 @@

if not has_config and not extra_params.get("use_original_config"):
raise Exception(
"Failed to find config*.json at '%s'. "
"See extra args for using original config" % path
f"Failed to find config*.json at {path}. "
"See extra args for using original config."
)

sources = [
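Note for reviewers: `--parse-polygon` goes through `str_to_bool`, so it takes
an explicit value rather than acting as a store-true flag. A hedged sketch of
the wiring with plain argparse (the `str_to_bool` body below is an assumption
about `datumaro.util`'s semantics, not a copy of it):

```python
import argparse

def str_to_bool(value: str) -> bool:
    # Assumed semantics: accept the usual true/false spellings.
    lowered = value.lower()
    if lowered in ("yes", "true", "t", "y", "1"):
        return True
    if lowered in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"Can't convert {value!r} to bool")

parser = argparse.ArgumentParser()
parser.add_argument("--format-version", default="v2.0", type=str)
parser.add_argument("--parse-polygon", type=str_to_bool, default=False)

args = parser.parse_args(["--format-version", "v1.2", "--parse-polygon", "false"])
print(args.format_version, args.parse_polygon)  # v1.2 False
```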
