Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support mask annotations for CVAT data format #1078

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/1049>, <https://github.com/openvinotoolkit/datumaro/pull/1063>, <https://github.com/openvinotoolkit/datumaro/pull/1064>)
- Add OVMSLauncher
(<https://github.com/openvinotoolkit/datumaro/pull/1056>)
- Add Prune API
(<https://github.com/openvinotoolkit/datumaro/pull/1058>)
- Add TritonLauncher
(<https://github.com/openvinotoolkit/datumaro/pull/1059>)
- Migrate DVC v3.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/1072>)
- Add Prune API
(<https://github.com/openvinotoolkit/datumaro/pull/1058>)
- Support mask annotations for CVAT data format
(<https://github.com/openvinotoolkit/datumaro/pull/1078>)

### Enhancements
- Enhance import performance for built-in plugins
Expand Down
1 change: 1 addition & 0 deletions docs/source/docs/data-formats/formats/cvat.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Supported annotation types:
- `Points`
- `Polygon`
- `PolyLine`
- `Mask`

Supported annotation attributes:
- It supports arbitrary boolean, floating-point number, or string attributes.
Expand Down
50 changes: 44 additions & 6 deletions src/datumaro/plugins/data_formats/cvat/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
from copy import deepcopy
from typing import Optional

import numpy as np
from defusedxml import ElementTree

from datumaro.components.annotation import (
AnnotationType,
Bbox,
Label,
LabelCategories,
Mask,
Points,
Polygon,
PolyLine,
Expand All @@ -23,6 +25,7 @@
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.components.importer import ImportContext, Importer
from datumaro.components.media import Image
from datumaro.util import mask_tools

from .format import CvatPath

Expand All @@ -46,8 +49,6 @@ def _find_meta_root(path: str):


class CvatBase(SubsetBase):
_SUPPORTED_SHAPES = ("box", "polygon", "polyline", "points")

def __init__(
self,
path: str,
Expand Down Expand Up @@ -103,7 +104,7 @@ def _parse(self, path):
"height": el.attrib.get("height"),
}
subset = el.attrib.get("subset")
elif el.tag in self._SUPPORTED_SHAPES and (track or image):
elif el.tag in CvatPath.SUPPORTED_IMPORT_SHAPES and (track or image):
attributes = {}
shape = {
"type": None,
Expand Down Expand Up @@ -134,7 +135,7 @@ def _parse(self, path):
except ValueError:
pass
attributes[el.attrib["name"]] = attr_value
elif el.tag in self._SUPPORTED_SHAPES:
elif el.tag in CvatPath.SUPPORTED_IMPORT_SHAPES:
if track is not None:
shape["frame"] = el.attrib["frame"]
shape["outside"] = el.attrib.get("outside") == "1"
Expand All @@ -159,14 +160,22 @@ def _parse(self, path):
],
)
)
elif el.tag == "mask":
shape["rle"] = el.attrib["rle"]
shape["left"] = el.attrib["left"]
shape["top"] = el.attrib["top"]
shape["width"] = el.attrib["width"]
shape["height"] = el.attrib["height"]
else:
shape["points"] = []
for pair in el.attrib["points"].split(";"):
shape["points"].extend(map(float, pair.split(",")))

if subset is None or subset == self._subset:
frame_desc = items.get(shape["frame"], {"annotations": []})
frame_desc["annotations"].append(self._parse_shape_ann(shape, categories))
frame_desc["annotations"].append(
self._parse_shape_ann(shape, categories, image)
)
items[shape["frame"]] = frame_desc
shape = None

Expand Down Expand Up @@ -240,7 +249,7 @@ def _parse_meta(meta_root):
return categories, frame_size, attribute_types

@classmethod
def _parse_shape_ann(cls, ann, categories):
def _parse_shape_ann(cls, ann, categories, image):
ann_id = ann.get("id", 0)
ann_type = ann["type"]

Expand Down Expand Up @@ -307,6 +316,35 @@ def _parse_shape_ann(cls, ann, categories):
group=group,
)

elif ann_type == "mask":
rle = ann.get("rle")
mask_w, mask_h = int(ann.get("width")), int(ann.get("height"))
mask_l, mask_t = int(ann.get("left")), int(ann.get("top"))
img_w, img_h = int(image.get("width")), int(image.get("height"))

rle_uncompressed = {
"counts": np.array([int(str_num) for str_num in rle.split(",")], dtype=np.uint32),
"size": np.array([mask_w, mask_h]),
}

def _gen_mask():
# Manual testing with a dataset exported from CVAT 2.5 showed that
# the RLE in the dataset encodes a (W, H) binary 2D np.ndarray, not (H, W).
# Therefore, we need to transpose the decoded mask to make its shape (H, W).
mask = mask_tools.rle_to_mask(rle_uncompressed).transpose()
canvas = np.zeros(shape=[img_h, img_w], dtype=np.uint8)
canvas[mask_t : mask_t + mask_h, mask_l : mask_l + mask_w] = mask
return canvas

return Mask(
image=_gen_mask,
label=label_id,
z_order=z_order,
id=ann_id,
attributes=attributes,
group=group,
)

else:
raise NotImplementedError("Unknown annotation type '%s'" % ann_type)

Expand Down
44 changes: 31 additions & 13 deletions src/datumaro/plugins/data_formats/cvat/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from datumaro.components.errors import MediaTypeError
from datumaro.components.exporter import Exporter
from datumaro.components.media import Image
from datumaro.util import cast, pairs
from datumaro.util import cast, mask_tools, pairs

from .format import CvatPath

Expand Down Expand Up @@ -106,6 +106,11 @@ def open_points(self, points):
self.xmlgen.startElement("points", points)
self._level += 1

def open_mask(self, mask):
    """Open a ``mask`` XML element.

    Calls ``self._indent()``, starts the ``mask`` element on
    ``self.xmlgen`` and then raises the nesting level by one.

    Args:
        mask: Attribute mapping handed to ``startElement`` for the tag.
    """
    tag_name = "mask"
    self._indent()
    self.xmlgen.startElement(tag_name, mask)
    # One level deeper until the matching close call.
    self._level = self._level + 1

def open_tag(self, tag):
self._indent()
self.xmlgen.startElement("tag", tag)
Expand Down Expand Up @@ -134,6 +139,9 @@ def close_polyline(self):
def close_points(self):
    """Delegate to ``self._close_element`` with the ``points`` tag name."""
    tag_name = "points"
    self._close_element(tag_name)

def close_mask(self):
    """Delegate to ``self._close_element`` with the ``mask`` tag name."""
    tag_name = "mask"
    self._close_element(tag_name)

def close_tag(self):
    """Delegate to ``self._close_element`` with the ``tag`` tag name."""
    tag_name = "tag"
    self._close_element(tag_name)

Expand Down Expand Up @@ -182,12 +190,7 @@ def _write_track(self, track):

self._writer.open_track(track_info)
for ann in annotations:
if ann.type in {
AnnotationType.points,
AnnotationType.polyline,
AnnotationType.polygon,
AnnotationType.bbox,
}:
if ann.type in CvatPath.SUPPORTED_EXPORT_SHAPES:
self._write_shape(ann, write_label_info=False, write_frame=True)
self._writer.close_track()

Expand Down Expand Up @@ -254,12 +257,7 @@ def _write_item(self, item, index):
self._writer.open_image(image_info)

for ann in item.annotations:
if ann.type in {
AnnotationType.points,
AnnotationType.polyline,
AnnotationType.polygon,
AnnotationType.bbox,
}:
if ann.type in CvatPath.SUPPORTED_EXPORT_SHAPES:
self._write_shape(ann, item)
elif ann.type == AnnotationType.label:
self._write_tag(ann, item)
Expand Down Expand Up @@ -389,6 +387,22 @@ def _write_shape(self, shape, item=None, write_label_info=True, write_frame=Fals
]
)
)
elif shape.type == AnnotationType.mask:
# Manual testing with a dataset exported from CVAT 2.5 showed that
# the RLE in the dataset encodes a (W, H) binary 2D np.ndarray, not (H, W).
# Therefore, we need to transpose our mask to (W, H) before encoding it.
mask = shape.image.transpose()
rle_uncompressed = mask_tools.mask_to_rle(mask)
width, height = mask.shape
shape_data.update(
OrderedDict(
rle=", ".join([str(c) for c in rle_uncompressed["counts"]]),
left=str(0),
top=str(0),
width=str(width),
height=str(height),
)
)
else:
shape_data.update(
OrderedDict(
Expand Down Expand Up @@ -418,6 +432,8 @@ def _write_shape(self, shape, item=None, write_label_info=True, write_frame=Fals
self._writer.open_polyline(shape_data)
elif shape.type == AnnotationType.points:
self._writer.open_points(shape_data)
elif shape.type == AnnotationType.mask:
self._writer.open_mask(shape_data)
else:
raise NotImplementedError("unknown shape type")

Expand Down Expand Up @@ -456,6 +472,8 @@ def _write_shape(self, shape, item=None, write_label_info=True, write_frame=Fals
self._writer.close_polyline()
elif shape.type == AnnotationType.points:
self._writer.close_points()
elif shape.type == AnnotationType.mask:
self._writer.close_mask()
else:
raise NotImplementedError("unknown shape type")

Expand Down
18 changes: 18 additions & 0 deletions src/datumaro/plugins/data_formats/cvat/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,27 @@
# SPDX-License-Identifier: MIT


from datumaro.components.annotation import AnnotationType


class CvatPath:
    """Constants describing the CVAT dataset layout and supported shapes."""

    # Directory name and file extension used for images.
    IMAGES_DIR = "images"
    IMAGE_EXT = ".jpg"

    # Attribute names treated as built-in (presumably handled separately
    # from user-defined attributes; verify against the importer/exporter).
    BUILTIN_ATTRS = {"occluded", "outside", "keyframe", "track_id"}

    # XML tag names checked when importing shapes.
    SUPPORTED_IMPORT_SHAPES = {"box", "polygon", "polyline", "points", "mask"}

    # Annotation types checked when exporting shapes.
    SUPPORTED_EXPORT_SHAPES = {
        AnnotationType.bbox,
        AnnotationType.polygon,
        AnnotationType.polyline,
        AnnotationType.points,
        AnnotationType.mask,
    }
43 changes: 25 additions & 18 deletions src/datumaro/util/mask_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

from functools import partial
from itertools import chain
from typing import Tuple
from typing import Dict, Tuple

import numpy as np
from pycocotools import mask as pycocotools_mask

from datumaro._capi import encode
from datumaro.util.image import lazy_image, load_image
Expand Down Expand Up @@ -223,15 +224,14 @@ def mask_to_polygons(mask, area_threshold=1):
Returns:
A list of polygons like [[x1,y1, x2,y2 ...], [...]]
"""
from pycocotools import mask as mask_utils

contours = extract_contours(mask)

polygons = []
for contour in contours:
# Check if the polygon is big enough
rle = mask_utils.frPyObjects([contour], mask.shape[0], mask.shape[1])
area = sum(mask_utils.area(rle))
rle = pycocotools_mask.frPyObjects([contour], mask.shape[0], mask.shape[1])
area = sum(pycocotools_mask.area(rle))
if area_threshold <= area:
polygons.append(contour)
return polygons
Expand Down Expand Up @@ -296,26 +296,24 @@ def crop_covered_segments(
...
]
"""
from pycocotools import mask as mask_utils

segments = [[s] for s in segments]
input_rles = [mask_utils.frPyObjects(s, height, width) for s in segments]
input_rles = [pycocotools_mask.frPyObjects(s, height, width) for s in segments]

for i, rle_bottom in enumerate(input_rles):
area_bottom = sum(mask_utils.area(rle_bottom))
area_bottom = sum(pycocotools_mask.area(rle_bottom))
if area_bottom < area_threshold:
segments[i] = [] if not return_masks else None
continue

rles_top = []
for j in range(i + 1, len(input_rles)):
rle_top = input_rles[j]
iou = sum(mask_utils.iou(rle_bottom, rle_top, [0]))[0]
iou = sum(pycocotools_mask.iou(rle_bottom, rle_top, [0]))[0]

if iou <= iou_threshold:
continue

area_top = sum(mask_utils.area(rle_top))
area_top = sum(pycocotools_mask.area(rle_top))
area_ratio = area_top / area_bottom

# If a segment is fully inside another one, skip this segment
Expand All @@ -334,11 +332,11 @@ def crop_covered_segments(
continue

rle_bottom = rle_bottom[0]
bottom_mask = mask_utils.decode(rle_bottom).astype(np.uint8)
bottom_mask = pycocotools_mask.decode(rle_bottom).astype(np.uint8)

if rles_top:
rle_top = mask_utils.merge(rles_top)
top_mask = mask_utils.decode(rle_top).astype(np.uint8)
rle_top = pycocotools_mask.merge(rles_top)
top_mask = pycocotools_mask.decode(rle_top).astype(np.uint8)

bottom_mask -= top_mask
bottom_mask[bottom_mask != 1] = 0
Expand All @@ -352,14 +350,23 @@ def crop_covered_segments(


def rles_to_mask(rles, width, height):
    """Merge several RLE-encodable objects and decode them into one mask.

    Args:
        rles: Objects accepted by ``pycocotools.mask.frPyObjects``.
        width: Mask width, forwarded as the last ``frPyObjects`` argument.
        height: Mask height, forwarded as the second ``frPyObjects`` argument.

    Returns:
        The array produced by ``pycocotools.mask.decode`` for the merged RLE.
    """
    # Encode, merge and decode exactly once: the merged RLE must not be fed
    # back into frPyObjects, which expects the raw input objects.
    encoded = pycocotools_mask.frPyObjects(rles, height, width)
    merged = pycocotools_mask.merge(encoded)
    return pycocotools_mask.decode(merged)


def rle_to_mask(rle_uncompressed: Dict[str, np.ndarray]) -> np.ndarray:
    """Decode an uncompressed RLE dictionary into a binary mask (2D np.ndarray).

    Args:
        rle_uncompressed: Uncompressed RLE mapping, such as the one returned
            by :func:`datumaro.util.mask_tools.mask_to_rle`. Its ``"size"``
            entry is unpacked into the positional arguments that follow the
            RLE object in ``pycocotools.mask.frPyObjects``.

    Returns:
        The mask produced by ``pycocotools.mask.decode``.
    """
    size = rle_uncompressed["size"]
    encoded = pycocotools_mask.frPyObjects(rle_uncompressed, *size)
    return pycocotools_mask.decode(encoded)


def find_mask_bbox(mask) -> Tuple[int, int, int, int]:
cols = np.any(mask, axis=0)
rows = np.any(mask, axis=1)
Expand Down
Loading