openvinotoolkit · zhiltsov-max · Mar 31, 2021 · Mar 30, 2021 · Mar 30, 2021 · Mar 30, 2021
@@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 -
 
 ### Changed
--
+- Added an option (`--allow-undeclared-attrs`) to allow undeclared annotation attributes in CVAT format export (<https://github.com/openvinotoolkit/datumaro/pull/192>)
 
 ### Deprecated
 -
@@ -20,7 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 -
 
 ### Fixed
--
+- Added support for label attributes in Datumaro format (<https://github.com/openvinotoolkit/datumaro/pull/192>)
 
 ### Security
 -

@@ -4,6 +4,7 @@
 # SPDX-License-Identifier: MIT
 
 import argparse
+import logging as log
 
 from datumaro.cli.util import MultilineFormatter
 from datumaro.util import to_snake_case
@@ -36,7 +37,12 @@ def parse_cmdline(cls, args=None):
             args = args[1:]
         parser = cls.build_cmdline_parser()
         args = parser.parse_args(args)
-        return vars(args)
+        args = vars(args)
+
+        log.debug("Parsed parameters: \n\t%s",
+            '\n\t'.join('%s: %s' % (k, v) for k, v in args.items()))
+
+        return args
 
 def remove_plugin_type(s):
     for t in {'transform', 'extractor', 'converter', 'launcher', 'importer'}:

@@ -7,6 +7,7 @@
 import os
 import os.path as osp
 from collections import OrderedDict
+from itertools import chain
 from xml.sax.saxutils import XMLGenerator
 
 from datumaro.components.converter import Converter
@@ -182,9 +183,9 @@ def _write_item(self, item, index):
         for ann in item.annotations:
             if ann.type in {AnnotationType.points, AnnotationType.polyline,
                     AnnotationType.polygon, AnnotationType.bbox}:
-                self._write_shape(ann)
+                self._write_shape(ann, item)
             elif ann.type == AnnotationType.label:
-                self._write_tag(ann)
+                self._write_tag(ann, item)
             else:
                 continue
 
@@ -215,7 +216,7 @@ def _write_meta(self):
                                 ("input_type", "text"),
                                 ("default_value", ""),
                                 ("values", ""),
-                            ])) for attr in label.attributes
+                            ])) for attr in self._get_label_attrs(label)
                         ])
                     ])) for label in label_cat.items
                 ]),
@@ -226,15 +227,27 @@ def _write_meta(self):
     def _get_label(self, label_id):
         if label_id is None:
             return ""
-        label_cat = self._extractor.categories()[AnnotationType.label]
+        label_cat = self._extractor.categories().get(
+            AnnotationType.label, LabelCategories())  # default is used when the label key is absent
         return label_cat.items[label_id]
 
-    def _write_shape(self, shape):
+    def _get_label_attrs(self, label):  # declared attribute names for a label; takes a label object or an int index
+        label_cat = self._extractor.categories().get(
+            AnnotationType.label, LabelCategories())
+        if isinstance(label, int):
+            label = label_cat[label]  # resolve an index to its label entry
+        return set(chain(label.attributes, label_cat.attributes)) - \
+            self._context._builtin_attrs  # label's own + category-level attributes, minus built-in names
+
+    def _write_shape(self, shape, item):
         if shape.label is None:
+            log.warning("Item %s: skipping a %s with no label",
+                item.id, shape.type.name)
             return
 
+        label_name = self._get_label(shape.label).name
         shape_data = OrderedDict([
-            ("label", self._get_label(shape.label).name),
+            ("label", label_name),
             ("occluded", str(int(shape.attributes.get('occluded', False)))),
         ])
 
@@ -271,13 +284,21 @@ def _write_shape(self, shape):
             raise NotImplementedError("unknown shape type")
 
         for attr_name, attr_value in shape.attributes.items():
+            if attr_name in self._context._builtin_attrs:
+                continue
             if isinstance(attr_value, bool):
                 attr_value = 'true' if attr_value else 'false'
-            if attr_name in self._get_label(shape.label).attributes:
+            if self._context._allow_undeclared_attrs or \
+                    attr_name in self._get_label_attrs(shape.label):
                 self._writer.add_attribute(OrderedDict([
                     ("name", str(attr_name)),
                     ("value", str(attr_value)),
                 ]))
+            else:
+                log.warning("Item %s: skipping undeclared "
+                    "attribute '%s' for label '%s' "
+                    "(allow with --allow-undeclared-attrs option)",
+                    item.id, attr_name, label_name)
 
         if shape.type == AnnotationType.bbox:
             self._writer.close_box()
@@ -290,25 +311,36 @@ def _write_shape(self, shape):
         else:
             raise NotImplementedError("unknown shape type")
 
-    def _write_tag(self, label):
+    def _write_tag(self, label, item):  # item is only used for the log messages below
         if label.label is None:
+            log.warning("Item %s: skipping a %s with no label",
+                item.id, label.type.name)  # annotations without a label are not written
             return
 
+        label_name = self._get_label(label.label).name  # resolved once; reused for the tag and the warning
         tag_data = OrderedDict([
-            ('label', self._get_label(label.label).name),
+            ('label', label_name),
         ])
         if label.group:
             tag_data['group_id'] = str(label.group)
         self._writer.open_tag(tag_data)
 
         for attr_name, attr_value in label.attributes.items():
+            if attr_name in self._context._builtin_attrs:
+                continue  # built-in attribute names are never written as tag attributes
             if isinstance(attr_value, bool):
                 attr_value = 'true' if attr_value else 'false'
-            if attr_name in self._get_label(label.label).attributes:
+            if self._context._allow_undeclared_attrs or \
+                    attr_name in self._get_label_attrs(label.label):  # declared, or undeclared attributes are allowed
                 self._writer.add_attribute(OrderedDict([
                     ("name", str(attr_name)),
                     ("value", str(attr_value)),
                 ]))
+            else:  # undeclared and not allowed: skip it and log how to keep it
+                log.warning("Item %s: skipping undeclared "
+                    "attribute '%s' for label '%s' "
+                    "(allow with --allow-undeclared-attrs option)",
+                    item.id, attr_name, label_name)
 
         self._writer.close_tag()
 
@@ -320,12 +352,18 @@ def build_cmdline_parser(cls, **kwargs):
         parser = super().build_cmdline_parser(**kwargs)
         parser.add_argument('--reindex', action='store_true',
             help="Assign new indices to frames (default: %(default)s)")
+        parser.add_argument('--allow-undeclared-attrs', action='store_true',
+            help="Write annotation attributes even if they are not present in "
+                "the input dataset metainfo (default: %(default)s)")
         return parser
 
-    def __init__(self, extractor, save_dir, reindex=False, **kwargs):
+    def __init__(self, extractor, save_dir, reindex=False,
+            allow_undeclared_attrs=False, **kwargs):  # new flag is off by default
         super().__init__(extractor, save_dir, **kwargs)
 
         self._reindex = reindex
+        self._builtin_attrs = CvatPath.BUILTIN_ATTRS  # attribute names treated as built-in
+        self._allow_undeclared_attrs = allow_undeclared_attrs  # matches the --allow-undeclared-attrs option
 
     def apply(self):
         self._images_dir = osp.join(self._save_dir, CvatPath.IMAGES_DIR)

@@ -7,3 +7,5 @@ class CvatPath:
     IMAGES_DIR = 'images'
 
     IMAGE_EXT = '.jpg'
+
+    BUILTIN_ATTRS = {'occluded', 'outside', 'keyframe', 'track_id'}
@@ -181,14 +181,19 @@ def _convert_caption_object(self, obj):
         })
         return converted
 
+    def _convert_attribute_categories(self, attributes):
+        return sorted(attributes)  # a list in sorted order, so the result is deterministic
+
     def _convert_label_categories(self, obj):
         converted = {
             'labels': [],
+            'attributes': self._convert_attribute_categories(obj.attributes),  # category-level attributes
         }
         for label in obj.items:
             converted['labels'].append({
                 'name': cast(label.name, str),
                 'parent': cast(label.parent, str),
+                'attributes': self._convert_attribute_categories(label.attributes),  # this label's own attributes
             })
         return converted
 

@@ -39,9 +39,11 @@ def _load_categories(parsed):
 
         parsed_label_cat = parsed['categories'].get(AnnotationType.label.name)
         if parsed_label_cat:
-            label_categories = LabelCategories()
+            label_categories = LabelCategories(
+                attributes=parsed_label_cat.get('attributes', []))
             for item in parsed_label_cat['labels']:
-                label_categories.add(item['name'], parent=item['parent'])
+                label_categories.add(item['name'], parent=item['parent'],
+                    attributes=item.get('attributes', []))
 
             categories[AnnotationType.label] = label_categories
 

@@ -1,8 +1,8 @@
 from functools import partial
-import numpy as np
 import os
 import os.path as osp
 
+import numpy as np
 from unittest import TestCase
 from datumaro.components.project import Dataset
 from datumaro.components.extractor import (DatasetItem,
@@ -148,18 +148,17 @@ def _test_save_and_load(self, source_dataset, converter, test_dir,
             target_dataset=target_dataset, importer_args=importer_args, **kwargs)
 
     def test_can_save_and_load(self):
-        label_categories = LabelCategories()
+        src_label_cat = LabelCategories(attributes={'occluded', 'common'})
         for i in range(10):
-            label_categories.add(str(i))
-        label_categories.items[2].attributes.update(['a1', 'a2', 'empty'])
-        label_categories.attributes.update(['occluded'])
+            src_label_cat.add(str(i))
+        src_label_cat.items[2].attributes.update(['a1', 'a2', 'empty'])
 
         source_dataset = Dataset.from_iterable([
             DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
                 annotations=[
                     Polygon([0, 0, 4, 0, 4, 4],
                         label=1, group=4,
-                        attributes={ 'occluded': True}),
+                        attributes={ 'occluded': True, 'common': 't' }),
                     Points([1, 1, 3, 2, 2, 3],
                         label=2,
                         attributes={ 'a1': 'x', 'a2': 42, 'empty': '',
@@ -188,16 +187,19 @@ def test_can_save_and_load(self):
 
             DatasetItem(id=3, subset='s3', image=Image(
                 path='3.jpg', size=(2, 4))),
-        ], categories={
-            AnnotationType.label: label_categories,
-        })
+        ], categories={ AnnotationType.label: src_label_cat })
 
+        target_label_cat = LabelCategories(
+            attributes={'occluded'}) # unable to represent a common attribute
+        for i in range(10):
+            target_label_cat.add(str(i), attributes={'common'})
+        target_label_cat.items[2].attributes.update(['a1', 'a2', 'empty', 'common'])
         target_dataset = Dataset.from_iterable([
             DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
                 annotations=[
                     Polygon([0, 0, 4, 0, 4, 4],
                         label=1, group=4,
-                        attributes={ 'occluded': True }),
+                        attributes={ 'occluded': True, 'common': 't' }),
                     Points([1, 1, 3, 2, 2, 3],
                         label=2,
                         attributes={ 'occluded': False, 'empty': '',
@@ -228,15 +230,36 @@ def test_can_save_and_load(self):
             DatasetItem(id=3, subset='s3', image=Image(
                     path='3.jpg', size=(2, 4)),
                 attributes={'frame': 0}),
-        ], categories={
-            AnnotationType.label: label_categories,
-        })
+        ], categories={ AnnotationType.label: target_label_cat })
 
         with TestDir() as test_dir:
             self._test_save_and_load(source_dataset,
                 partial(CvatConverter.convert, save_images=True), test_dir,
                 target_dataset=target_dataset)
 
+    def test_can_allow_undeclared_attrs(self):  # undeclared attribute 'y' must survive a save/load round trip
+        source_dataset = Dataset.from_iterable([
+            DatasetItem(id=0, annotations=[
+                Label(0, attributes={ 'x': 4, 'y': 2 }),
+                Bbox(1, 2, 3, 4, label=0, attributes={ 'x': 1, 'y': 1 }),
+            ]),
+        ], categories=[ ('a', '', {'x'}) ])  # only 'x' is declared for label 'a'
+
+        target_label_cat = LabelCategories(attributes={'occluded'})  # expected categories also carry 'occluded'
+        target_label_cat.add('a', attributes={'x'})
+        target_dataset = Dataset.from_iterable([
+            DatasetItem(id=0, annotations=[
+                Label(0, attributes={ 'x': 4, 'y': 2 }),
+                Bbox(1, 2, 3, 4, label=0,
+                    attributes={ 'x': 1, 'y': 1, 'occluded': False }),  # 'y' kept; 'occluded' expected on the bbox
+            ], attributes={'frame': 0}),
+        ], categories={ AnnotationType.label: target_label_cat })
+
+        with TestDir() as test_dir:
+            self._test_save_and_load(source_dataset,
+                partial(CvatConverter.convert, allow_undeclared_attrs=True),  # option under test
+                test_dir, target_dataset=target_dataset)
+
     def test_relative_paths(self):
         source_dataset = Dataset.from_iterable([
             DatasetItem(id='1', image=np.ones((4, 2, 3))),
@@ -259,11 +282,10 @@ def test_relative_paths(self):
                 target_dataset=target_dataset, require_images=True)
 
     def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
-        label_categories = LabelCategories()
+        label_categories = LabelCategories(attributes={'occluded'})
         for i in range(10):
             label_categories.add(str(i))
         label_categories.items[2].attributes.update(['a1', 'a2', 'empty'])
-        label_categories.attributes.update(['occluded'])
 
         source_dataset = Dataset.from_iterable([
             DatasetItem(id='кириллица с пробелом',

@@ -3,7 +3,6 @@
 import os.path as osp
 
 import numpy as np
-
 from unittest import TestCase
 from datumaro.components.project import Dataset
 from datumaro.components.extractor import (DatasetItem,
@@ -29,9 +28,9 @@ def _test_save_and_load(self, source_dataset, converter, test_dir,
 
     @property
     def test_dataset(self):
-        label_categories = LabelCategories()
+        label_categories = LabelCategories(attributes={'a', 'b', 'score'})
         for i in range(5):
-            label_categories.add('cat' + str(i))
+            label_categories.add('cat' + str(i), attributes={'x', 'y'})
 
         mask_categories = MaskCategories(
             generate_colormap(len(label_categories.items)))
@@ -52,9 +51,14 @@ def test_dataset(self):
                     Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
                         'score': 1.0,
                     }),
-                    Bbox(5, 6, 7, 8, id=5, group=5),
-                    Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
-                    Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
+                    Bbox(5, 6, 7, 8, id=5, group=5, attributes={
+                        'a': 1.5,
+                        'b': 'text',
+                    }),
+                    Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4,
+                        attributes={ 'x': 1, 'y': '2', }),
+                    Mask(label=3, id=5, z_order=2, image=np.ones((2, 3)),
+                        attributes={ 'x': 1, 'y': '2', }),
                 ]),
             DatasetItem(id=21, subset='train',
                 annotations=[