cvat-ai · nmanovic · Dec 12, 2019 · Dec 3, 2019 · Dec 3, 2019 · Dec 3, 2019
@@ -28,7 +28,7 @@ def __init__(self, url):
                         id=item_id, image=lazy_image(path))
                     items.append((item.id, item))
 
-        items = sorted(items, key=lambda e: e[0])
+        items = sorted(items, key=lambda e: int(e[0]))
         items = OrderedDict(items)
         self._items = items
 
@@ -74,7 +74,7 @@ def __init__(self, url, db_task, user):
                 id=cvat_anno.frame, annotations=dm_anno)
             dm_annotations.append((dm_item.id, dm_item))
 
-        dm_annotations = sorted(dm_annotations, key=lambda e: e[0])
+        dm_annotations = sorted(dm_annotations, key=lambda e: int(e[0]))
         self._items = OrderedDict(dm_annotations)
 
         self._subsets = None

@@ -102,7 +102,7 @@ def __init__(self, url):
                 id=item_id, image=self._make_image_loader(item_id))
             items.append((item.id, item))
 
-        items = sorted(items, key=lambda e: e[0])
+        items = sorted(items, key=lambda e: int(e[0]))
         items = OrderedDict(items)
         self._items = items
 

@@ -242,13 +242,23 @@ def _remote_image_converter(self, save_dir, server_url=None):
         images_meta = {
             'images': items,
         }
-        for db_image in self._db_task.image_set.all():
-            frame_info = {
-                'id': db_image.frame,
-                'width': db_image.width,
-                'height': db_image.height,
-            }
-            items.append(frame_info)
+        db_video = getattr(self._db_task, 'video', None)
+        if db_video is not None:
+            for i in range(self._db_task.size):
+                frame_info = {
+                    'id': str(i),
+                    'width': db_video.width,
+                    'height': db_video.height,
+                }
+                items.append(frame_info)
+        else:
+            for db_image in self._db_task.image_set.all():
+                frame_info = {
+                    'id': db_image.frame,
+                    'width': db_image.width,
+                    'height': db_image.height,
+                }
+                items.append(frame_info)
 
         with open(osp.join(save_dir, 'config.json'), 'w') as config_file:
             json.dump(config, config_file)
@@ -385,6 +395,11 @@ def clear_export_cache(task_id, file_path, file_ctime):
         'tag': 'yolo',
         'is_default': False,
     },
+    {
+        'name': 'TF Detection API TFrecord',
+        'tag': 'tf_detection_api',
+        'is_default': False,
+    },
 ]
 
 def get_export_formats():

@@ -90,12 +90,7 @@ def get_frame_path(self, frame):
     def get_image_frame(image_path):
         assert image_path.endswith('.jpg')
         index = os.path.splitext(os.path.basename(image_path))[0]
-
-        path = os.path.dirname(image_path)
-        d2 = os.path.basename(path)
-        d1 = os.path.basename(os.path.dirname(path))
-
-        return int(d1) * 10000 + int(d2) * 100 + int(index)
+        return int(index)
 
     def get_frame_step(self):
         match = re.search("step\s*=\s*([1-9]\d*)", self.frame_filter)

@@ -0,0 +1,25 @@
+# Copyright (C) 2018 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import os.path as osp
+
+from django.test import TestCase
+from cvat.apps.engine.models import Task
+
+
+# Round-trip check between a frame number and its image path on Task.
+class TaskModelTest(TestCase):
+    def test_frame_id_path_conversions(self):
+        # The Task is only constructed here, never saved; 1 is passed as the
+        # first positional model argument -- presumably the id field, confirm.
+        task_id = 1
+        task = Task(task_id)
+
+        # Sample one frame number per order of magnitude: 1, 10, ..., 100000.
+        for i in [10 ** p for p in range(6)]:
+            # Expected path tail: <i // 10000>/<i // 100>/<i>.jpg
+            src_path_expected = osp.join(
+                str(i // 10000), str(i // 100), '%s.jpg' % i)
+            src_path = task.get_frame_path(i)
+
+            # Convert the path back into a frame number.
+            dst_frame = task.get_image_frame(src_path)
+
+            # Only the tail of the path is compared, so whatever directory
+            # prefix get_frame_path() prepends does not matter here.
+            self.assertTrue(src_path.endswith(src_path_expected),
+                '%s vs. %s' % (src_path, src_path_expected))
+            # The round trip must give back the original frame number.
+            self.assertEqual(i, dst_frame)
@@ -48,7 +48,7 @@ def build_import_parser(parser):
     import datumaro.components.importers as importers_module
     importers_list = [name for name, cls in importers_module.items]
 
-    parser.add_argument('source_path',
+    parser.add_argument('-s', '--source', required=True,
         help="Path to import a project from")
     parser.add_argument('-f', '--format', required=True,
         help="Source project format (options: %s)" % (', '.join(importers_list)))
@@ -60,6 +60,8 @@ def build_import_parser(parser):
         help="Name of the new project (default: same as project dir)")
     parser.add_argument('--overwrite', action='store_true',
         help="Overwrite existing files in the save directory")
+    parser.add_argument('--copy', action='store_true',
+        help="Make a deep copy instead of saving source links")
     return parser
 
 def import_command(args):
@@ -74,14 +76,19 @@ def import_command(args):
         project_name = osp.basename(project_dir)
 
     log.info("Importing project from '%s' as '%s'" % \
-        (args.source_path, args.format))
+        (args.source, args.format))
 
-    source_path = osp.abspath(args.source_path)
-    project = Project.import_from(source_path, args.format)
+    source = osp.abspath(args.source)
+    project = Project.import_from(source, args.format)
     project.config.project_name = project_name
     project.config.project_dir = project_dir
-    project = project.make_dataset()
-    project.save(merge=True, save_images=False)
+
+    dataset = project.make_dataset()
+    if args.copy:
+        log.info("Cloning data...")
+        dataset.save(merge=True, save_images=True)
+    else:
+        project.save()
 
     log.info("Project has been created at '%s'" % (project_dir))
 

@@ -25,6 +25,10 @@
 
 from datumaro.components.converters.yolo import YoloConverter
 
+from datumaro.components.converters.tfrecord import (
+    DetectionApiConverter,
+)
+
 
 items = [
     ('datumaro', DatumaroConverter),
@@ -44,4 +48,6 @@
     ('voc_layout', VocLayoutConverter),
 
     ('yolo', YoloConverter),
+
+    ('tf_detection_api', DetectionApiConverter),
 ]
@@ -0,0 +1,146 @@
+
+# Copyright (C) 2019 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import codecs
+from collections import OrderedDict
+import os
+import os.path as osp
+import string
+
+from datumaro.components.extractor import AnnotationType, DEFAULT_SUBSET_NAME
+from datumaro.components.formats.tfrecord import DetectionApiPath
+from datumaro.util.image import encode_image
+from datumaro.util.tf_util import import_tf as _import_tf
+
+
+# We need this to filter out non-ASCII characters, otherwise training will
+# crash. The allowed alphabet is string.printable, kept as a set so that the
+# per-character membership test is cheap.
+_printable = set(string.printable)
+# Return `s` with every character that is not in string.printable removed.
+# Offending characters are dropped, not replaced, so the result can be
+# shorter than the input (or empty).
+def _make_printable(s):
+    return ''.join(filter(lambda x: x in _printable, s))
+
+# Build a tf.train.Example describing one dataset item.
+#
+# Parameters:
+#   item         - dataset item; this function reads its id, has_image,
+#                  image (an array with a .shape) and annotations.
+#   get_label_id - callable applied to box.label; its result is stored in the
+#                  int64 list 'image/object/class/label'.
+#   get_label    - callable applied to box.label; its result is passed through
+#                  _make_printable() and stored in 'image/object/class/text'.
+#   save_images  - when true, the encoded image bytes are embedded as well.
+#
+# Returns the assembled tf.train.Example.
+# Raises Exception when the item has no image: the image size is always
+# written, even if the image itself is not embedded.
+def _make_tf_example(item, get_label_id, get_label, save_images=False):
+    tf = _import_tf()
+
+    # Small wrappers that pack Python values into tf.train.Feature messages.
+    # The *_list_* variants take a sequence, the others wrap a single value.
+    def int64_feature(value):
+        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
+
+    def int64_list_feature(value):
+        return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
+
+    def bytes_feature(value):
+        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+    def bytes_list_feature(value):
+        return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
+
+    def float_list_feature(value):
+        return tf.train.Feature(float_list=tf.train.FloatList(value=value))
+
+
+    # The file name is derived from the item id plus the format's image
+    # extension; it is not taken from any original file path.
+    features = {
+        'image/source_id': bytes_feature(str(item.id).encode('utf-8')),
+        'image/filename': bytes_feature(
+            ('%s%s' % (item.id, DetectionApiPath.IMAGE_EXT)).encode('utf-8')),
+    }
+
+    if not item.has_image:
+        raise Exception(
+            "Failed to export dataset item '%s': item has no image" % item.id)
+    # NOTE(review): the 3-way unpacking requires a 3-element shape; a 2-D
+    # (single-channel) array would raise ValueError here -- confirm that
+    # images always carry a channel axis.
+    height, width, _ = item.image.shape
+
+    features.update({
+        'image/height': int64_feature(height),
+        'image/width': int64_feature(width),
+    })
+
+    # item.has_image is already known to be true at this point (see the raise
+    # above), so effectively only save_images decides this branch.
+    if save_images and item.has_image:
+        fmt = DetectionApiPath.IMAGE_FORMAT
+        buffer = encode_image(item.image, DetectionApiPath.IMAGE_EXT)
+
+        features.update({
+            'image/encoded': bytes_feature(buffer),
+            'image/format': bytes_feature(fmt.encode('utf-8')),
+        })
+
+    xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
+    xmaxs = [] # List of normalized right x coordinates in bounding box (1 per box)
+    ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
+    ymaxs = [] # List of normalized bottom y coordinates in bounding box (1 per box)
+    classes_text = [] # List of string class name of bounding box (1 per box)
+    classes = [] # List of integer class id of bounding box (1 per box)
+
+    # Only bounding-box annotations are exported; other types are ignored.
+    boxes = [ann for ann in item.annotations if ann.type is AnnotationType.bbox]
+    for box in boxes:
+        box_label = _make_printable(get_label(box.label))
+
+        # points[0] and points[2] are divided by the image width, points[1]
+        # and points[3] by the image height.
+        xmins.append(box.points[0] / width)
+        xmaxs.append(box.points[2] / width)
+        ymins.append(box.points[1] / height)
+        ymaxs.append(box.points[3] / height)
+        classes_text.append(box_label.encode('utf-8'))
+        classes.append(get_label_id(box.label))
+
+    # The object features are omitted entirely for items without boxes.
+    if boxes:
+        features.update({
+            'image/object/bbox/xmin': float_list_feature(xmins),
+            'image/object/bbox/xmax': float_list_feature(xmaxs),
+            'image/object/bbox/ymin': float_list_feature(ymins),
+            'image/object/bbox/ymax': float_list_feature(ymaxs),
+            'image/object/class/text': bytes_list_feature(classes_text),
+            'image/object/class/label': int64_list_feature(classes),
+        })
+
+    tf_example = tf.train.Example(
+        features=tf.train.Features(feature=features))
+
+    return tf_example
+
+# Callable converter: writes a label map file plus one '<subset>.tfrecord'
+# file per subset of the given extractor into save_dir.
+class DetectionApiConverter:
+    # save_images is forwarded to _make_tf_example() and controls whether the
+    # encoded image bytes are embedded into every record.
+    def __init__(self, save_images=True):
+        self.save_images = save_images
+
+    # Export `extractor` into `save_dir`; the directory is created if missing.
+    def __call__(self, extractor, save_dir):
+        tf = _import_tf()
+
+        os.makedirs(save_dir, exist_ok=True)
+
+        # With no named subsets, a single None placeholder makes the loop
+        # below export the whole extractor under DEFAULT_SUBSET_NAME.
+        subsets = extractor.subsets()
+        if len(subsets) == 0:
+            subsets = [ None ]
+
+        for subset_name in subsets:
+            if subset_name:
+                subset = extractor.get_subset(subset_name)
+            else:
+                subset_name = DEFAULT_SUBSET_NAME
+                subset = extractor
+
+            label_categories = subset.categories()[AnnotationType.label]
+            # Label index -> label name; a missing (None) label maps to ''.
+            get_label = lambda label_id: label_categories.items[label_id].name \
+                if label_id is not None else ''
+            # Label name -> exported id. Ids start at 1, so 0 is free to serve
+            # as the fallback for names that are not in the map (such as the
+            # '' produced for a None label).
+            label_ids = OrderedDict((label.name, 1 + idx)
+                for idx, label in enumerate(label_categories.items))
+            map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0)
+
+            # NOTE(review): this path does not depend on the subset, so the
+            # label map file is rewritten on every loop iteration.
+            # NOTE(review): names are written verbatim between single quotes,
+            # with no escaping and without the _make_printable() filtering
+            # that _make_tf_example() applies to the class text.
+            labelmap_path = osp.join(save_dir, DetectionApiPath.LABELMAP_FILE)
+            with codecs.open(labelmap_path, 'w', encoding='utf8') as f:
+                for label, idx in label_ids.items():
+                    f.write(
+                        'item {\n' +
+                        ('\tid: %s\n' % (idx)) +
+                        ("\tname: '%s'\n" % (label)) +
+                        '}\n\n'
+                    )
+
+            # One record file per subset, one serialized Example per item.
+            anno_path = osp.join(save_dir, '%s.tfrecord' % (subset_name))
+            with tf.io.TFRecordWriter(anno_path) as writer:
+                for item in subset:
+                    tf_example = _make_tf_example(
+                        item,
+                        get_label=get_label,
+                        get_label_id=map_label_id,
+                        save_images=self.save_images,
+                    )
+                    writer.write(tf_example.SerializeToString())
@@ -30,6 +30,11 @@
     YoloExtractor,
 )
 
+from datumaro.components.extractors.tfrecord import (
+    DetectionApiExtractor,
+)
+
+
 items = [
     ('datumaro', DatumaroExtractor),
 
@@ -52,4 +57,6 @@
     ('voc_comp_9_10', VocComp_9_10_Extractor),
 
     ('yolo', YoloExtractor),
+
+    ('tf_detection_api', DetectionApiExtractor),
 ]