Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace tfrecord format support in CVAT with Datumaro #1157

Merged
merged 58 commits into from
Feb 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
c7dab01
Employ transforms and item wrapper
zhiltsov-max Feb 7, 2020
0801cf1
Add image class and tests
zhiltsov-max Feb 12, 2020
3400c07
Add image info support to formats
zhiltsov-max Feb 12, 2020
77645bd
Fix cli
zhiltsov-max Feb 12, 2020
995da31
Fix merge and voc converter
zhiltsov-max Feb 12, 2020
ad7eb97
Update remote images extractor
zhiltsov-max Feb 12, 2020
843a4aa
Codacy
zhiltsov-max Feb 13, 2020
26b8f60
Remove item name, require path in Image
zhiltsov-max Feb 17, 2020
6f9bec4
Merge images of dataset items
zhiltsov-max Feb 17, 2020
bd782b0
Update tests
zhiltsov-max Feb 17, 2020
7e226e0
Add image dir converter
zhiltsov-max Feb 17, 2020
b3bb8bf
Update Datumaro format
zhiltsov-max Feb 17, 2020
a49d354
Update COCO format with image info
zhiltsov-max Feb 17, 2020
2aa041a
Update CVAT format with image info
zhiltsov-max Feb 17, 2020
3b4f0a3
Update TFrecord format with image info
zhiltsov-max Feb 17, 2020
4ce9ad0
Update VOC format with image info
zhiltsov-max Feb 17, 2020
8c17ad6
Update YOLO format with image info
zhiltsov-max Feb 17, 2020
d984265
Update dataset manager bindings with image info
zhiltsov-max Feb 17, 2020
a52e1c5
Add image name to id transform
zhiltsov-max Feb 17, 2020
55b1d2a
Replace YOLO export and import in CVAT with Datumaro
zhiltsov-max Feb 17, 2020
217bb45
Add masks support for tfrecord
zhiltsov-max Feb 18, 2020
1045893
Refactor coco
zhiltsov-max Feb 18, 2020
d3cac8c
Replace tfrecord format support
zhiltsov-max Feb 18, 2020
bf6580b
Employ transforms and item wrapper
zhiltsov-max Feb 7, 2020
baeaf86
Add image class and tests
zhiltsov-max Feb 12, 2020
f79768c
Add image info support to formats
zhiltsov-max Feb 12, 2020
fc94473
Fix cli
zhiltsov-max Feb 12, 2020
c3c1602
Fix merge and voc converter
zhiltsov-max Feb 12, 2020
ff57202
Update remote images extractor
zhiltsov-max Feb 12, 2020
82c3a56
Codacy
zhiltsov-max Feb 13, 2020
986ce38
Remove item name, require path in Image
zhiltsov-max Feb 17, 2020
a7824de
Merge images of dataset items
zhiltsov-max Feb 17, 2020
5a8677f
Update tests
zhiltsov-max Feb 17, 2020
d198fe9
Add image dir converter
zhiltsov-max Feb 17, 2020
a7f6198
Update Datumaro format
zhiltsov-max Feb 17, 2020
8ada47e
Update COCO format with image info
zhiltsov-max Feb 17, 2020
7d47ac9
Update CVAT format with image info
zhiltsov-max Feb 17, 2020
c11ee19
Update TFrecord format with image info
zhiltsov-max Feb 17, 2020
0a93b80
Update VOC format with image info
zhiltsov-max Feb 17, 2020
f9e5c8c
Update YOLO format with image info
zhiltsov-max Feb 17, 2020
bef057b
Update dataset manager bindings with image info
zhiltsov-max Feb 17, 2020
e00e5a9
Add image name to id transform
zhiltsov-max Feb 17, 2020
addd22e
Fix coco export
zhiltsov-max Feb 18, 2020
fcfa8da
Replace YOLO export and import in CVAT with Datumaro
zhiltsov-max Feb 17, 2020
1629541
Add masks support for tfrecord
zhiltsov-max Feb 18, 2020
06a6f22
Refactor coco
zhiltsov-max Feb 18, 2020
c3cfae0
Fix comparison
zhiltsov-max Feb 20, 2020
a3178aa
Merge branch 'zm/dm-add-masks-to-tfrecord' into zm/replace-tfrecord-f…
zhiltsov-max Feb 20, 2020
81e52d3
Add masks support for tfrecord
zhiltsov-max Feb 18, 2020
2cbd714
Refactor coco
zhiltsov-max Feb 18, 2020
7d58f4d
Fix comparison
zhiltsov-max Feb 20, 2020
3b6fbf8
Remove dead code
zhiltsov-max Feb 20, 2020
5e0dc37
Extract common code for instances
zhiltsov-max Feb 20, 2020
f0322c8
Merge branch 'develop' into zm/dm-add-masks-to-tfrecord
zhiltsov-max Feb 20, 2020
adbe64d
Merge branch 'zm/dm-add-masks-to-tfrecord' into zm/replace-tfrecord-f…
zhiltsov-max Feb 20, 2020
2a18522
Merge branch 'develop' into zm/replace-tfrecord-format
zhiltsov-max Feb 21, 2020
bc6a86d
Merge branch 'develop' into zm/replace-tfrecord-format
zhiltsov-max Feb 21, 2020
49c1657
Remove unused import
zhiltsov-max Feb 21, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 15 additions & 149 deletions cvat/apps/annotation/tfrecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,160 +23,26 @@
}

def dump(file_object, annotations):
import tensorflow as tf
import os
import string
from zipfile import ZipFile
import codecs
from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.project import Environment
from tempfile import TemporaryDirectory
from collections import OrderedDict

# we need it to filter out non-ASCII characters; otherwise
# training will crash
printable = set(string.printable)

def int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def int64_list_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def bytes_list_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

def float_list_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))

# Defining the main conversion function
def create_tf_example(img_id, img_size, image_name, boxes, label_ids):
# Process one image data per run
height = img_size[0]
width = img_size[1]

xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
xmaxs = [] # List of normalized right x coordinates in bounding box
# (1 per box)
ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
ymaxs = [] # List of normalized bottom y coordinates in bounding box
# (1 per box)
classes_text = [] # List of string class name of bounding box (1 per box)
classes = [] # List of integer class id of bounding box (1 per box)

# Loop over the boxes and fill the above fields
for box in boxes:
# filter out non-ASCII characters
box_name = ''.join(filter(lambda x: x in printable, box.label))

xmins.append(box.points[0] / width)
xmaxs.append(box.points[2] / width)
ymins.append(box.points[1] / height)
ymaxs.append(box.points[3] / height)
classes_text.append(box_name.encode('utf8'))
classes.append(label_ids[box.label])

tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': int64_feature(height),
'image/width': int64_feature(width),
'image/filename': bytes_feature(image_name.encode('utf8')),
'image/source_id': bytes_feature(str(img_id).encode('utf8')),
'image/object/bbox/xmin': float_list_feature(xmins),
'image/object/bbox/xmax': float_list_feature(xmaxs),
'image/object/bbox/ymin': float_list_feature(ymins),
'image/object/bbox/ymax': float_list_feature(ymaxs),
'image/object/class/text': bytes_list_feature(classes_text),
'image/object/class/label': int64_list_feature(classes),
}))
return tf_example

# Create the label map file
label_ids = OrderedDict((label[1]["name"], idx) for idx, label in enumerate(annotations.meta["task"]["labels"]))
with TemporaryDirectory() as out_dir:
labelmap_file = 'label_map.pbtxt'
with codecs.open(os.path.join(out_dir, labelmap_file), 'w', encoding='utf8') as f:
for label, idx in label_ids.items():
f.write(u'item {\n')
f.write(u'\tid: {}\n'.format(idx))
f.write(u"\tname: '{}'\n".format(label))
f.write(u'}\n\n')

annotation_file = '{}.tfrecord'.format(annotations.meta['task']['name'])
with tf.io.TFRecordWriter(os.path.join(out_dir, annotation_file)) as writer:
for frame_annotation in annotations.group_by_frame():
boxes = [shape for shape in frame_annotation.labeled_shapes if shape.type == 'rectangle']
if not boxes:
continue
tf_example = create_tf_example(
img_id=frame_annotation.frame,
img_size=(frame_annotation.height, frame_annotation.width),
image_name=frame_annotation.name,
boxes=boxes,
label_ids=label_ids,
)
writer.write(tf_example.SerializeToString())

with ZipFile(file_object, 'w') as output_zip:
output_zip.write(filename=os.path.join(out_dir, labelmap_file), arcname=labelmap_file)
output_zip.write(filename=os.path.join(out_dir, annotation_file), arcname=annotation_file)
extractor = CvatAnnotationsExtractor('', annotations)
converter = Environment().make_converter('tf_detection_api')
with TemporaryDirectory() as temp_dir:
converter(extractor, save_dir=temp_dir)
make_zip_archive(temp_dir, file_object)

def load(file_object, annotations):
from pyunpack import Archive
from tempfile import TemporaryDirectory
import os
import tensorflow as tf
from glob import glob
import numpy as np

tf.enable_eager_execution()
from datumaro.plugins.tf_detection_api_format.importer import TfDetectionApiImporter
from cvat.apps.dataset_manager.bindings import import_dm_annotations

def parse_tfrecord_file(filenames):
def clamp(value, _min, _max):
return max(min(_max, value), _min)

dataset = tf.data.TFRecordDataset(filenames)
image_feature_description = {
'image/filename': tf.io.FixedLenFeature([], tf.string),
'image/source_id': tf.io.FixedLenFeature([], tf.string),
'image/height': tf.io.FixedLenFeature([], tf.int64),
'image/width': tf.io.FixedLenFeature([], tf.int64),
# Object boxes and classes.
'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
'image/object/class/label': tf.io.VarLenFeature(tf.int64),
'image/object/class/text': tf.io.VarLenFeature(tf.string),
}

for record in dataset:
parsed_record = tf.io.parse_single_example(record, image_feature_description)
frame_number = annotations.match_frame(parsed_record['image/filename'].numpy().decode('utf-8'))
frame_height = tf.cast(parsed_record['image/height'], tf.int64).numpy().item()
frame_width = tf.cast(parsed_record['image/width'], tf.int64).numpy().item()
xmins = tf.sparse.to_dense(parsed_record['image/object/bbox/xmin']).numpy()
ymins = tf.sparse.to_dense(parsed_record['image/object/bbox/ymin']).numpy()
xmaxs = tf.sparse.to_dense(parsed_record['image/object/bbox/xmax']).numpy()
ymaxs = tf.sparse.to_dense(parsed_record['image/object/bbox/ymax']).numpy()
labels = tf.sparse.to_dense(parsed_record['image/object/class/text'], default_value='').numpy()
for shape in np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]:
annotations.add_shape(annotations.LabeledShape(
type='rectangle',
frame=frame_number,
label=shape[0].decode("utf-8"),
points=[
clamp(shape[1] * frame_width, 0, frame_width),
clamp(shape[2] * frame_height, 0, frame_height),
clamp(shape[3] * frame_width, 0, frame_width),
clamp(shape[4] * frame_height, 0, frame_height),
],
occluded=False,
attributes=[],
))

archive_file = getattr(file_object, 'name')
archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name")
with TemporaryDirectory() as tmp_dir:
Archive(archive_file).extractall(tmp_dir)
filenames = glob(os.path.join(tmp_dir, '*.tfrecord'))
parse_tfrecord_file(filenames)

dm_project = TfDetectionApiImporter()(tmp_dir)
dm_dataset = dm_project.make_dataset()
import_dm_annotations(dm_dataset, annotations)
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,15 @@ def _export_instances(self, instances, width, height):
def _make_tf_example(self, item):
features = {
'image/source_id': bytes_feature(str(item.id).encode('utf-8')),
'image/filename': bytes_feature(
('%s%s' % (item.id, DetectionApiPath.IMAGE_EXT)).encode('utf-8')),
}

filename = ''
if item.has_image:
filename = item.image.filename
if not filename:
filename = item.id + DetectionApiPath.IMAGE_EXT
features['image/filename'] = bytes_feature(filename.encode('utf-8'))

if not item.has_image:
raise Exception("Failed to export dataset item '%s': "
"item has no image info" % item.id)
Expand Down
12 changes: 1 addition & 11 deletions datumaro/datumaro/plugins/tf_detection_api_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,7 @@ def _parse_tfrecord_file(cls, filepath, subset_name, images_dir):
image_params = {}
if frame_image and frame_format:
image_params['data'] = lazy_image(frame_image, decode_image)
if frame_filename and images_dir:
image_params['path'] = osp.join(images_dir, frame_filename)

image_size = None
if frame_height and frame_width:
image_size = (frame_height, frame_width)

image_params = {}
if frame_image and frame_format:
image_params['data'] = lazy_image(frame_image, decode_image)
if frame_filename and images_dir:
if frame_filename:
image_params['path'] = osp.join(images_dir, frame_filename)

image = None
Expand Down