From 0efc6a08a5f128035f8f85799965c40de9699114 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Fri, 13 Sep 2019 14:26:05 +0300 Subject: [PATCH 01/16] added handling of truncated and difficult attributes for pascal voc loader/dumper added descriptions of supported annotation formats --- cvat/apps/annotation/README.md | 172 ++++++++++++++++++++++++++++- cvat/apps/annotation/pascal_voc.py | 31 +++++- 2 files changed, 200 insertions(+), 3 deletions(-) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index c1a317b40e49..6e492e19c036 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -131,7 +131,7 @@ It allows to download and upload annotations in different formats and easily add annotations.add_shape(shape) ``` - Full examples can be found in [builtin](builtin) folder. + Full examples can be found in corrseponding *.py files (cvat.py, coco.py, yolo.py, etc.). 1. Add path to a new python script to the annotation app settings: ```python @@ -150,3 +150,173 @@ It allows to download and upload annotations in different formats and easily add Possible solutions: install additional modules via pip call to a separate directory for each Annotation Format to reduce version conflicts, etc. Thus, custom code can be run in an extended environment, and core CVAT modules should not be affected. As well, this functionality can be useful for Auto Annotation module. + +## Format specifications + +### CVAT +This is native CVAT annotation format. 
+[Detailed format description](cvat/apps/documentation/xml_format.md) + +##### CVAT XML for images +downloaded file: Single unpacked XML +supported shapes - Rectangles, Polygons, Polylines, Points + +##### CVAT XML for videos +downloaded file: Single unpacked XML +supported shapes - Rectangles, Polygons, Polylines, Points + +#### CVAT XML Loader +uploaded file: Single unpacked XML +supported shapes - Rectangles, Polygons, Polylines, Points + + +### [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) + +#### Dumper description +- downloaded file: a zip archive with following structure: + ``` + taskname.zip + ├── frame000001.xml + ├── frame000002.xml + ├── frame000003.xml + └── ... + ``` + Each *.xml file contains annotations for a frame with the same name as annotation file. Detailed structure specification of the *.xml file can be found [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf) +- supported shapes - Rectangles +- additional comments: If you plan to use 'truncated' and 'difficult' attributes please add to CVAT labels attributes the corresponding items: + `~checkbox=difficult:false ~checkbox=truncated:false` + +#### Loader description +- uploaded file: a zip archive with following structure: + ``` + taskname.zip + ├── frame000001.xml + ├── frame000002.xml + ├── frame000003.xml + └── ... + ``` + Each *.xml file contains annotations for a frame with the same name as annotation file. Note: it should be possible to match the CVAT frame(imagename) and image filename from the annotation *.xml file(tag filename, e.g. `2008_004457.jpg`). There are 2 options: + 1. full match between image name and filename form annotation *.xml file (in case of a task was created from images or archive of images). + 2. match by frame number (if CVAT cannot match by name). If filename tag contains a number CVAT will interpret it as frame number. 
+ +- supported shapes: Rectangles +- limitations: Support of Pascal VOC object detection format +- additional comments: the CVAT task should be created with the full label set that may be in the annotation files + +#### How to create a task from Pascal VOC dataset +1. Download the Pascal Voc dataset +1. Create the CVAT task with the following labels: + ``` + aeroplane bicycle bird boat bottle bus car cat chair cow diningtable dog horse motorbike person pottedplant sheep sofa train tvmonitor + ``` + You can add `~checkbox=difficult:false ~checkbox=truncated:false` attributes if you want to use it. + + Select interesting image files (See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) guide for details) +1. zip the corresponding annotation files +1. click `Upload annotation` button, choose `Pascal VOC ZIP 1.0` and select the *.zip file with annotations from previous step. It may take some time. + +### [YOLO](https://pjreddie.com/darknet/yolo/) +#### Dumper description +- downloaded file: a zip archive with following structure: + ``` + taskname.zip + ├── frame000001.txt + ├── frame000002.txt + ├── ... + └── obj.names + ``` + Each `*.txt` file contains annotations for a frame with the same name as annotation file. Short description of `*.txt` file structure: each line describes label and bounding box in the following format `label_id cx cy w h`. + `obj.names` contains the ordered list of label names. +- supported shapes - Rectangles + +#### Loader description +- uploaded file: a zip archive with following structure: + ``` + taskname.zip + ├── frame000001.txt + ├── frame000002.txt + ├── frame000003.txt + ├── ... + └──obj.names + ``` + Each `*.txt` file contains annotations for a frame with the same name as annotation file. Note: it should be possible to match the CVAT frame(imagename) and annotation filename. There are 2 options: + 1. 
full match between image name and filename form annotation `*.txt` file (in case of a task was created from images or archive of images). + 2. match by frame number (if CVAT cannot match by name). If name of the *.txt annotation file contains a number CVAT will interpret it as frame number. + +- supported shapes: Rectangles +- additional comments: the CVAT task should be created with the full label set that may be in the annotation files + +### [MS COCO Object Detection](http://cocodataset.org/#format-data) +#### Dumper description +- downloaded file: single unpacked `json`. Detailed description of the MS COCO format can be found [here](http://cocodataset.org/#format-data) +- supported shapes - Polygons, Rectangles (interpreted as polygons) + +#### Loader description +- uploaded file: single unpacked `*.json`. +- supported shapes: Polygons +- additional comments: the CVAT task should be created with the full label set that may be in the annotation files + +#### How to create a task from PMS COCO dataset +1. Download the [MS COCO dataset](http://cocodataset.org/#download). For example [2017 Val images](http://images.cocodataset.org/zips/val2017.zip) and [2017 Train/Val annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip). +1. 
Create a CVAT task with the following labels: + ``` + "person" "bicycle" "car" "motorcycle" "airplane" "bus" "train" "truck" "boat" "traffic light" "fire hydrant" "stop sign" "parking meter" "bench" "bird" "cat" "dog" "horse" "sheep" "cow" "elephant" "bear" "zebra" "giraffe" "backpack" "umbrella" "handbag" "tie" "suitcase" "frisbee" "skis" "snowboard" "sports ball" "kite" "baseball bat" "baseball glove" "skateboard" "surfboard" "tennis racket" "bottle" "wine glass" "cup" "fork" "knife" "spoon" "bowl" "banana" "apple" "sandwich" "orange" "broccoli" "carrot" "hot dog" "pizza" "donut" "cake" "chair" "couch" "potted plant" "bed" "dining table" "toilet" "tv" "laptop" "mouse" "remote" "keyboard" "cell phone" "microwave" "oven" "toaster" "sink" "refrigerator" "book" "clock" "vase" "scissors" "teddy bear" "hair drier" "toothbrush" + ``` + + Select val2017.zip as data (See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) guide for details) +1. unpack annotations_trainval2017.zip +1. click `Upload annotation` button, choose `COCO JSON 1.0` and select `instances_val2017.json.json` annotation file. It may take some time. + +### [TFRecord](https://www.tensorflow.org/tutorials/load_data/tf_records) +TFRecord is a very flexible format, but we try to correspond the format that used in [TF object detection](https://github.com/tensorflow/models/tree/master/research/object_detection) with minimal modifications. +Used feature description: +``` +image_feature_description = { + 'image/filename': tf.io.FixedLenFeature([], tf.string), + 'image/source_id': tf.io.FixedLenFeature([], tf.int64), + 'image/height': tf.io.FixedLenFeature([], tf.int64), + 'image/width': tf.io.FixedLenFeature([], tf.int64), + # Object boxes and classes. 
+ 'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32), + 'image/object/class/label': tf.io.VarLenFeature(tf.int64), + 'image/object/class/text': tf.io.VarLenFeature(tf.string), +} +``` +#### Dumper description +- downloaded file: a zip archive with following structure: + ``` + taskname.zip + ├── task2.tfrecord + └── label_map.pbtxt + ``` +- supported shapes - Rectangles + +#### Loader description +- uploaded file: a zip archive with following structure: + ``` + taskname.zip + └── task2.tfrecord + ``` +- supported shapes: Rectangles +- additional comments: the CVAT task should be created with the full label set that may be in the annotation files + +### PNG mask +#### Dumper description +- downloaded file: a zip archive with following structure: + ``` + taskname.zip + ├── frame000001.png + ├── frame000002.png + ├── frame000003.png + ├── ... + └── colormap.txt + ``` + Mask is a png image with several (RGB) channels where each pixel has own color which corresponds to a label. Color generation correspond to the Pascal VOC color generation [algorithm](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#sec:voclabelcolormap). (0, 0, 0) is used for background. + colormap.txt file contains the values of used colors in RGB format. 
+- supported shapes - Rectangles, Polygons + +#### Loader description +Not supported diff --git a/cvat/apps/annotation/pascal_voc.py b/cvat/apps/annotation/pascal_voc.py index 1a9c680624a2..6463736d4c3b 100644 --- a/cvat/apps/annotation/pascal_voc.py +++ b/cvat/apps/annotation/pascal_voc.py @@ -50,6 +50,8 @@ def parse_xml_file(annotation_file): import xml.etree.ElementTree as ET root = ET.parse(annotation_file).getroot() frame_number = match_frame(annotations.frame_info, root.find('filename').text) + with open('/tmp/fff.txt', 'w+') as fff: + fff.write(root.find('filename').text + '\n') for obj_tag in root.iter('object'): bbox_tag = obj_tag.find("bndbox") @@ -58,6 +60,10 @@ def parse_xml_file(annotation_file): ymin = float(bbox_tag.find('ymin').text) xmax = float(bbox_tag.find('xmax').text) ymax = float(bbox_tag.find('ymax').text) + truncated = obj_tag.find('truncated') + truncated = truncated.text if truncated is not None else 0 + difficult = obj_tag.find('difficult') + difficult = difficult.text if difficult is not None else 0 annotations.add_shape(annotations.LabeledShape( type='rectangle', @@ -65,7 +71,10 @@ def parse_xml_file(annotation_file): label=label, points=[xmin, ymin, xmax, ymax], occluded=False, - attributes=[], + attributes=[ + annotations.Attribute('truncated', truncated), + annotations.Attribute('difficult', difficult), + ], )) archive_file = getattr(file_object, 'name') @@ -97,12 +106,30 @@ def dump(file_object, annotations): for shape in frame_annotation.labeled_shapes: if shape.type != "rectangle": continue + label = shape.label xtl = shape.points[0] ytl = shape.points[1] xbr = shape.points[2] ybr = shape.points[3] - writer.addObject(label, xtl, ytl, xbr, ybr) + + difficult = 0 + truncated = 0 + for attribute in shape.attributes: + if attribute.name == 'truncated' and 'true' == attribute.value.lower(): + truncated = 1 + elif attribute.name == 'difficult' and 'true' == attribute.value.lower(): + difficult = 1 + + writer.addObject( + name=label, + 
xmin=xtl, + ymin=ytl, + xmax=xbr, + ymax=ybr, + truncated=truncated, + difficult=difficult, + ) anno_name = os.path.basename('{}.{}'.format(os.path.splitext(image_name)[0], 'xml')) anno_file = os.path.join(out_dir, anno_name) From d18cd5865489d7f801d7d1e7f7d9a368d42ef0db Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Fri, 11 Oct 2019 14:56:01 +0300 Subject: [PATCH 02/16] added YOLO example --- cvat/apps/annotation/README.md | 42 +++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index 6e492e19c036..83e33e4f4646 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -246,6 +246,46 @@ supported shapes - Rectangles, Polygons, Polylines, Points - supported shapes: Rectangles - additional comments: the CVAT task should be created with the full label set that may be in the annotation files +#### How to create a task from YOLO formated dataset (from VOC for example) +1. Follow the official [guide](https://pjreddie.com/darknet/yolo/)(see Training YOLO on VOC section) and prepare the YOLO formatted annotation. +1. Zip train images + ``` + zip images.zip -j -@ < train.txt + ``` +1. Create a CVAT task with the following labels: + ``` + aeroplane bicycle bird boat bottle bus car cat chair cow diningtable dog horse motorbike person pottedplant sheep sofa train tvmonitor + ``` + Select images.zip as data. Most likely you should use `share` functionality because size of images.zip is more then 500Mb. See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) guide for details. +1. Create `obj.names` with the following content: + ``` + aeroplane + bicycle + bird + boat + bottle + bus + car + cat + chair + cow + diningtable + dog + horse + motorbike + person + pottedplant + sheep + sofa + train + tvmonitor + ``` +1. 
Zip all label files together (we need add only label files that correspond train subset) + ``` + cat train.txt | while read p; do echo ${p%/*/*}/labels/${${p##*/}%%.*}.txt; done | zip labels.zip -j -@ obj.names + ``` +1. Click `Upload annotation` button, choose `YOLO ZIP 1.0` and select the *.zip file with labels from previous step. It may take some time. + ### [MS COCO Object Detection](http://cocodataset.org/#format-data) #### Dumper description - downloaded file: single unpacked `json`. Detailed description of the MS COCO format can be found [here](http://cocodataset.org/#format-data) @@ -256,7 +296,7 @@ supported shapes - Rectangles, Polygons, Polylines, Points - supported shapes: Polygons - additional comments: the CVAT task should be created with the full label set that may be in the annotation files -#### How to create a task from PMS COCO dataset +#### How to create a task from MS COCO dataset 1. Download the [MS COCO dataset](http://cocodataset.org/#download). For example [2017 Val images](http://images.cocodataset.org/zips/val2017.zip) and [2017 Train/Val annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip). 1. Create a CVAT task with the following labels: ``` From 55004f5a1d7ef991bdb0f8b68fe9e06ea1a6cecf Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Fri, 11 Oct 2019 16:24:53 +0300 Subject: [PATCH 03/16] fixed quotes --- cvat/apps/annotation/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index 83e33e4f4646..4e088a8337b5 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -300,7 +300,7 @@ supported shapes - Rectangles, Polygons, Polylines, Points 1. Download the [MS COCO dataset](http://cocodataset.org/#download). 
For example [2017 Val images](http://images.cocodataset.org/zips/val2017.zip) and [2017 Train/Val annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip). 1. Create a CVAT task with the following labels: ``` - "person" "bicycle" "car" "motorcycle" "airplane" "bus" "train" "truck" "boat" "traffic light" "fire hydrant" "stop sign" "parking meter" "bench" "bird" "cat" "dog" "horse" "sheep" "cow" "elephant" "bear" "zebra" "giraffe" "backpack" "umbrella" "handbag" "tie" "suitcase" "frisbee" "skis" "snowboard" "sports ball" "kite" "baseball bat" "baseball glove" "skateboard" "surfboard" "tennis racket" "bottle" "wine glass" "cup" "fork" "knife" "spoon" "bowl" "banana" "apple" "sandwich" "orange" "broccoli" "carrot" "hot dog" "pizza" "donut" "cake" "chair" "couch" "potted plant" "bed" "dining table" "toilet" "tv" "laptop" "mouse" "remote" "keyboard" "cell phone" "microwave" "oven" "toaster" "sink" "refrigerator" "book" "clock" "vase" "scissors" "teddy bear" "hair drier" "toothbrush" + person bicycle car motorcycle airplane bus train truck boat "traffic light" "fire hydrant" "stop sign" "parking meter" bench bird cat dog horse sheep cow elephant bear zebra giraffe backpack umbrella handbag tie suitcase frisbee skis snowboard "sports ball" kite "baseball bat" "baseball glove" skateboard surfboard "tennis racket" bottle "wine glass" cup fork knife spoon bowl banana apple sandwich orange broccoli carrot "hot dog" pizza donut cake chair couch "potted plant" bed "dining table" toilet tv laptop mouse remote keyboard "cell phone" microwave oven toaster sink refrigerator book clock vase scissors "teddy bear" "hair drier" toothbrush ``` Select val2017.zip as data (See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) guide for details) From 407384ce9cbaa8eb0ee8172fd9c580f389b0e45a Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Fri, 11 Oct 2019 16:28:06 +0300 Subject: [PATCH 04/16] removed debug 
code --- cvat/apps/annotation/pascal_voc.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cvat/apps/annotation/pascal_voc.py b/cvat/apps/annotation/pascal_voc.py index 6463736d4c3b..2bcfb8a41c1e 100644 --- a/cvat/apps/annotation/pascal_voc.py +++ b/cvat/apps/annotation/pascal_voc.py @@ -50,8 +50,6 @@ def parse_xml_file(annotation_file): import xml.etree.ElementTree as ET root = ET.parse(annotation_file).getroot() frame_number = match_frame(annotations.frame_info, root.find('filename').text) - with open('/tmp/fff.txt', 'w+') as fff: - fff.write(root.find('filename').text + '\n') for obj_tag in root.iter('object'): bbox_tag = obj_tag.find("bndbox") From fe2ef81e94a8fe462072f9da2e22f0effd36f98f Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Fri, 11 Oct 2019 18:06:56 +0300 Subject: [PATCH 05/16] made match_frame as Annotations method changed 'image/source_id' field TF feature from int64 to string (according to TF OD API dataset utlis) --- cvat/apps/annotation/README.md | 2 +- cvat/apps/annotation/annotation.py | 20 ++++++++++++++++++++ cvat/apps/annotation/coco.py | 22 +--------------------- cvat/apps/annotation/pascal_voc.py | 21 +-------------------- cvat/apps/annotation/tfrecord.py | 6 +++--- cvat/apps/annotation/yolo.py | 25 +++---------------------- 6 files changed, 29 insertions(+), 67 deletions(-) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index 4e088a8337b5..62491dd88d55 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -313,7 +313,7 @@ Used feature description: ``` image_feature_description = { 'image/filename': tf.io.FixedLenFeature([], tf.string), - 'image/source_id': tf.io.FixedLenFeature([], tf.int64), + 'image/source_id': tf.io.FixedLenFeature([], tf.string), 'image/height': tf.io.FixedLenFeature([], tf.int64), 'image/width': tf.io.FixedLenFeature([], tf.int64), # Object boxes and classes. 
diff --git a/cvat/apps/annotation/annotation.py b/cvat/apps/annotation/annotation.py index c6a3951387be..1f79eb347042 100644 --- a/cvat/apps/annotation/annotation.py +++ b/cvat/apps/annotation/annotation.py @@ -424,3 +424,23 @@ def _len(self): @property def frame_info(self): return self._frame_info + + def match_frame(self, filename): + import re + + def get_filename(path): + return os.path.splitext(os.path.basename(path))[0] + + # try to match by filename + _filename = get_filename(filename) + for frame_number, info in self.frame_info.items(): + cvat_filename = get_filename(info["path"]) + if cvat_filename == _filename: + return frame_number + + # try to extract frame number from filename + numbers = re.findall(r"\d+", filename) + if numbers and len(numbers) == 1: + return int(numbers[0]) + + raise Exception("Cannot match filename or determinate framenumber for {} filename".format(filename)) diff --git a/cvat/apps/annotation/coco.py b/cvat/apps/annotation/coco.py index 616639f994fc..d69d6dd3de45 100644 --- a/cvat/apps/annotation/coco.py +++ b/cvat/apps/annotation/coco.py @@ -329,26 +329,6 @@ def load(file_object, annotations): from pycocotools import mask as mask_utils import numpy as np - def get_filename(path): - import os - return os.path.splitext(os.path.basename(path))[0] - - def match_frame(frame_info, filename): - import re - # try to match by filename - yolo_filename = get_filename(filename) - for frame_number, info in frame_info.items(): - cvat_filename = get_filename(info["path"]) - if cvat_filename == yolo_filename: - return frame_number - - # try to extract frame number from filename - numbers = re.findall(r"\d+", filename) - if numbers and len(numbers) == 1: - return int(numbers[0]) - - raise Exception("Cannot match filename or determinate framenumber for {} filename".format(filename)) - coco = coco_loader.COCO(file_object.name) labels={cat['id']: cat['name'] for cat in coco.loadCats(coco.getCatIds())} @@ -356,7 +336,7 @@ def match_frame(frame_info, 
filename): for img_id in coco.getImgIds(): anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id)) img = coco.loadImgs(ids=img_id)[0] - frame_number = match_frame(annotations.frame_info, img['file_name']) + frame_number = annotations.match_frame(img['file_name']) for ann in anns: group = 0 label_name = labels[ann['category_id']] diff --git a/cvat/apps/annotation/pascal_voc.py b/cvat/apps/annotation/pascal_voc.py index 2bcfb8a41c1e..7d6c79a09230 100644 --- a/cvat/apps/annotation/pascal_voc.py +++ b/cvat/apps/annotation/pascal_voc.py @@ -25,31 +25,12 @@ def load(file_object, annotations): from pyunpack import Archive import os - import re from tempfile import TemporaryDirectory - def match_frame(frame_info, filename): - def get_filename(path): - return os.path.splitext(os.path.basename(path))[0] - - # try to match by filename - pascal_filename = get_filename(filename) - for frame_number, info in frame_info.items(): - cvat_filename = get_filename(info['path']) - if cvat_filename == pascal_filename: - return frame_number - - # try to extract framenumber from filename - numbers = re.findall(r'\d+', filename) - if numbers and len(numbers) == 1: - return int(numbers[0]) - - raise Exception('Cannot match filename or determinate framenumber for {} filename'.format(filename)) - def parse_xml_file(annotation_file): import xml.etree.ElementTree as ET root = ET.parse(annotation_file).getroot() - frame_number = match_frame(annotations.frame_info, root.find('filename').text) + frame_number = annotations.match_frame(root.find('filename').text) for obj_tag in root.iter('object'): bbox_tag = obj_tag.find("bndbox") diff --git a/cvat/apps/annotation/tfrecord.py b/cvat/apps/annotation/tfrecord.py index a911625e9901..a80de81babb9 100644 --- a/cvat/apps/annotation/tfrecord.py +++ b/cvat/apps/annotation/tfrecord.py @@ -81,7 +81,7 @@ def create_tf_example(img_id, img_size, image_name, boxes, label_ids): 'image/height': int64_feature(height), 'image/width': int64_feature(width), 
'image/filename': bytes_feature(image_name.encode('utf8')), - 'image/source_id': int64_feature(img_id), + 'image/source_id': bytes_feature(img_id), 'image/object/bbox/xmin': float_list_feature(xmins), 'image/object/bbox/xmax': float_list_feature(xmaxs), 'image/object/bbox/ymin': float_list_feature(ymins), @@ -138,7 +138,7 @@ def clamp(value, _min, _max): dataset = tf.data.TFRecordDataset(filenames) image_feature_description = { 'image/filename': tf.io.FixedLenFeature([], tf.string), - 'image/source_id': tf.io.FixedLenFeature([], tf.int64), + 'image/source_id': tf.io.FixedLenFeature([], tf.string), 'image/height': tf.io.FixedLenFeature([], tf.int64), 'image/width': tf.io.FixedLenFeature([], tf.int64), # Object boxes and classes. @@ -152,7 +152,7 @@ def clamp(value, _min, _max): for record in dataset: parsed_record = tf.io.parse_single_example(record, image_feature_description) - frame_number = tf.cast(parsed_record['image/source_id'], tf.int64).numpy().item() + frame_number = annotations.match_frame(parsed_record['image/source_id'].numpy().decode('utf-8')) frame_height = tf.cast(parsed_record['image/height'], tf.int64).numpy().item() frame_width = tf.cast(parsed_record['image/width'], tf.int64).numpy().item() xmins = tf.sparse.to_dense(parsed_record['image/object/bbox/xmin']).numpy() diff --git a/cvat/apps/annotation/yolo.py b/cvat/apps/annotation/yolo.py index 51da4983ac6b..df8c423f0865 100644 --- a/cvat/apps/annotation/yolo.py +++ b/cvat/apps/annotation/yolo.py @@ -22,10 +22,6 @@ ], } -def get_filename(path): - import os - return os.path.splitext(os.path.basename(path))[0] - def load(file_object, annotations): from pyunpack import Archive import os @@ -49,24 +45,8 @@ def parse_yolo_obj(img_size, obj): label_id, x, y, w, h = obj.split(" ") return int(label_id), convert_from_yolo(img_size, (float(x), float(y), float(w), float(h))) - def match_frame(frame_info, filename): - import re - # try to match by filename - yolo_filename = get_filename(filename) - for 
frame_number, info in frame_info.items(): - cvat_filename = get_filename(info["path"]) - if cvat_filename == yolo_filename: - return frame_number - - # try to extract frame number from filename - numbers = re.findall(r"\d+", filename) - if numbers and len(numbers) == 1: - return int(numbers[0]) - - raise Exception("Cannot match filename or determinate framenumber for {} filename".format(filename)) - def parse_yolo_file(annotation_file, labels_mapping): - frame_number = match_frame(annotations.frame_info, annotation_file) + frame_number = annotations.match_frame(annotation_file) with open(annotation_file, "r") as fp: line = fp.readline() while line: @@ -105,6 +85,7 @@ def load_labels(labels_file): def dump(file_object, annotations): from zipfile import ZipFile + import os # convertation formulas are based on https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py # - float values relative to width and height of image @@ -122,7 +103,7 @@ def convert_to_yolo(img_size, box): with ZipFile(file_object, "w") as output_zip: for frame_annotation in annotations.group_by_frame(): image_name = frame_annotation.name - annotation_name = "{}.txt".format(get_filename(image_name)) + annotation_name = "{}.txt".format(os.path.splitext(os.path.basename(image_name))[0]) width = frame_annotation.width height = frame_annotation.height From 937ef98e65def4025b3c30d8099b672e2d98328c Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Fri, 11 Oct 2019 18:31:57 +0300 Subject: [PATCH 06/16] extended doc --- cvat/apps/annotation/README.md | 105 +++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index 62491dd88d55..9af8606f0b85 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -343,6 +343,111 @@ image_feature_description = { - supported shapes: Rectangles - additional comments: the CVAT task should be created with the full label set that may be in the 
annotation files +#### How to create a task from TFRecord dataset (from VOC2007 for example) +1. Create label_map.pbtxt file with the following content: +``` +item { + id: 1 + name: 'aeroplane' +} +item { + id: 2 + name: 'bicycle' +} +item { + id: 3 + name: 'bird' +} +item { + id: 4 + name: 'boat' +} +item { + id: 5 + name: 'bottle' +} +item { + id: 6 + name: 'bus' +} +item { + id: 7 + name: 'car' +} +item { + id: 8 + name: 'cat' +} +item { + id: 9 + name: 'chair' +} +item { + id: 10 + name: 'cow' +} +item { + id: 11 + name: 'diningtable' +} +item { + id: 12 + name: 'dog' +} +item { + id: 13 + name: 'horse' +} +item { + id: 14 + name: 'motorbike' +} +item { + id: 15 + name: 'person' +} +item { + id: 16 + name: 'pottedplant' +} +item { + id: 17 + name: 'sheep' +} +item { + id: 18 + name: 'sofa' +} +item { + id: 19 + name: 'train' +} +item { + id: 20 + name: 'tvmonitor' +} +``` +1. Use [create_pascal_tf_record.py](https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py) to convert VOC2007 dataset to TFRecord format. +As example: +``` +python create_pascal_tf_record.py --data_dir --set train --year VOC2007 --output_path pascal.tfrecord --label_map_path label_map.pbtxt +``` +1. Zip train images + ``` + cat /VOC2007/ImageSets/Main/train.txt | while read p; do echo /VOC2007/JPEGImages/${p}.jpg ; done | zip images.zip -j -@ + ``` +1. Create a CVAT task with the following labels: + ``` + aeroplane bicycle bird boat bottle bus car cat chair cow diningtable dog horse motorbike person pottedplant sheep sofa train tvmonitor + ``` + Select images.zip as data. See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) guide for details. +1. Zip pascal.tfrecord and label_map.pbtxt files together + ``` + zip anno.zip -j + ``` +1. Click `Upload annotation` button, choose `TFRecord ZIP 1.0` and select the *.zip file with labels from previous step. It may take some time. 
+ + ### PNG mask #### Dumper description - downloaded file: a zip archive with following structure: From 2e32fc09cfd703fed67b07d3e27493c7d02cffcb Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Fri, 11 Oct 2019 19:20:40 +0300 Subject: [PATCH 07/16] fixed markdown style --- cvat/apps/annotation/README.md | 224 +++++++++++++++++++-------------- 1 file changed, 129 insertions(+), 95 deletions(-) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index 9af8606f0b85..934007c7962c 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -157,108 +157,128 @@ It allows to download and upload annotations in different formats and easily add This is native CVAT annotation format. [Detailed format description](cvat/apps/documentation/xml_format.md) -##### CVAT XML for images -downloaded file: Single unpacked XML -supported shapes - Rectangles, Polygons, Polylines, Points +#### CVAT XML for images dumper +- downloaded file: Single unpacked XML +- supported shapes - Rectangles, Polygons, Polylines, Points -##### CVAT XML for videos -downloaded file: Single unpacked XML -supported shapes - Rectangles, Polygons, Polylines, Points +#### CVAT XML for videos dumper +- downloaded file: Single unpacked XML +- supported shapes - Rectangles, Polygons, Polylines, Points #### CVAT XML Loader -uploaded file: Single unpacked XML -supported shapes - Rectangles, Polygons, Polylines, Points - +- uploaded file: Single unpacked XML +- supported shapes - Rectangles, Polygons, Polylines, Points ### [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) -#### Dumper description +#### Pascal dumper description - downloaded file: a zip archive with following structure: - ``` + ```bash taskname.zip ├── frame000001.xml ├── frame000002.xml ├── frame000003.xml └── ... ``` - Each *.xml file contains annotations for a frame with the same name as annotation file. 
Detailed structure specification of the *.xml file can be found [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf) + Each \*.xml file contains annotations for a frame with the same name as annotation file. + Detailed structure specification of the \*.xml file can be found + [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf). - supported shapes - Rectangles -- additional comments: If you plan to use 'truncated' and 'difficult' attributes please add to CVAT labels attributes the corresponding items: +- additional comments: If you plan to use 'truncated' and 'difficult' attributes please add to CVAT + labels attributes the corresponding items: `~checkbox=difficult:false ~checkbox=truncated:false` -#### Loader description -- uploaded file: a zip archive with following structure: - ``` - taskname.zip - ├── frame000001.xml - ├── frame000002.xml - ├── frame000003.xml - └── ... - ``` - Each *.xml file contains annotations for a frame with the same name as annotation file. Note: it should be possible to match the CVAT frame(imagename) and image filename from the annotation *.xml file(tag filename, e.g. `2008_004457.jpg`). There are 2 options: - 1. full match between image name and filename form annotation *.xml file (in case of a task was created from images or archive of images). - 2. match by frame number (if CVAT cannot match by name). If filename tag contains a number CVAT will interpret it as frame number. - -- supported shapes: Rectangles -- limitations: Support of Pascal VOC object detection format -- additional comments: the CVAT task should be created with the full label set that may be in the annotation files +#### Pascal loader description +- uploaded file: a zip archive with following structure: + ```bash + taskname.zip + ├── frame000001.xml + ├── frame000002.xml + ├── frame000003.xml + └── ... + ``` + Each \*.xml file contains annotations for a frame with the same name as annotation file. 
+ Note: it should be possible to match the CVAT frame(imagename) and image filename from the annotation \*.xml + file(tag filename, e.g. `2008_004457.jpg`). There are 2 options: + 1. full match between image name and filename form annotation *.xml + file (in case of a task was created from images or archive of images). + 1. match by frame number (if CVAT cannot match by name). + If filename tag contains a number CVAT will interpret it as frame number. + +- supported shapes: Rectangles +- limitations: Support of Pascal VOC object detection format +- additional comments: the CVAT task should be created with the full label set that may be in the annotation files #### How to create a task from Pascal VOC dataset -1. Download the Pascal Voc dataset -1. Create the CVAT task with the following labels: - ``` - aeroplane bicycle bird boat bottle bus car cat chair cow diningtable dog horse motorbike person pottedplant sheep sofa train tvmonitor - ``` - You can add `~checkbox=difficult:false ~checkbox=truncated:false` attributes if you want to use it. +1. Download the Pascal Voc dataset +1. Create the CVAT task with the following labels: + ```bash + aeroplane bicycle bird boat bottle bus car cat chair cow diningtable dog horse motorbike person pottedplant sheep sofa train tvmonitor + ``` + You can add `~checkbox=difficult:false ~checkbox=truncated:false` attributes if you want to use it. - Select interesting image files (See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) guide for details) -1. zip the corresponding annotation files -1. click `Upload annotation` button, choose `Pascal VOC ZIP 1.0` and select the *.zip file with annotations from previous step. It may take some time. + Select interesting image files + (See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) + guide for details) +1. zip the corresponding annotation files +1. 
click `Upload annotation` button, choose `Pascal VOC ZIP 1.0` +and select the *.zip file with annotations from previous step. +It may take some time. ### [YOLO](https://pjreddie.com/darknet/yolo/) -#### Dumper description +#### Yolo dumper description - downloaded file: a zip archive with following structure: - ``` + ```bash taskname.zip ├── frame000001.txt ├── frame000002.txt ├── ... └── obj.names ``` - Each `*.txt` file contains annotations for a frame with the same name as annotation file. Short description of `*.txt` file structure: each line describes label and bounding box in the following format `label_id cx cy w h`. + Each `*.txt` file contains annotations for a frame with the same name as annotation file. + Short description of `*.txt` file structure: each line describes label and bounding box + in the following format `label_id cx cy w h`. `obj.names` contains the ordered list of label names. - supported shapes - Rectangles -#### Loader description -- uploaded file: a zip archive with following structure: - ``` - taskname.zip - ├── frame000001.txt - ├── frame000002.txt - ├── frame000003.txt - ├── ... - └──obj.names - ``` - Each `*.txt` file contains annotations for a frame with the same name as annotation file. Note: it should be possible to match the CVAT frame(imagename) and annotation filename. There are 2 options: - 1. full match between image name and filename form annotation `*.txt` file (in case of a task was created from images or archive of images). - 2. match by frame number (if CVAT cannot match by name). If name of the *.txt annotation file contains a number CVAT will interpret it as frame number. - -- supported shapes: Rectangles -- additional comments: the CVAT task should be created with the full label set that may be in the annotation files - -#### How to create a task from YOLO formated dataset (from VOC for example) -1. 
Follow the official [guide](https://pjreddie.com/darknet/yolo/)(see Training YOLO on VOC section) and prepare the YOLO formatted annotation. +#### Yolo loader description +- uploaded file: a zip archive with following structure: + ```bash + taskname.zip + ├── frame000001.txt + ├── frame000002.txt + ├── frame000003.txt + ├── ... + └──obj.names + ``` + Each `*.txt` file contains annotations for a frame with the same name as annotation file. + Note: it should be possible to match the CVAT frame(imagename) and annotation filename. + There are 2 options: + 1. full match between image name and filename form annotation `*.txt` file + (in case of a task was created from images or archive of images). + 1. match by frame number (if CVAT cannot match by name). + If name of the *.txt annotation file contains a number CVAT will interpret it as frame number. + +- supported shapes: Rectangles +- additional comments: the CVAT task should be created with the full label set that may be in the annotation files + +#### How to create a task from YOLO formatted dataset (from VOC for example) +1. Follow the official [guide](https://pjreddie.com/darknet/yolo/)(see Training YOLO on VOC section) + and prepare the YOLO formatted annotation. 1. Zip train images - ``` + ```bash zip images.zip -j -@ < train.txt ``` 1. Create a CVAT task with the following labels: - ``` + ```bash aeroplane bicycle bird boat bottle bus car cat chair cow diningtable dog horse motorbike person pottedplant sheep sofa train tvmonitor ``` - Select images.zip as data. Most likely you should use `share` functionality because size of images.zip is more then 500Mb. See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) guide for details. + Select images.zip as data. Most likely you should use `share` + functionality because size of images.zip is more than 500Mb. 
+ See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) + guide for details. 1. Create `obj.names` with the following content: - ``` + ```bash aeroplane bicycle bird @@ -281,36 +301,44 @@ supported shapes - Rectangles, Polygons, Polylines, Points tvmonitor ``` 1. Zip all label files together (we need add only label files that correspond train subset) - ``` + ```bash cat train.txt | while read p; do echo ${p%/*/*}/labels/${${p##*/}%%.*}.txt; done | zip labels.zip -j -@ obj.names ``` -1. Click `Upload annotation` button, choose `YOLO ZIP 1.0` and select the *.zip file with labels from previous step. It may take some time. +1. Click `Upload annotation` button, choose `YOLO ZIP 1.0` and select the *.zip file with labels from previous step. + It may take some time. ### [MS COCO Object Detection](http://cocodataset.org/#format-data) -#### Dumper description +#### COCO dumper description - downloaded file: single unpacked `json`. Detailed description of the MS COCO format can be found [here](http://cocodataset.org/#format-data) - supported shapes - Polygons, Rectangles (interpreted as polygons) -#### Loader description +#### COCO loader description - uploaded file: single unpacked `*.json`. - supported shapes: Polygons - additional comments: the CVAT task should be created with the full label set that may be in the annotation files #### How to create a task from MS COCO dataset -1. Download the [MS COCO dataset](http://cocodataset.org/#download). For example [2017 Val images](http://images.cocodataset.org/zips/val2017.zip) and [2017 Train/Val annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip). -1. 
Create a CVAT task with the following labels: - ``` - person bicycle car motorcycle airplane bus train truck boat "traffic light" "fire hydrant" "stop sign" "parking meter" bench bird cat dog horse sheep cow elephant bear zebra giraffe backpack umbrella handbag tie suitcase frisbee skis snowboard "sports ball" kite "baseball bat" "baseball glove" skateboard surfboard "tennis racket" bottle "wine glass" cup fork knife spoon bowl banana apple sandwich orange broccoli carrot "hot dog" pizza donut cake chair couch "potted plant" bed "dining table" toilet tv laptop mouse remote keyboard "cell phone" microwave oven toaster sink refrigerator book clock vase scissors "teddy bear" "hair drier" toothbrush - ``` +1. Download the [MS COCO dataset](http://cocodataset.org/#download). + For example [2017 Val images](http://images.cocodataset.org/zips/val2017.zip) + and [2017 Train/Val annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip). +1. Create a CVAT task with the following labels: + ```bash + person bicycle car motorcycle airplane bus train truck boat "traffic light" "fire hydrant" "stop sign" "parking meter" bench bird cat dog horse sheep cow elephant bear zebra giraffe backpack umbrella handbag tie suitcase frisbee skis snowboard "sports ball" kite "baseball bat" "baseball glove" skateboard surfboard "tennis racket" bottle "wine glass" cup fork knife spoon bowl banana apple sandwich orange broccoli carrot "hot dog" pizza donut cake chair couch "potted plant" bed "dining table" toilet tv laptop mouse remote keyboard "cell phone" microwave oven toaster sink refrigerator book clock vase scissors "teddy bear" "hair drier" toothbrush + ``` - Select val2017.zip as data (See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) guide for details) -1. unpack annotations_trainval2017.zip -1. click `Upload annotation` button, choose `COCO JSON 1.0` and select `instances_val2017.json.json` annotation file. 
It may take some time. + Select val2017.zip as data + (See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) + guide for details) +1. unpack annotations_trainval2017.zip +1. click `Upload annotation` button, + choose `COCO JSON 1.0` and select `instances_val2017.json.json` annotation file. It may take some time. ### [TFRecord](https://www.tensorflow.org/tutorials/load_data/tf_records) -TFRecord is a very flexible format, but we try to correspond the format that used in [TF object detection](https://github.com/tensorflow/models/tree/master/research/object_detection) with minimal modifications. +TFRecord is a very flexible format, but we try to correspond the format that used in +[TF object detection](https://github.com/tensorflow/models/tree/master/research/object_detection) +with minimal modifications. Used feature description: -``` +```python image_feature_description = { 'image/filename': tf.io.FixedLenFeature([], tf.string), 'image/source_id': tf.io.FixedLenFeature([], tf.string), @@ -325,18 +353,18 @@ image_feature_description = { 'image/object/class/text': tf.io.VarLenFeature(tf.string), } ``` -#### Dumper description +#### TFRecord dumper description - downloaded file: a zip archive with following structure: - ``` + ```bash taskname.zip ├── task2.tfrecord └── label_map.pbtxt ``` - supported shapes - Rectangles -#### Loader description +#### TFRecord loader description - uploaded file: a zip archive with following structure: - ``` + ```bash taskname.zip └── task2.tfrecord ``` @@ -345,7 +373,7 @@ image_feature_description = { #### How to create a task from TFRecord dataset (from VOC2007 for example) 1. Create label_map.pbtxt file with the following content: -``` +```js item { id: 1 name: 'aeroplane' @@ -427,31 +455,34 @@ item { name: 'tvmonitor' } ``` -1. 
Use [create_pascal_tf_record.py](https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py) to convert VOC2007 dataset to TFRecord format. +1. Use [create_pascal_tf_record.py](https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py) +to convert VOC2007 dataset to TFRecord format. As example: -``` +```bash python create_pascal_tf_record.py --data_dir --set train --year VOC2007 --output_path pascal.tfrecord --label_map_path label_map.pbtxt ``` 1. Zip train images - ``` + ```bash cat /VOC2007/ImageSets/Main/train.txt | while read p; do echo /VOC2007/JPEGImages/${p}.jpg ; done | zip images.zip -j -@ ``` 1. Create a CVAT task with the following labels: - ``` + ```bash aeroplane bicycle bird boat bottle bus car cat chair cow diningtable dog horse motorbike person pottedplant sheep sofa train tvmonitor ``` - Select images.zip as data. See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) guide for details. + Select images.zip as data. + See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) + guide for details. 1. Zip pascal.tfrecord and label_map.pbtxt files together - ``` + ```bash zip anno.zip -j ``` -1. Click `Upload annotation` button, choose `TFRecord ZIP 1.0` and select the *.zip file with labels from previous step. It may take some time. - +1. Click `Upload annotation` button, choose `TFRecord ZIP 1.0` and select the *.zip file + with labels from previous step. It may take some time. ### PNG mask -#### Dumper description +#### Mask dumper description - downloaded file: a zip archive with following structure: - ``` + ```bash taskname.zip ├── frame000001.png ├── frame000002.png @@ -459,9 +490,12 @@ python create_pascal_tf_record.py --data_dir --set train --y ├── ... 
└── colormap.txt ``` - Mask is a png image with several (RGB) channels where each pixel has own color which corresponds to a label. Color generation correspond to the Pascal VOC color generation [algorithm](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#sec:voclabelcolormap). (0, 0, 0) is used for background. + Mask is a png image with several (RGB) channels where each pixel has own color which corresponds to a label. + Color generation correspond to the Pascal VOC color generation + [algorithm](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#sec:voclabelcolormap). + (0, 0, 0) is used for background. colormap.txt file contains the values of used colors in RGB format. - supported shapes - Rectangles, Polygons -#### Loader description +#### Mask loader description Not supported From ca9b4df5126b9de82784e1f5fedbaf335c2b245d Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Mon, 14 Oct 2019 11:00:33 +0300 Subject: [PATCH 08/16] fixed typos --- cvat/apps/annotation/README.md | 8 ++++---- cvat/apps/annotation/tfrecord.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index 934007c7962c..d53576236c89 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -180,7 +180,7 @@ This is native CVAT annotation format. ├── frame000003.xml └── ... ``` - Each \*.xml file contains annotations for a frame with the same name as annotation file. + Each *.xml file contains annotations for a frame with the same name as annotation file. Detailed structure specification of the \*.xml file can be found [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf). - supported shapes - Rectangles @@ -300,7 +300,7 @@ It may take some time. train tvmonitor ``` -1. Zip all label files together (we need add only label files that correspond train subset) +1. 
Zip all label files together (we need to add only label files that correspond to the train subset) ```bash cat train.txt | while read p; do echo ${p%/*/*}/labels/${${p##*/}%%.*}.txt; done | zip labels.zip -j -@ obj.names ``` @@ -314,7 +314,7 @@ It may take some time. #### COCO loader description - uploaded file: single unpacked `*.json`. -- supported shapes: Polygons +- supported shapes: Polygons (the `segmentation` must not be empty) - additional comments: the CVAT task should be created with the full label set that may be in the annotation files #### How to create a task from MS COCO dataset @@ -476,7 +476,7 @@ python create_pascal_tf_record.py --data_dir --set train --y ```bash zip anno.zip -j ``` -1. Click `Upload annotation` button, choose `TFRecord ZIP 1.0` and select the *.zip file +1. Click `Upload annotation` button, choose `TFRecord ZIP 1.0` and select the *.zip file with labels from previous step. It may take some time. ### PNG mask diff --git a/cvat/apps/annotation/tfrecord.py b/cvat/apps/annotation/tfrecord.py index a80de81babb9..edea2d08d498 100644 --- a/cvat/apps/annotation/tfrecord.py +++ b/cvat/apps/annotation/tfrecord.py @@ -81,7 +81,7 @@ def create_tf_example(img_id, img_size, image_name, boxes, label_ids): 'image/height': int64_feature(height), 'image/width': int64_feature(width), 'image/filename': bytes_feature(image_name.encode('utf8')), - 'image/source_id': bytes_feature(img_id), + 'image/source_id': bytes_feature(str(img_id).encode('utf8')), 'image/object/bbox/xmin': float_list_feature(xmins), 'image/object/bbox/xmax': float_list_feature(xmaxs), 'image/object/bbox/ymin': float_list_feature(ymins), From 65168d570ec2ee0b50ac4661bc7bb9b594dbef71 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Mon, 14 Oct 2019 11:39:03 +0300 Subject: [PATCH 09/16] fixed 0.14.0 version for scikit --- cvat/requirements/base.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index 
f5ec239e53b1..cbc9888f4dbf 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -38,7 +38,7 @@ pascal_voc_writer==0.1.4 django-rest-auth[with_social]==0.9.5 cython==0.29.13 matplotlib==3.0.3 -scikit-image>=0.14.0 +scikit-image==0.14.0 tensorflow==1.12.3 django-cors-headers==3.0.2 furl==2.0.0 From 0163c3ee061a91930e7e91a7106d14cdbfb4f9da Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Tue, 15 Oct 2019 17:00:59 +0300 Subject: [PATCH 10/16] updated README improved match_frame function --- cvat/apps/annotation/README.md | 54 +++++++++++++++--------------- cvat/apps/annotation/annotation.py | 26 +++++++------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index d53576236c89..8c6bbdb7b3b9 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -175,12 +175,13 @@ This is native CVAT annotation format. - downloaded file: a zip archive with following structure: ```bash taskname.zip - ├── frame000001.xml - ├── frame000002.xml - ├── frame000003.xml + ├── frame_000001.xml + ├── frame_000002.xml + ├── frame_000003.xml └── ... ``` - Each *.xml file contains annotations for a frame with the same name as annotation file. + Each annotation `*.xml` file has a name that corresponds to the name of the image file + (e.g. `frame_000001.txt` is the annotation for the `frame_000001.jpg` image). Detailed structure specification of the \*.xml file can be found [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf). - supported shapes - Rectangles @@ -192,18 +193,17 @@ This is native CVAT annotation format. - uploaded file: a zip archive with following structure: ```bash taskname.zip - ├── frame000001.xml - ├── frame000002.xml - ├── frame000003.xml + ├── frame_000001.xml + ├── frame_000002.xml + ├── frame_000003.xml └── ... ``` - Each \*.xml file contains annotations for a frame with the same name as annotation file. 
- Note: it should be possible to match the CVAT frame(imagename) and image filename from the annotation \*.xml + It should be possible to match the CVAT frame(imagename) and image filename from the annotation \*.xml file(tag filename, e.g. `2008_004457.jpg`). There are 2 options: 1. full match between image name and filename form annotation *.xml - file (in case of a task was created from images or archive of images). - 1. match by frame number (if CVAT cannot match by name). - If filename tag contains a number CVAT will interpret it as frame number. + file (in case of a task was created from images or archive of images). + 1. match by frame number (if CVAT cannot match by name). File name should be in the following format `frame_%6d.jpg`. + It will be used when task was created from video for example. - supported shapes: Rectangles - limitations: Support of Pascal VOC object detection format @@ -230,12 +230,13 @@ It may take some time. - downloaded file: a zip archive with following structure: ```bash taskname.zip - ├── frame000001.txt - ├── frame000002.txt + ├── frame_000001.txt + ├── frame_000002.txt ├── ... └── obj.names ``` - Each `*.txt` file contains annotations for a frame with the same name as annotation file. + Each annotation `*.txt` file has a name that corresponds to the name of the image file + (e.g. `frame_000001.txt` is the annotation for the `frame_000001.jpg` image). Short description of `*.txt` file structure: each line describes label and bounding box in the following format `label_id cx cy w h`. `obj.names` contains the ordered list of label names. @@ -245,19 +246,18 @@ It may take some time. - uploaded file: a zip archive with following structure: ```bash taskname.zip - ├── frame000001.txt - ├── frame000002.txt - ├── frame000003.txt + ├── frame_000001.txt + ├── frame_000002.txt + ├── frame_000003.txt ├── ... └──obj.names ``` - Each `*.txt` file contains annotations for a frame with the same name as annotation file. 
- Note: it should be possible to match the CVAT frame(imagename) and annotation filename. + It should be possible to match the CVAT frame(imagename) and annotation filename There are 2 options: - 1. full match between image name and filename form annotation `*.txt` file + 1. full match between image name and name of annotation `*.txt` file (in case of a task was created from images or archive of images). - 1. match by frame number (if CVAT cannot match by name). - If name of the *.txt annotation file contains a number CVAT will interpret it as frame number. + 1. match by frame number (if CVAT cannot match by name). File name should be in the following format `frame_%6d.jpg`. + It will be used when task was created from video for example. - supported shapes: Rectangles - additional comments: the CVAT task should be created with the full label set that may be in the annotation files @@ -484,9 +484,9 @@ python create_pascal_tf_record.py --data_dir --set train --y - downloaded file: a zip archive with following structure: ```bash taskname.zip - ├── frame000001.png - ├── frame000002.png - ├── frame000003.png + ├── frame_000001.png + ├── frame_000002.png + ├── frame_000003.png ├── ... └── colormap.txt ``` @@ -494,7 +494,7 @@ python create_pascal_tf_record.py --data_dir --set train --y Color generation correspond to the Pascal VOC color generation [algorithm](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#sec:voclabelcolormap). (0, 0, 0) is used for background. - colormap.txt file contains the values of used colors in RGB format. + `colormap.txt` file contains the values of the used colors in RGB format. 
- supported shapes - Rectangles, Polygons #### Mask loader description diff --git a/cvat/apps/annotation/annotation.py b/cvat/apps/annotation/annotation.py index 1f79eb347042..0a7618e35d9b 100644 --- a/cvat/apps/annotation/annotation.py +++ b/cvat/apps/annotation/annotation.py @@ -118,6 +118,8 @@ def __init__(self, annotation_ir, db_task, scheme='', host='', create_callback=N self._host = host self._create_callback=create_callback self._MAX_ANNO_SIZE=30000 + self._frame_info = {} + self._frame_mapping = {} db_labels = self._db_task.label_set.all().prefetch_related('attributespec_set').order_by('pk') @@ -189,6 +191,10 @@ def _init_frame_info(self): "height": db_image.height, } for db_image in self._db_task.image_set.all()} + self._frame_mapping = { + self._get_filename(info["path"]): frame for frame, info in self._frame_info.items() + } + def _init_meta(self): db_segments = self._db_task.segment_set.all().prefetch_related('job_set') self._meta = OrderedDict([ @@ -425,22 +431,16 @@ def _len(self): def frame_info(self): return self._frame_info + @staticmethod + def _get_filename(path): + return os.path.splitext(os.path.basename(path))[0] + def match_frame(self, filename): import re - def get_filename(path): - return os.path.splitext(os.path.basename(path))[0] - # try to match by filename - _filename = get_filename(filename) - for frame_number, info in self.frame_info.items(): - cvat_filename = get_filename(info["path"]) - if cvat_filename == _filename: - return frame_number - - # try to extract frame number from filename - numbers = re.findall(r"\d+", filename) - if numbers and len(numbers) == 1: - return int(numbers[0]) + _filename = self._get_filename(filename) + if _filename in self._frame_mapping: + return self._frame_mapping[_filename] raise Exception("Cannot match filename or determinate framenumber for {} filename".format(filename)) From 199acf78f87cc7feb2975b37c49510a9383a600e Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Thu, 17 Oct 2019 11:53:28 
+0300 Subject: [PATCH 11/16] added unit tests for dump/load --- cvat/apps/annotation/tfrecord.py | 2 +- cvat/apps/engine/tests/test_rest_api.py | 301 ++++++++++++++++++++---- 2 files changed, 261 insertions(+), 42 deletions(-) diff --git a/cvat/apps/annotation/tfrecord.py b/cvat/apps/annotation/tfrecord.py index edea2d08d498..654ef67563ea 100644 --- a/cvat/apps/annotation/tfrecord.py +++ b/cvat/apps/annotation/tfrecord.py @@ -152,7 +152,7 @@ def clamp(value, _min, _max): for record in dataset: parsed_record = tf.io.parse_single_example(record, image_feature_description) - frame_number = annotations.match_frame(parsed_record['image/source_id'].numpy().decode('utf-8')) + frame_number = annotations.match_frame(parsed_record['image/filename'].numpy().decode('utf-8')) frame_height = tf.cast(parsed_record['image/height'], tf.int64).numpy().item() frame_width = tf.cast(parsed_record['image/width'], tf.int64).numpy().item() xmins = tf.sparse.to_dense(parsed_record['image/object/bbox/xmin']).numpy() diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index 74caae3f685a..6922014a5e53 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -18,6 +18,9 @@ import io import xml.etree.ElementTree as ET from collections import defaultdict +import zipfile +from pycocotools import coco as coco_loader +import tempfile def create_db_users(cls): (group_admin, _) = Group.objects.get_or_create(name="admin") @@ -1540,7 +1543,7 @@ def test_api_v1_tasks_id_data_no_auth(self): response = self._create_task(None, data) self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) -def compare_objects(self, obj1, obj2, ignore_keys): +def compare_objects(self, obj1, obj2, ignore_keys, fp_tolerance=.001): if isinstance(obj1, dict): self.assertTrue(isinstance(obj2, dict), "{} != {}".format(obj1, obj2)) for k in obj1.keys(): @@ -1553,7 +1556,10 @@ def compare_objects(self, obj1, obj2, ignore_keys): for 
v1, v2 in zip(obj1, obj2):
             compare_objects(self, v1, v2, ignore_keys)
     else:
-        self.assertEqual(obj1, obj2)
+        if isinstance(obj1, float) or isinstance(obj2, float):
+            self.assertAlmostEqual(obj1, obj2, delta=fp_tolerance)
+        else:
+            self.assertEqual(obj1, obj2)
 
 class JobAnnotationAPITestCase(APITestCase):
     def setUp(self):
@@ -2117,6 +2123,16 @@ def _patch_api_v1_tasks_id_annotations(self, pk, user, action, data):
 
         return response
 
+    def _upload_api_v1_tasks_id_annotations(self, pk, user, data, query_params=""):
+        with ForceLogin(user, self.client):
+            response = self.client.put(
+                path="/api/v1/tasks/{0}/annotations?{1}".format(pk, query_params),
+                data=data,
+                format="multipart",
+            )
+
+        return response
+
     def _check_response(self, response, data):
         if not response.status_code in [
             status.HTTP_401_UNAUTHORIZED, status.HTTP_403_FORBIDDEN]:
@@ -2503,51 +2519,245 @@ def _run_api_v1_tasks_id_annotations(self, owner, assignee, annotator):
                 "create", data)
             self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)
 
-        cvat_format = AnnotationFormat.objects.get(name="CVAT")
-        for annotation_handler in cvat_format.annotationdumper_set.all():
-            response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator,
-                "format={}".format(annotation_handler.display_name))
-            self.assertEqual(response.status_code, HTTP_202_ACCEPTED)
-
-            response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator,
-                "format={}".format(annotation_handler.display_name))
-            self.assertEqual(response.status_code, HTTP_201_CREATED)
-
-            response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator,
-                "action=download&format={}".format(annotation_handler.display_name))
-            self.assertEqual(response.status_code, HTTP_200_OK)
-            self._check_dump_response(response, task, jobs, data)
-
-    def _check_dump_response(self, response, task, jobs, data):
-        if response.status_code == status.HTTP_200_OK:
-            def etree_to_dict(t):
-                d = {t.tag: {} if t.attrib else None}
-                children = list(t)
-                if children:
-                    dd = 
defaultdict(list) - for dc in map(etree_to_dict, children): - for k, v in dc.items(): - dd[k].append(v) - d = {t.tag: {k: v[0] if len(v) == 1 else v - for k, v in dd.items()}} - if t.attrib: - d[t.tag].update(('@' + k, v) for k, v in t.attrib.items()) - if t.text: - text = t.text.strip() - if not (children or t.attrib): - d[t.tag] = text - return d - - self.assertTrue(response.streaming) - content = io.BytesIO(b''.join(response.streaming_content)) - xmldump = ET.fromstring(content.read()) + def _run_api_v1_tasks_id_annotations_dump_load(self, owner, assignee, annotator): + if annotator: + HTTP_200_OK = status.HTTP_200_OK + HTTP_204_NO_CONTENT = status.HTTP_204_NO_CONTENT + HTTP_400_BAD_REQUEST = status.HTTP_400_BAD_REQUEST + HTTP_202_ACCEPTED = status.HTTP_202_ACCEPTED + HTTP_201_CREATED = status.HTTP_201_CREATED + else: + HTTP_200_OK = status.HTTP_401_UNAUTHORIZED + HTTP_204_NO_CONTENT = status.HTTP_401_UNAUTHORIZED + HTTP_400_BAD_REQUEST = status.HTTP_401_UNAUTHORIZED + HTTP_202_ACCEPTED = status.HTTP_401_UNAUTHORIZED + HTTP_201_CREATED = status.HTTP_401_UNAUTHORIZED + + def _get_initial_annotation(annotation_format): + rectangle_tracks_with_attrs = [{ + "frame": 0, + "label_id": task["labels"][0]["id"], + "group": 0, + "attributes": [ + { + "spec_id": task["labels"][0]["attributes"][0]["id"], + "value": task["labels"][0]["attributes"][0]["values"][0] + }, + ], + "shapes": [ + { + "frame": 0, + "points": [1.0, 2.1, 50.1, 30.22], + "type": "rectangle", + "occluded": False, + "outside": False, + "attributes": [ + { + "spec_id": task["labels"][0]["attributes"][1]["id"], + "value": task["labels"][0]["attributes"][1]["default_value"] + } + ] + }, + { + "frame": 1, + "points": [2.0, 2.1, 77.2, 36.22], + "type": "rectangle", + "occluded": True, + "outside": True, + "attributes": [ + { + "spec_id": task["labels"][0]["attributes"][1]["id"], + "value": task["labels"][0]["attributes"][1]["default_value"] + } + ] + }, + ] + }] + rectangle_tracks_wo_attrs = [{ + "frame": 1, + 
"label_id": task["labels"][1]["id"], + "group": 0, + "attributes": [], + "shapes": [ + { + "frame": 1, + "attributes": [], + "points": [1.0, 2.1, 50.2, 36.6], + "type": "rectangle", + "occluded": False, + "outside": False + }, + { + "frame": 2, + "attributes": [], + "points": [1.0, 2.1, 51, 36.6], + "type": "rectangle", + "occluded": False, + "outside": True + } + ] + }] + + rectangle_shapes_with_attrs = [{ + "frame": 0, + "label_id": task["labels"][0]["id"], + "group": 0, + "attributes": [ + { + "spec_id": task["labels"][0]["attributes"][0]["id"], + "value": task["labels"][0]["attributes"][0]["values"][0] + }, + { + "spec_id": task["labels"][0]["attributes"][1]["id"], + "value": task["labels"][0]["attributes"][1]["default_value"] + } + ], + "points": [1.0, 2.1, 10.6, 53.22], + "type": "rectangle", + "occluded": False + }] + + rectangle_shapes_wo_attrs = [{ + "frame": 1, + "label_id": task["labels"][1]["id"], + "group": 0, + "attributes": [], + "points": [2.0, 2.1, 40, 50.7], + "type": "rectangle", + "occluded": False + }] + polygon_shapes_wo_attrs = [{ + "frame": 1, + "label_id": task["labels"][1]["id"], + "group": 0, + "attributes": [], + "points": [2.0, 2.1, 100, 30.22, 40, 77, 1, 3], + "type": "polygon", + "occluded": False + }] + + annotations = { + "version": 0, + "tags": [], + "shapes": [], + "tracks": [], + } + if annotation_format == "CVAT XML 1.1 for videos": + annotations["tracks"] = rectangle_tracks_with_attrs + rectangle_tracks_wo_attrs + + elif annotation_format == "CVAT XML 1.1 for images": + annotations["shapes"] = rectangle_shapes_with_attrs + rectangle_shapes_wo_attrs + + elif annotation_format == "PASCAL VOC ZIP 1.0" or \ + annotation_format == "YOLO ZIP 1.0" or \ + annotation_format == "TFRecord ZIP 1.0": + annotations["shapes"] = rectangle_shapes_wo_attrs + + elif annotation_format == "COCO JSON 1.0": + annotations["shapes"] = polygon_shapes_wo_attrs + + elif annotation_format == "MASK ZIP 1.0": + annotations["shapes"] = 
rectangle_shapes_with_attrs + rectangle_shapes_wo_attrs + polygon_shapes_wo_attrs + annotations["tracks"] = rectangle_tracks_with_attrs + rectangle_tracks_wo_attrs + + return annotations + + for annotation_format in AnnotationFormat.objects.all(): + for dumper in annotation_format.annotationdumper_set.all(): + # 1. create task + task, jobs = self._create_task(owner, assignee) + + # 2. add annotation + data = _get_initial_annotation(dumper.display_name) + response = self._put_api_v1_tasks_id_annotations(task["id"], annotator, data) + data["version"] += 1 + + self.assertEqual(response.status_code, HTTP_200_OK) + self._check_response(response, data) + + # 3. download annotation + response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, + "format={}".format(dumper.display_name)) + self.assertEqual(response.status_code, HTTP_202_ACCEPTED) + + response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, + "format={}".format(dumper.display_name)) + self.assertEqual(response.status_code, HTTP_201_CREATED) + + response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, + "action=download&format={}".format(dumper.display_name)) + self.assertEqual(response.status_code, HTTP_200_OK) + + # 4. check downloaded data + if response.status_code == status.HTTP_200_OK: + self.assertTrue(response.streaming) + content = io.BytesIO(b"".join(response.streaming_content)) + self._check_dump_content(content, task, jobs, data, annotation_format) + content.seek(0) + + # 5. remove annotation form the task + response = self._delete_api_v1_tasks_id_annotations(task["id"], annotator) + data["version"] += 1 + self.assertEqual(response.status_code, HTTP_204_NO_CONTENT) + + # 6. 
upload annotation and check annotation + uploaded_data = { + "annotation_file": content, + } + + for loader in annotation_format.annotationloader_set.all(): + response = self._upload_api_v1_tasks_id_annotations(task["id"], annotator, uploaded_data, "format={}".format(loader.display_name)) + self.assertEqual(response.status_code, HTTP_202_ACCEPTED) + + response = self._upload_api_v1_tasks_id_annotations(task["id"], annotator, {}, "format={}".format(loader.display_name)) + self.assertEqual(response.status_code, HTTP_201_CREATED) + + response = self._get_api_v1_tasks_id_annotations(task["id"], annotator) + self.assertEqual(response.status_code, HTTP_200_OK) + data["version"] += 2 # upload is delete + put + self._check_response(response, data) + + def _check_dump_content(self, content, task, jobs, data, annotation_format): + def etree_to_dict(t): + d = {t.tag: {} if t.attrib else None} + children = list(t) + if children: + dd = defaultdict(list) + for dc in map(etree_to_dict, children): + for k, v in dc.items(): + dd[k].append(v) + d = {t.tag: {k: v[0] if len(v) == 1 else v + for k, v in dd.items()}} + if t.attrib: + d[t.tag].update(('@' + k, v) for k, v in t.attrib.items()) + if t.text: + text = t.text.strip() + if not (children or t.attrib): + d[t.tag] = text + return d + + if annotation_format.name == "CVAT": + xmldump = ET.fromstring(content.read()) self.assertEqual(xmldump.tag, "annotations") tags = xmldump.findall("./meta") self.assertEqual(len(tags), 1) meta = etree_to_dict(tags[0])["meta"] self.assertEqual(meta["task"]["name"], task["name"]) - + elif annotation_format.name == "PASCAL VOC": + self.assertTrue(zipfile.is_zipfile(content)) + elif annotation_format.name == "YOLO": + self.assertTrue(zipfile.is_zipfile(content)) + elif annotation_format.name == "COCO": + with tempfile.NamedTemporaryFile() as tmp_file: + tmp_file.write(content.read()) + tmp_file.flush() + coco = coco_loader.COCO(tmp_file.name) + self.assertTrue(coco.getAnnIds()) + elif 
annotation_format.name == "TFRecord": + self.assertTrue(zipfile.is_zipfile(content)) + elif annotation_format.name == "MASK": + self.assertTrue(zipfile.is_zipfile(content)) def test_api_v1_tasks_id_annotations_admin(self): self._run_api_v1_tasks_id_annotations(self.admin, self.assignee, @@ -2560,7 +2770,16 @@ def test_api_v1_tasks_id_annotations_user(self): def test_api_v1_tasks_id_annotations_no_auth(self): self._run_api_v1_tasks_id_annotations(self.user, self.assignee, None) + def test_api_v1_tasks_id_annotations_dump_load_admin(self): + self._run_api_v1_tasks_id_annotations_dump_load(self.admin, self.assignee, + self.assignee) + + def test_api_v1_tasks_id_annotations_dump_load_user(self): + self._run_api_v1_tasks_id_annotations_dump_load(self.user, self.assignee, + self.assignee) + def test_api_v1_tasks_id_annotations_dump_load_no_auth(self): + self._run_api_v1_tasks_id_annotations_dump_load(self.user, self.assignee, None) class ServerShareAPITestCase(APITestCase): def setUp(self): From 238e0e6fc26bbef235bc3fce0e4b8ffba816b418 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Thu, 17 Oct 2019 11:57:14 +0300 Subject: [PATCH 12/16] removed unused import --- cvat/apps/annotation/annotation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cvat/apps/annotation/annotation.py b/cvat/apps/annotation/annotation.py index 0a7618e35d9b..a592dd91b0e8 100644 --- a/cvat/apps/annotation/annotation.py +++ b/cvat/apps/annotation/annotation.py @@ -436,8 +436,6 @@ def _get_filename(path): return os.path.splitext(os.path.basename(path))[0] def match_frame(self, filename): - import re - # try to match by filename _filename = self._get_filename(filename) if _filename in self._frame_mapping: From c43c00f98d56612d7b33228291bf56b43dfcf7bc Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Thu, 17 Oct 2019 12:51:05 +0300 Subject: [PATCH 13/16] update readme --- cvat/apps/annotation/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index 8c6bbdb7b3b9..b04cdaea4ee7 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -182,11 +182,11 @@ This is native CVAT annotation format. ``` Each annotation `*.xml` file has a name that corresponds to the name of the image file (e.g. `frame_000001.txt` is the annotation for the `frame_000001.jpg` image). - Detailed structure specification of the \*.xml file can be found + Detailed structure specification of the `*.xml` file can be found [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf). - supported shapes - Rectangles -- additional comments: If you plan to use 'truncated' and 'difficult' attributes please add to CVAT - labels attributes the corresponding items: +- additional comments: If you plan to use 'truncated' and 'difficult' attributes please add the corresponding + items to the CVAT label attributes: `~checkbox=difficult:false ~checkbox=truncated:false` #### Pascal loader description @@ -203,7 +203,7 @@ This is native CVAT annotation format. 1. full match between image name and filename form annotation *.xml file (in case of a task was created from images or archive of images). 1. match by frame number (if CVAT cannot match by name). File name should be in the following format `frame_%6d.jpg`. - It will be used when task was created from video for example. + It will be used when task was created from a video. - supported shapes: Rectangles - limitations: Support of Pascal VOC object detection format @@ -211,7 +211,7 @@ This is native CVAT annotation format. #### How to create a task from Pascal VOC dataset 1. Download the Pascal Voc dataset -1. Create the CVAT task with the following labels: +1. Create a CVAT task with the following labels: ```bash aeroplane bicycle bird boat bottle bus car cat chair cow diningtable dog horse motorbike person pottedplant sheep sofa train tvmonitor ``` @@ -257,7 +257,7 @@ It may take some time. 1. 
full match between image name and name of annotation `*.txt` file (in case of a task was created from images or archive of images). 1. match by frame number (if CVAT cannot match by name). File name should be in the following format `frame_%6d.jpg`. - It will be used when task was created from video for example. + It will be used when task was created from a video. - supported shapes: Rectangles - additional comments: the CVAT task should be created with the full label set that may be in the annotation files From 9db677204056b8e85f74bbb3140a87aa33322fc9 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Thu, 17 Oct 2019 13:04:04 +0300 Subject: [PATCH 14/16] Fixed codacy issues --- cvat/apps/engine/tests/test_rest_api.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index 6922014a5e53..fb9d73c94fdf 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -2139,19 +2139,15 @@ def _check_response(self, response, data): compare_objects(self, data, response.data, ignore_keys=["id"]) def _run_api_v1_tasks_id_annotations(self, owner, assignee, annotator): - task, jobs = self._create_task(owner, assignee) + task, _ = self._create_task(owner, assignee) if annotator: HTTP_200_OK = status.HTTP_200_OK HTTP_204_NO_CONTENT = status.HTTP_204_NO_CONTENT HTTP_400_BAD_REQUEST = status.HTTP_400_BAD_REQUEST - HTTP_202_ACCEPTED = status.HTTP_202_ACCEPTED - HTTP_201_CREATED = status.HTTP_201_CREATED else: HTTP_200_OK = status.HTTP_401_UNAUTHORIZED HTTP_204_NO_CONTENT = status.HTTP_401_UNAUTHORIZED HTTP_400_BAD_REQUEST = status.HTTP_401_UNAUTHORIZED - HTTP_202_ACCEPTED = status.HTTP_401_UNAUTHORIZED - HTTP_201_CREATED = status.HTTP_401_UNAUTHORIZED data = { "version": 0, @@ -2523,13 +2519,11 @@ def _run_api_v1_tasks_id_annotations_dump_load(self, owner, assignee, annotator) if annotator: HTTP_200_OK = status.HTTP_200_OK 
HTTP_204_NO_CONTENT = status.HTTP_204_NO_CONTENT - HTTP_400_BAD_REQUEST = status.HTTP_400_BAD_REQUEST HTTP_202_ACCEPTED = status.HTTP_202_ACCEPTED HTTP_201_CREATED = status.HTTP_201_CREATED else: HTTP_200_OK = status.HTTP_401_UNAUTHORIZED HTTP_204_NO_CONTENT = status.HTTP_401_UNAUTHORIZED - HTTP_400_BAD_REQUEST = status.HTTP_401_UNAUTHORIZED HTTP_202_ACCEPTED = status.HTTP_401_UNAUTHORIZED HTTP_201_CREATED = status.HTTP_401_UNAUTHORIZED From c8c0efbbb941ab30db794b75b7de18d0447bc0ac Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Tue, 29 Oct 2019 19:11:31 +0300 Subject: [PATCH 15/16] fixed comments --- cvat/apps/annotation/README.md | 15 ++++---- cvat/apps/engine/tests/test_rest_api.py | 49 ++++++++++++++++--------- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/annotation/README.md index b04cdaea4ee7..e7cc3f7d38d4 100644 --- a/cvat/apps/annotation/README.md +++ b/cvat/apps/annotation/README.md @@ -181,7 +181,7 @@ This is native CVAT annotation format. └── ... ``` Each annotation `*.xml` file has a name that corresponds to the name of the image file - (e.g. `frame_000001.txt` is the annotation for the `frame_000001.jpg` image). + (e.g. `frame_000001.xml` is the annotation for the `frame_000001.jpg` image). Detailed structure specification of the `*.xml` file can be found [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf). - supported shapes - Rectangles @@ -199,8 +199,8 @@ This is native CVAT annotation format. └── ... ``` It should be possible to match the CVAT frame(imagename) and image filename from the annotation \*.xml - file(tag filename, e.g. `2008_004457.jpg`). There are 2 options: - 1. full match between image name and filename form annotation *.xml + file (the tag filename, e.g. `2008_004457.jpg`). There are 2 options: + 1. full match between image name and filename from annotation *.xml file (in case of a task was created from images or archive of images). 1. 
match by frame number (if CVAT cannot match by name). File name should be in the following format `frame_%6d.jpg`. It will be used when task was created from a video. @@ -210,12 +210,13 @@ This is native CVAT annotation format. - additional comments: the CVAT task should be created with the full label set that may be in the annotation files #### How to create a task from Pascal VOC dataset -1. Download the Pascal Voc dataset +1. Download the Pascal Voc dataset (Can be downloaded from the + [PASCAL VOC website](http://host.robots.ox.ac.uk/pascal/VOC/)) 1. Create a CVAT task with the following labels: ```bash aeroplane bicycle bird boat bottle bus car cat chair cow diningtable dog horse motorbike person pottedplant sheep sofa train tvmonitor ``` - You can add `~checkbox=difficult:false ~checkbox=truncated:false` attributes if you want to use it. + You can add `~checkbox=difficult:false ~checkbox=truncated:false` attributes for each label if you want to use them. Select interesting image files (See [Creating an annotation task](cvat/apps/documentation/user_guide.md#creating-an-annotation-task) @@ -264,7 +265,7 @@ It may take some time. #### How to create a task from YOLO formatted dataset (from VOC for example) 1. Follow the official [guide](https://pjreddie.com/darknet/yolo/)(see Training YOLO on VOC section) - and prepare the YOLO formatted annotation. + and prepare the YOLO formatted annotation files. 1. 
Zip train images ```bash zip images.zip -j -@ < train.txt @@ -481,7 +482,7 @@ python create_pascal_tf_record.py --data_dir --set train --y ### PNG mask #### Mask dumper description -- downloaded file: a zip archive with following structure: +- downloaded file: a zip archive with the following structure: ```bash taskname.zip ├── frame_000001.png diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index fb9d73c94fdf..3917bfbf6ebe 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -1556,7 +1556,7 @@ def compare_objects(self, obj1, obj2, ignore_keys, fp_tolerance=.001): for v1, v2 in zip(obj1, obj2): compare_objects(self, v1, v2, ignore_keys) else: - if isinstance(obj1, float) or (obj2, float): + if isinstance(obj1, float) or isinstance(obj2, float): self.assertAlmostEqual(obj1, obj2, delta=fp_tolerance) else: self.assertEqual(obj1, obj2) @@ -2133,6 +2133,13 @@ def _upload_api_v1_tasks_id_annotations(self, pk, user, data, query_params=""): return response + def _get_annotation_formats(self, user): + with ForceLogin(user, self.client): + response = self.client.get( + path="/api/v1/server/annotation/formats" + ) + return response + def _check_response(self, response, data): if not response.status_code in [ status.HTTP_401_UNAUTHORIZED, status.HTTP_403_FORBIDDEN]: @@ -2657,13 +2664,19 @@ def _get_initial_annotation(annotation_format): return annotations - for annotation_format in AnnotationFormat.objects.all(): - for dumper in annotation_format.annotationdumper_set.all(): + response = self._get_annotation_formats(owner) + self.assertEqual(response.status_code, HTTP_200_OK) + + supported_formats = response.data + self.assertTrue(supported_formats) + + for annotation_format in supported_formats: + for dumper in annotation_format["dumpers"]: # 1. create task task, jobs = self._create_task(owner, assignee) # 2. 
add annotation - data = _get_initial_annotation(dumper.display_name) + data = _get_initial_annotation(dumper["display_name"]) response = self._put_api_v1_tasks_id_annotations(task["id"], annotator, data) data["version"] += 1 @@ -2672,22 +2685,22 @@ def _get_initial_annotation(annotation_format): # 3. download annotation response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, - "format={}".format(dumper.display_name)) + "format={}".format(dumper["display_name"])) self.assertEqual(response.status_code, HTTP_202_ACCEPTED) response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, - "format={}".format(dumper.display_name)) + "format={}".format(dumper["display_name"])) self.assertEqual(response.status_code, HTTP_201_CREATED) response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, - "action=download&format={}".format(dumper.display_name)) + "action=download&format={}".format(dumper["display_name"])) self.assertEqual(response.status_code, HTTP_200_OK) # 4. check downloaded data if response.status_code == status.HTTP_200_OK: self.assertTrue(response.streaming) content = io.BytesIO(b"".join(response.streaming_content)) - self._check_dump_content(content, task, jobs, data, annotation_format) + self._check_dump_content(content, task, jobs, data, annotation_format["name"]) content.seek(0) # 5. 
remove annotation form the task @@ -2700,11 +2713,11 @@ def _get_initial_annotation(annotation_format): "annotation_file": content, } - for loader in annotation_format.annotationloader_set.all(): - response = self._upload_api_v1_tasks_id_annotations(task["id"], annotator, uploaded_data, "format={}".format(loader.display_name)) + for loader in annotation_format["loaders"]: + response = self._upload_api_v1_tasks_id_annotations(task["id"], annotator, uploaded_data, "format={}".format(loader["display_name"])) self.assertEqual(response.status_code, HTTP_202_ACCEPTED) - response = self._upload_api_v1_tasks_id_annotations(task["id"], annotator, {}, "format={}".format(loader.display_name)) + response = self._upload_api_v1_tasks_id_annotations(task["id"], annotator, {}, "format={}".format(loader["display_name"])) self.assertEqual(response.status_code, HTTP_201_CREATED) response = self._get_api_v1_tasks_id_annotations(task["id"], annotator) @@ -2712,7 +2725,7 @@ def _get_initial_annotation(annotation_format): data["version"] += 2 # upload is delete + put self._check_response(response, data) - def _check_dump_content(self, content, task, jobs, data, annotation_format): + def _check_dump_content(self, content, task, jobs, data, annotation_format_name): def etree_to_dict(t): d = {t.tag: {} if t.attrib else None} children = list(t) @@ -2731,26 +2744,26 @@ def etree_to_dict(t): d[t.tag] = text return d - if annotation_format.name == "CVAT": + if annotation_format_name == "CVAT": xmldump = ET.fromstring(content.read()) self.assertEqual(xmldump.tag, "annotations") tags = xmldump.findall("./meta") self.assertEqual(len(tags), 1) meta = etree_to_dict(tags[0])["meta"] self.assertEqual(meta["task"]["name"], task["name"]) - elif annotation_format.name == "PASCAL VOC": + elif annotation_format_name == "PASCAL VOC": self.assertTrue(zipfile.is_zipfile(content)) - elif annotation_format.name == "YOLO": + elif annotation_format_name == "YOLO": self.assertTrue(zipfile.is_zipfile(content)) - 
elif annotation_format.name == "COCO": + elif annotation_format_name == "COCO": with tempfile.NamedTemporaryFile() as tmp_file: tmp_file.write(content.read()) tmp_file.flush() coco = coco_loader.COCO(tmp_file.name) self.assertTrue(coco.getAnnIds()) - elif annotation_format.name == "TFRecord": + elif annotation_format_name == "TFRecord": self.assertTrue(zipfile.is_zipfile(content)) - elif annotation_format.name == "MASK": + elif annotation_format_name == "MASK": self.assertTrue(zipfile.is_zipfile(content)) def test_api_v1_tasks_id_annotations_admin(self): From d4c8f4c61f89fa24a8467be6369c60c9c10625e9 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Tue, 29 Oct 2019 19:52:15 +0300 Subject: [PATCH 16/16] fixed dump/load test for not authorized user --- cvat/apps/engine/tests/test_rest_api.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index 3917bfbf6ebe..cea29e33079d 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -2664,11 +2664,24 @@ def _get_initial_annotation(annotation_format): return annotations - response = self._get_annotation_formats(owner) + response = self._get_annotation_formats(annotator) self.assertEqual(response.status_code, HTTP_200_OK) - supported_formats = response.data - self.assertTrue(supported_formats) + + if annotator is not None: + supported_formats = response.data + else: + supported_formats = [{ + "name": "CVAT", + "dumpers": [{ + "display_name": "CVAT XML 1.1 for images" + }], + "loaders": [{ + "display_name": "CVAT XML 1.1" + }] + }] + + self.assertTrue(isinstance(supported_formats, list) and supported_formats) for annotation_format in supported_formats: for dumper in annotation_format["dumpers"]: