Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support relative paths in import and export #1463

Merged
merged 125 commits into from
Jun 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
125 commits
Select commit Hold shift + click to select a range
9c87cdc
Move annotations to dm
zhiltsov-max Mar 30, 2020
06d6217
Refactor dm
zhiltsov-max Mar 30, 2020
40375f0
Rename data manager
zhiltsov-max Mar 31, 2020
8c64a9a
Move anno dump and upload functions
zhiltsov-max Mar 31, 2020
ce9c0a8
Join server host and port in cvat cli
zhiltsov-max Apr 2, 2020
cf181ae
Move export templates dir
zhiltsov-max Apr 3, 2020
76c48c7
add dm project exporter
zhiltsov-max Apr 3, 2020
a2f00f5
update mask format support
zhiltsov-max Apr 3, 2020
37a4a03
Use decorators for formats definition
zhiltsov-max Apr 3, 2020
7736039
Update formats
zhiltsov-max Apr 3, 2020
ba75d7c
Update format implementations
zhiltsov-max Apr 6, 2020
8b0173a
remove parameter
zhiltsov-max Apr 6, 2020
5c770b4
Add dm views
zhiltsov-max Apr 6, 2020
527123b
Move annotation components to dm
zhiltsov-max Apr 6, 2020
a758550
restore extension for export formats
zhiltsov-max Apr 7, 2020
81404b1
update rest api
zhiltsov-max Apr 7, 2020
92b1bba
use serializers, update views
zhiltsov-max Apr 7, 2020
ae7974e
merge develop
zhiltsov-max Apr 7, 2020
f8e2663
Update format names
zhiltsov-max Apr 9, 2020
927ca56
Update docs
zhiltsov-max Apr 9, 2020
cc04df1
Update tests
zhiltsov-max Apr 9, 2020
2d2ca2d
move test
zhiltsov-max Apr 9, 2020
0263a0e
fix import
zhiltsov-max Apr 9, 2020
ec52ecd
Extend format tests
zhiltsov-max Apr 9, 2020
76f496e
django compatibility for directory access
zhiltsov-max Apr 9, 2020
1f4612c
move tests
zhiltsov-max Apr 9, 2020
e139e98
update module links
zhiltsov-max Apr 9, 2020
acf79ca
fixes
zhiltsov-max Apr 9, 2020
c3d8ed1
fix git application
zhiltsov-max Apr 10, 2020
5567e54
fixes
zhiltsov-max Apr 10, 2020
c46769f
add extension recommendation
zhiltsov-max Apr 10, 2020
8ef19aa
fixes
zhiltsov-max Apr 13, 2020
1c42d41
api
zhiltsov-max Apr 13, 2020
780e108
join api methods
zhiltsov-max Apr 13, 2020
951bcb4
Add trim whitespace to workspace config
zhiltsov-max Apr 14, 2020
3c5caae
update tests
zhiltsov-max Apr 14, 2020
fce56ca
fixes
zhiltsov-max Apr 14, 2020
c87ece3
Update format docs
zhiltsov-max Apr 14, 2020
1e5f992
join format queries
zhiltsov-max Apr 15, 2020
73edf83
fixes
zhiltsov-max Apr 15, 2020
1915bb7
update new ui
zhiltsov-max Apr 15, 2020
d7ff4de
ui tests
zhiltsov-max Apr 15, 2020
01e176d
old ui
zhiltsov-max Apr 15, 2020
f14571f
update js bundles
zhiltsov-max Apr 15, 2020
fb1c177
linter fixes
zhiltsov-max Apr 15, 2020
7b1e758
add image with loader tests
zhiltsov-max Apr 15, 2020
5111148
fix linter
zhiltsov-max Apr 15, 2020
a94cc5e
fix frame step and frame access
zhiltsov-max Apr 16, 2020
fa0ac2a
use server file name for annotations export
zhiltsov-max Apr 16, 2020
daea029
update cvat core
zhiltsov-max Apr 16, 2020
9fa1957
add import hack for rest api tests
zhiltsov-max Apr 17, 2020
ff07d8c
move cli tests
zhiltsov-max Apr 17, 2020
1edd1ab
fix cvat format converter args parsing
zhiltsov-max Apr 17, 2020
d7bc832
remove folder on extract error
zhiltsov-max Apr 17, 2020
4f88440
print error message on incorrect xpath expression
zhiltsov-max Apr 17, 2020
1ace725
use own categories when no others exist
zhiltsov-max Apr 17, 2020
ca2384a
update changelog
zhiltsov-max Apr 17, 2020
6a244cf
Merge branch 'develop' into zm/merge-anno-and-dm-2
zhiltsov-max Apr 17, 2020
51581b7
really add text to changelog
zhiltsov-max Apr 17, 2020
c87bf6b
Merge branch 'zm/merge-anno-and-dm-2' of https://github.com/opencv/cv…
zhiltsov-max Apr 17, 2020
d991f11
Fix annotation window menu
zhiltsov-max Apr 20, 2020
7595119
fix ui
zhiltsov-max Apr 24, 2020
6b29167
fix replace
zhiltsov-max Apr 24, 2020
5cbdb3e
update extra apps
zhiltsov-max Apr 24, 2020
d69c0a4
format readme
zhiltsov-max Apr 24, 2020
5ad4b09
readme
zhiltsov-max Apr 24, 2020
0d36cc3
Merge remote-tracking branch 'origin/develop' into zm/merge-anno-and-…
zhiltsov-max Apr 24, 2020
cdb26c1
linter
zhiltsov-max Apr 24, 2020
0ca795e
Merge branch 'develop' into zm/merge-anno-and-dm-2
zhiltsov-max Apr 27, 2020
3ad5e25
Fix old ui
zhiltsov-max Apr 27, 2020
10a00e9
Update CHANGELOG.md
nmanovic Apr 27, 2020
61b7740
Merge branch 'develop' into zm/merge-anno-and-dm-2
zhiltsov-max Apr 27, 2020
6bf194b
update user guide
zhiltsov-max Apr 27, 2020
fd41e1d
linter
zhiltsov-max Apr 27, 2020
28d3e52
more linter fixes
zhiltsov-max Apr 27, 2020
9a4d1f1
update changelog
zhiltsov-max Apr 20, 2020
76674ed
Add image attributes
zhiltsov-max Apr 23, 2020
88b265f
add directory check in save image
zhiltsov-max Apr 23, 2020
24ff6c2
update image tests
zhiltsov-max Apr 23, 2020
3065c44
update image dir format with relative paths
zhiltsov-max Apr 23, 2020
3c847f0
update datumaro format
zhiltsov-max Apr 23, 2020
0576111
update coco format
zhiltsov-max Apr 23, 2020
b561d5a
update cvat format
zhiltsov-max Apr 23, 2020
1353b8b
update labelme format
zhiltsov-max Apr 23, 2020
d877b32
update mot format
zhiltsov-max Apr 23, 2020
44174b5
update image dir format
zhiltsov-max Apr 23, 2020
3978838
update voc format
zhiltsov-max Apr 23, 2020
c40bb44
update mot format
zhiltsov-max Apr 23, 2020
a690cc9
update yolo format
zhiltsov-max Apr 24, 2020
65ef33e
update labelme test
zhiltsov-max Apr 24, 2020
c84e74c
update voc format
zhiltsov-max Apr 24, 2020
fe1db2b
update tfrecord format
zhiltsov-max Apr 24, 2020
4b526a0
fixes
zhiltsov-max Apr 24, 2020
fe0dee2
update save_image usage
zhiltsov-max Apr 28, 2020
bdaa1c0
remove item name conversion
zhiltsov-max Apr 28, 2020
bd2f15e
fix merge
zhiltsov-max Apr 29, 2020
fe462f3
fix export
zhiltsov-max Apr 29, 2020
eb7879d
prohibit relative paths in labelme format
zhiltsov-max Apr 29, 2020
35b547e
Add test for relative name matching
zhiltsov-max May 7, 2020
584cda5
move code
zhiltsov-max May 7, 2020
490da50
implement frame matching
zhiltsov-max May 8, 2020
276613d
fix yolo
zhiltsov-max May 8, 2020
eda48df
Merge branch 'develop' into zm/dm-relative-image-paths
zhiltsov-max May 14, 2020
19b75d2
fix merge
zhiltsov-max May 14, 2020
e4bf997
fix merge
zhiltsov-max May 15, 2020
471a45b
prettify code
zhiltsov-max May 15, 2020
be92a1a
fix method call
zhiltsov-max May 16, 2020
3a01441
Merge branch 'develop' into zm/dm-relative-image-paths
zhiltsov-max May 18, 2020
b11a350
fix frame matching in yolo
zhiltsov-max May 18, 2020
ec36ab4
add tests
zhiltsov-max May 19, 2020
3f3290a
regularize function output
zhiltsov-max May 19, 2020
d570f90
update changelog
zhiltsov-max May 20, 2020
2f341c9
fixes
zhiltsov-max May 20, 2020
83bab0b
Merge branch 'develop' into zm/dm-relative-image-paths
zhiltsov-max Jun 3, 2020
115fd2a
fix z_order use
zhiltsov-max Jun 3, 2020
f5390ab
fix slash replacement
zhiltsov-max Jun 3, 2020
7ea787e
Merge branch 'develop' into zm/dm-relative-image-paths
zhiltsov-max Jun 4, 2020
fef245f
Merge branch 'develop' into zm/dm-relative-image-paths
zhiltsov-max Jun 4, 2020
070008e
Merge branch 'develop' into zm/dm-relative-image-paths
zhiltsov-max Jun 8, 2020
8277b52
Merge branch 'develop' into zm/dm-relative-image-paths
zhiltsov-max Jun 10, 2020
9276fa1
Merge branch 'develop' into zm/dm-relative-image-paths
zhiltsov-max Jun 17, 2020
b3da592
linter
zhiltsov-max Jun 17, 2020
f8b1e15
Merge branch 'develop' into zm/dm-relative-image-paths
zhiltsov-max Jun 17, 2020
3de34bf
t
zhiltsov-max Jun 17, 2020
efabe2c
t2
zhiltsov-max Jun 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Removed information about e-mail from the basic user information (<https://github.com/opencv/cvat/pull/1627>)
- Updated the HTTPS installation manual to make it easier and more robust. It now includes automatic renewal of Let's Encrypt certificates.
- Implemented import and export of annotations with relative image paths (<https://github.com/opencv/cvat/pull/1463>)

### Deprecated
-
Expand Down
72 changes: 48 additions & 24 deletions cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import os.path as osp
from collections import OrderedDict, namedtuple
from pathlib import Path

from django.utils import timezone

Expand Down Expand Up @@ -125,8 +126,8 @@ def _init_frame_info(self):
} for db_image in self._db_task.data.images.all()}

self._frame_mapping = {
self._get_filename(info["path"]): frame
for frame, info in self._frame_info.items()
self._get_filename(info["path"]): frame_number
for frame_number, info in self._frame_info.items()
}

def _init_meta(self):
Expand Down Expand Up @@ -398,16 +399,27 @@ def db_task(self):

@staticmethod
def _get_filename(path):
return osp.splitext(osp.basename(path))[0]

def match_frame(self, filename):
# try to match by filename
_filename = self._get_filename(filename)
if _filename in self._frame_mapping:
return self._frame_mapping[_filename]

raise Exception(
"Cannot match filename or determine frame number for {} filename".format(filename))
return osp.splitext(path)[0]

def match_frame(self, path, root_hint=None):
    """Look up the frame number registered for an image path.

    The path is normalized with ``_get_filename`` and looked up in
    ``_frame_mapping``. If nothing is found and ``root_hint`` is given,
    the lookup is retried with ``root_hint`` prepended, unless the
    normalized path already starts with it.

    Args:
        path: Image path or dataset item id to match.
        root_hint: Optional path prefix to prepend on the second attempt.

    Returns:
        The matched frame number, or ``None`` if no frame matches.
    """
    path = self._get_filename(path)
    match = self._frame_mapping.get(path)
    # Compare with None explicitly: frame number 0 is a valid match and
    # must not fall through to the root_hint retry, which could discard it.
    if match is None and root_hint and not path.startswith(root_hint):
        path = osp.join(root_hint, path)
        match = self._frame_mapping.get(path)
    return match

def match_frame_fuzzy(self, path):
    """Return the first frame whose registered path ends with ``path``.

    The comparison is made on whole path components, not on characters,
    and entries of ``_frame_mapping`` are tried in iteration order.

    Preconditions:
    - The input dataset is full, i.e. all items are present. Partial
      dataset matching can't be correct for all input cases.
    - ``path`` is the longest path of the input dataset in terms of
      path parts.

    Returns:
        The frame number of the first matching entry, or ``None``.
    """
    suffix = Path(self._get_filename(path)).parts
    suffix_len = len(suffix)
    return next(
        (frame_number
            for known_path, frame_number in self._frame_mapping.items()
            # component-wise endswith() for paths
            if Path(known_path).parts[-suffix_len:] == suffix),
        None)

class CvatTaskDataExtractor(datumaro.SourceExtractor):
def __init__(self, task_data, include_images=False):
Expand Down Expand Up @@ -450,8 +462,7 @@ def categories(self):
def _load_categories(cvat_anno):
categories = {}

label_categories = datumaro.LabelCategories(
attributes=['occluded', 'z_order'])
label_categories = datumaro.LabelCategories(attributes=['occluded'])

for _, label in cvat_anno.meta['task']['labels']:
label_categories.add(label['name'])
Expand Down Expand Up @@ -537,20 +548,14 @@ def convert_attrs(label, cvat_attrs):

return item_anno

def match_frame(item, task_data):
def match_dm_item(item, task_data, root_hint=None):
is_video = task_data.meta['task']['mode'] == 'interpolation'

frame_number = None
if frame_number is None and item.has_image:
try:
frame_number = task_data.match_frame(item.image.path)
except Exception:
pass
frame_number = task_data.match_frame(item.image.path, root_hint)
if frame_number is None:
try:
frame_number = task_data.match_frame(item.id)
except Exception:
pass
frame_number = task_data.match_frame(item.id, root_hint)
if frame_number is None:
frame_number = cast(item.attributes.get('frame', item.id), int)
if frame_number is None and is_video:
Expand All @@ -561,6 +566,19 @@ def match_frame(item, task_data):
item.id)
return frame_number

def find_dataset_root(dm_dataset, task_data):
    """Find the path prefix that task frames have but dataset ids lack.

    The dataset item whose id has the most path components is matched
    against the task frames with ``task_data.match_frame_fuzzy``. The
    directory of that item's id is then stripped from the end of the
    matched frame's directory; what remains is the root.

    Args:
        dm_dataset: Iterable of items exposing an ``id`` path string.
        task_data: Object providing ``match_frame_fuzzy`` and ``frame_info``.

    Returns:
        The prefix without a trailing slash (possibly an empty string),
        or ``None`` if the dataset is empty or its longest path matches
        no task frame.
    """
    # default=None keeps an empty dataset from raising ValueError in max().
    longest_item = max(dm_dataset,
        key=lambda x: len(Path(x.id).parts), default=None)
    if longest_item is None:
        return None

    longest_path = longest_item.id
    longest_match = task_data.match_frame_fuzzy(longest_path)
    if longest_match is None:
        return None

    longest_match = osp.dirname(task_data.frame_info[longest_match]['path'])
    # "-0 or None" turns an empty item directory into "keep everything".
    prefix = longest_match[:-len(osp.dirname(longest_path)) or None]
    if prefix.endswith('/'):
        prefix = prefix[:-1]
    return prefix


def import_dm_annotations(dm_dataset, task_data):
shapes = {
datumaro.AnnotationType.bbox: ShapeType.RECTANGLE,
Expand All @@ -569,10 +587,16 @@ def import_dm_annotations(dm_dataset, task_data):
datumaro.AnnotationType.points: ShapeType.POINTS,
}

if len(dm_dataset) == 0:
return

label_cat = dm_dataset.categories()[datumaro.AnnotationType.label]

root_hint = find_dataset_root(dm_dataset, task_data)

for item in dm_dataset:
frame_number = task_data.abs_frame_id(match_frame(item, task_data))
frame_number = task_data.abs_frame_id(
match_dm_item(item, task_data, root_hint=root_hint))

# do not store one-item groups
group_map = {0: 0}
Expand Down
16 changes: 10 additions & 6 deletions cvat/apps/dataset_manager/formats/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
import_dm_annotations, match_frame)
import_dm_annotations, match_dm_item, find_dataset_root)
from cvat.apps.dataset_manager.util import make_zip_archive
from datumaro.components.extractor import DatasetItem
from datumaro.components.project import Dataset
from datumaro.plugins.yolo_format.extractor import YoloExtractor

from .registry import dm_env, exporter, importer

Expand All @@ -33,17 +34,20 @@ def _import(src_file, task_data):
Archive(src_file.name).extractall(tmp_dir)

image_info = {}
anno_files = glob(osp.join(tmp_dir, '**', '*.txt'), recursive=True)
for filename in anno_files:
filename = osp.splitext(osp.basename(filename))[0]
frames = [YoloExtractor.name_from_path(osp.relpath(p, tmp_dir))
for p in glob(osp.join(tmp_dir, '**', '*.txt'), recursive=True)]
root_hint = find_dataset_root(
[DatasetItem(id=frame) for frame in frames], task_data)
for frame in frames:
frame_info = None
try:
frame_id = match_frame(DatasetItem(id=filename), task_data)
frame_id = match_dm_item(DatasetItem(id=frame), task_data,
root_hint=root_hint)
frame_info = task_data.frame_info[frame_id]
except Exception:
pass
if frame_info is not None:
image_info[filename] = (frame_info['height'], frame_info['width'])
image_info[frame] = (frame_info['height'], frame_info['width'])

dataset = dm_env.make_importer('yolo')(tmp_dir, image_info=image_info) \
.make_dataset()
Expand Down
101 changes: 100 additions & 1 deletion cvat/apps/dataset_manager/tests/_test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ def _setUpModule():
from rest_framework.test import APITestCase, APIClient
from rest_framework import status

from cvat.apps.dataset_manager.annotation import AnnotationIR
from cvat.apps.dataset_manager.bindings import TaskData, find_dataset_root
from cvat.apps.engine.models import Task

_setUpModule()

from cvat.apps.dataset_manager.annotation import AnnotationIR
Expand Down Expand Up @@ -256,7 +260,7 @@ def _generate_annotations(self, task):
self._put_api_v1_task_id_annotations(task["id"], annotations)
return annotations

def _generate_task_images(self, count):
def _generate_task_images(self, count): # pylint: disable=no-self-use
images = {
"client_files[%d]" % i: generate_image_file("image_%d.jpg" % i)
for i in range(count)
Expand Down Expand Up @@ -385,6 +389,7 @@ def load_dataset(src):

# NOTE: can't import cvat.utils.cli
# for whatever reason, so remove the dependency
#
project.config.remove('sources')

return project.make_dataset()
Expand Down Expand Up @@ -436,3 +441,97 @@ def test_can_make_abs_frame_id_from_known(self):
task_data = TaskData(AnnotationIR(), Task.objects.get(pk=task['id']))

self.assertEqual(5, task_data.abs_frame_id(2))

class FrameMatchingTest(_DbTestBase):
    """Tests for matching dataset item paths against task frame paths."""

    def _generate_task_images(self, paths): # pylint: disable=no-self-use
        """Build upload data: an in-memory zip with one image per path."""
        archive_file = BytesIO()
        with zipfile.ZipFile(archive_file, 'w') as archive:
            for image_path in paths:
                image = generate_image_file(image_path)
                archive.writestr(image_path, image.getvalue())
        archive_file.name = 'images.zip'
        archive_file.seek(0)

        return {'client_files[0]': archive_file, 'image_quality': 75}

    def _generate_task(self, images):
        """Create a task with two labels ("car", "person") from *images*."""
        car_attributes = [
            {
                "name": "model",
                "mutable": False,
                "input_type": "select",
                "default_value": "mazda",
                "values": ["bmw", "mazda", "renault"]
            },
            {
                "name": "parked",
                "mutable": True,
                "input_type": "checkbox",
                "default_value": False
            },
        ]
        task_spec = {
            "name": "my task #1",
            "owner": '',
            "assignee": '',
            "overlap": 0,
            "segment_size": 100,
            "z_order": False,
            "labels": [
                {"name": "car", "attributes": car_attributes},
                {"name": "person"},
            ]
        }
        return self._create_task(task_spec, images)

    def test_frame_matching(self):
        task_paths = [
            'a.jpg',
            'a/a.jpg',
            'a/b.jpg',
            'b/a.jpg',
            'b/c.jpg',
            'a/b/c.jpg',
            'a/b/d.jpg',
        ]

        task = self._generate_task(self._generate_task_images(task_paths))
        task_data = TaskData(AnnotationIR(), Task.objects.get(pk=task["id"]))

        # (input path, expected frame path, root hint)
        cases = [
            ('z.jpg', None, ''), # unknown item
            ('z/a.jpg', None, ''), # unknown item

            ('d.jpg', 'a/b/d.jpg', 'a/b'), # match with root hint
            ('b/d.jpg', 'a/b/d.jpg', 'a'), # match with root hint
        ]
        # exact matches: every task path must match itself without a hint
        cases.extend((known, known, None) for known in task_paths)

        for input_path, expected, root in cases:
            with self.subTest(input=input_path):
                frame = task_data.match_frame(input_path, root)
                actual = None
                if frame is not None:
                    actual = task_data.frame_info[frame]['path']
                self.assertEqual(expected, actual)

    def test_dataset_root(self):
        # (task image paths, dataset image paths, expected root)
        cases = [
            (['a.jpg', 'b/c/a.jpg'], ['a.jpg', 'b/c/a.jpg'], ''),
            # 'images from share' case
            (['b/a.jpg', 'b/c/a.jpg'], ['a.jpg', 'c/a.jpg'], 'b'),
            # 'images from share' case
            (['b/c/a.jpg'], ['a.jpg'], 'b/c'),
            (['a.jpg'], ['z.jpg'], None),
        ]

        for task_paths, dataset_paths, expected in cases:
            with self.subTest(expected=expected):
                images = self._generate_task_images(task_paths)
                task = self._generate_task(images)
                db_task = Task.objects.get(pk=task["id"])
                task_data = TaskData(AnnotationIR(), db_task)

                # dataset item ids are image paths without the extension
                dataset = []
                for image_path in dataset_paths:
                    item_id = osp.splitext(image_path)[0]
                    dataset.append(
                        datumaro.components.extractor.DatasetItem(id=item_id))

                root = find_dataset_root(dataset, task_data)
                self.assertEqual(expected, root)