Skip to content

Commit

Permalink
Manifest fixes (#3146)
Browse files Browse the repository at this point in the history
* Add logger, reverse func
* Fix image filtering
* Fix upload video manifest

Co-authored-by: Nikita Manovich <nikita.manovich@intel.com>
  • Loading branch information
Marishka17 and Nikita Manovich authored May 4, 2021
1 parent 73b85a9 commit e7cca0e
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Changing a label on canvas does not work when 'Show object details' enabled (<https://github.com/openvinotoolkit/cvat/pull/3084>)
- Make sure frame unzip web worker correctly terminates after unzipping all images in a requested chunk (<https://github.com/openvinotoolkit/cvat/pull/3096>)
- Reset password link was unavailable before login (<https://github.com/openvinotoolkit/cvat/pull/3140>)
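- Manifest: fixed the data migration (added logging and a reverse migration, fixed image filtering and video manifest upload) (<https://github.com/openvinotoolkit/cvat/pull/3146>)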

### Security

Expand Down
92 changes: 86 additions & 6 deletions cvat/apps/engine/migrations/0038_manifest.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,120 @@
# Generated by Django 3.1.1 on 2021-02-20 08:36

import glob
import itertools
import logging
import os
import sys
from re import search

from django.conf import settings
from django.db import migrations

from cvat.apps.engine.models import (DimensionType, StorageChoice,
StorageMethodChoice)
from cvat.apps.engine.media_extractors import get_mime
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager

def migrate_data(apps, shema_editor):
def get_logger():
    """Return the logger dedicated to this migration.

    The logger is named after this file (base name up to the first dot),
    runs at INFO level and writes to three destinations: a log file
    ``<settings.MIGRATIONS_LOGS_ROOT>/<migration>.log`` (with a timestamped
    format), stdout and stderr.

    ``logging.getLogger`` hands back the same object for the same name, so
    the handlers are attached only on the first call. Without that guard
    every further call in the same process (e.g. running the migration and
    then its reverse) would add three more handlers and duplicate each
    record.
    """
    migration = os.path.basename(__file__).split(".")[0]
    logger = logging.getLogger(name=migration)
    logger.setLevel(logging.INFO)

    if logger.handlers:
        # Already configured by an earlier call; do not stack handlers.
        return logger

    file_handler = logging.FileHandler(
        os.path.join(settings.MIGRATIONS_LOGS_ROOT, f"{migration}.log")
    )
    # Only the file handler gets the detailed format; console output stays bare.
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.addHandler(logging.StreamHandler(sys.stderr))
    return logger

def _get_query_set(apps):
    """Return the ``engine.Data`` objects stored with the CACHE storage method.

    ``apps`` is the app registry passed to the migration function; the model
    is resolved through it rather than imported directly.
    """
    data_model = apps.get_model("engine", "Data")
    return data_model.objects.filter(storage_method=StorageMethodChoice.CACHE)

def migrate2meta(apps, shema_editor):
    """Recreate the pre-manifest files for every cached data object.

    For each object returned by ``_get_query_set`` the function:

    * deletes ``manifest.jsonl`` and ``index.json`` from the object's
      ``raw`` upload directory if they exist;
    * for video data, writes ``meta_info.txt`` with one ``"<idx> <pts>"``
      line per entry produced by ``VideoManifestManager.prepare_meta``
      (skipped when the file already exists);
    * for image data, writes ``dummy_<n>.txt`` files, each listing the image
      paths of one chunk of ``db_data.chunk_size`` frames (existing files
      are left untouched).

    Any exception raised while handling one object is logged and the loop
    moves on to the next object. ``shema_editor`` is unused.
    """
    logger = get_logger()
    for db_data in _get_query_set(apps):
        try:
            upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id)
            logger.info('Migrate data({}), folder - {}'.format(db_data.id, upload_dir))
            meta_path = os.path.join(upload_dir, "meta_info.txt")

            # Drop the manifest artefacts, manifest first and then its index.
            stale_files = (
                ('manifest.jsonl', 'A manifest file has been deleted'),
                ('index.json', 'A manifest index file has been deleted'),
            )
            for file_name, message in stale_files:
                stale_path = os.path.join(upload_dir, file_name)
                if os.path.exists(stale_path):
                    os.remove(stale_path)
                    logger.info(message)

            if db_data.storage == StorageChoice.LOCAL:
                data_dir = upload_dir
            else:
                data_dir = settings.SHARE_ROOT

            if hasattr(db_data, 'video'):
                if os.path.exists(meta_path):
                    logger.info('A meta_info.txt already exists')
                    continue
                media_file = os.path.join(data_dir, db_data.video.path)
                logger.info('Preparing of the video meta has begun')
                manager = VideoManifestManager(manifest_path=upload_dir)
                meta = manager.prepare_meta(media_file=media_file, force=True)
                with open(meta_path, "w") as meta_file:
                    meta_file.writelines(f"{idx} {pts}\n" for idx, pts, _ in meta)
            else:
                name_format = "dummy_{}.txt"
                sources = [db_image.path for db_image in db_data.images.all().order_by('frame')]
                counter = itertools.count()
                logger.info('Preparing of the dummy chunks has begun')
                # The running counter turns the frame position into a chunk number.
                chunks = itertools.groupby(sources, lambda _: next(counter) // db_data.chunk_size)
                for idx, img_paths in chunks:
                    chunk_name = name_format.format(idx)
                    chunk_path = os.path.join(upload_dir, chunk_name)
                    if os.path.exists(chunk_path):
                        logger.info(chunk_name + " already exists")
                        continue
                    with open(chunk_path, "w") as dummy_chunk:
                        dummy_chunk.writelines(f"{img_path}\n" for img_path in img_paths)

            logger.info('Succesfull migration for the data({})'.format(db_data.id))
        except Exception as ex:
            logger.error(str(ex))

def migrate2manifest(apps, shema_editor):
logger = get_logger()
logger.info('The data migration has been started for creating manifest`s files')
query_set = _get_query_set(apps)
logger.info('Need to update {} data objects'.format(len(query_set)))
for db_data in query_set:
try:
upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id)
logger.info('Migrate data({}), folder - {}'.format(db_data.id, upload_dir))
if os.path.exists(os.path.join(upload_dir, 'meta_info.txt')):
os.remove(os.path.join(upload_dir, 'meta_info.txt'))
os.remove(os.path.join(upload_dir, 'meta_info.txt'))
logger.info('{}/meta_info.txt has been deleted'.format(upload_dir))
else:
for path in glob.glob(f'{upload_dir}/dummy_*.txt'):
os.remove(path)
logger.info(f"{path} has been deleted")
# it's necessary for case with long data migration
if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')):
logger.info('Manifest file already exists')
continue
data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT
if hasattr(db_data, 'video'):
media_file = os.path.join(data_dir, db_data.video.path)
manifest = VideoManifestManager(manifest_path=upload_dir)
meta_info = manifest.prepare_meta(media_file=media_file)
logger.info('Preparing of the video meta information has begun')
meta_info = manifest.prepare_meta(media_file=media_file, force=True)
logger.info('Manifest creating has begun')
manifest.create(meta_info)
logger.info('Index creating has begun')
manifest.init_index()
else:
manifest = ImageManifestManager(manifest_path=upload_dir)
sources = []
if db_data.storage == StorageChoice.LOCAL:
for (root, _, files) in os.walk(data_dir):
sources.extend([os.path.join(root, f) for f in files])
sources.extend([os.path.join(root, f) for f in files if get_mime(f) == 'image'])
sources.sort()
# using share, this means that we can not explicitly restore the entire data structure
else:
sources = [os.path.join(data_dir, db_image.path) for db_image in db_data.images.all().order_by('frame')]
if any(list(filter(lambda x: x.dimension==DimensionType.DIM_3D, db_data.tasks.all()))):
logger.info('Preparing of images 3d meta information has begun')
content = []
for source in sources:
name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
Expand All @@ -51,13 +123,15 @@ def migrate_data(apps, shema_editor):
'extension': ext
})
else:
logger.info('Preparing of 2d images meta information has begun')
meta_info = manifest.prepare_meta(sources=sources, data_dir=data_dir)
content = meta_info.content

if db_data.storage == StorageChoice.SHARE:
def _get_frame_step(str_):
match = search("step\s*=\s*([1-9]\d*)", str_)
return int(match.group(1)) if match else 1
logger.info('Data is located on the share, metadata update has been started')
step = _get_frame_step(db_data.frame_filter)
start = db_data.start_frame
stop = db_data.stop_frame + 1
Expand All @@ -67,10 +141,13 @@ def _get_frame_step(str_):
item = content.pop(0) if i in images_range else dict()
result_content.append(item)
content = result_content
logger.info('Manifest creating has begun')
manifest.create(content)
logger.info('Index creating has begun')
manifest.init_index()
logger.info('Succesfull migration for the data({})'.format(db_data.id))
except Exception as ex:
print(str(ex))
logger.error(str(ex))

class Migration(migrations.Migration):

Expand All @@ -79,5 +156,8 @@ class Migration(migrations.Migration):
]

operations = [
migrations.RunPython(migrate_data)
migrations.RunPython(
code=migrate2manifest,
reverse_code=migrate2meta
)
]
14 changes: 10 additions & 4 deletions utils/dataset_manifest/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def index(self):
return self._index

class VideoManifestManager(_ManifestManager):
def __init__(self, manifest_path, *args, **kwargs):
def __init__(self, manifest_path):
super().__init__(manifest_path)
setattr(self._manifest, 'TYPE', 'video')
self.BASE_INFORMATION['properties'] = 3
Expand Down Expand Up @@ -381,9 +381,15 @@ def validate_base_info(self):
assert self._manifest.TYPE != json.loads(manifest_file.readline())['type']

class VideoManifestValidator(VideoManifestManager):
def __init__(self, **kwargs):
self.source_path = kwargs.pop('source_path')
super().__init__(self, **kwargs)
def __init__(self, source_path, manifest_path):
self.source_path = source_path
super().__init__(manifest_path)

@staticmethod
def _get_video_stream(container):
video_stream = next(stream for stream in container.streams if stream.type == 'video')
video_stream.thread_type = 'AUTO'
return video_stream

def validate_key_frame(self, container, video_stream, key_frame):
for packet in container.demux(video_stream):
Expand Down

0 comments on commit e7cca0e

Please sign in to comment.