Manifest optimization (#3712)

cvat-ai · Oct 7, 2021 · 5b890b1 · 5b890b1
1 parent cf6878e
commit 5b890b1
Show file tree

Hide file tree

Showing 7 changed files with 324 additions and 254 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,10 +12,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - cvat-ui: support cloud storages (<https://github.com/openvinotoolkit/cvat/pull/3372>)
 - interactor: add HRNet interactive segmentation serverless function (<https://github.com/openvinotoolkit/cvat/pull/3740>)
 - Added GPU implementation for SiamMask, reworked tracking approach (<https://github.com/openvinotoolkit/cvat/pull/3571>)
+- Progress bar for manifest creation (<https://github.com/openvinotoolkit/cvat/pull/3712>)
 
 ### Changed
 
 - UI tracking has been reworked (<https://github.com/openvinotoolkit/cvat/pull/3571>)
+- Manifest generation: reduced creation time (<https://github.com/openvinotoolkit/cvat/pull/3712>)
 
 ### Deprecated
 

diff --git a/cvat/apps/engine/migrations/0038_manifest.py b/cvat/apps/engine/migrations/0038_manifest.py
@@ -43,10 +43,11 @@ def migrate2meta(apps, shema_editor):
  continue
  media_file = os.path.join(data_dir, db_data.video.path)
  logger.info('Preparing of the video meta has begun')
- meta = VideoManifestManager(manifest_path=upload_dir) \
- .prepare_meta(media_file=media_file, force=True)
+ manifest = VideoManifestManager(manifest_path=upload_dir)
+ manifest.link(media_file=media_file, force=True)
+ manifest.init_index()
  with open(meta_path, "w") as meta_file:
- for idx, pts, _ in meta:
+ for idx, pts, _ in manifest.reader:
  meta_file.write(f"{idx} {pts}\n")
  else:
  name_format = "dummy_{}.txt"
@@ -87,12 +88,9 @@ def migrate2manifest(apps, shema_editor):
  if hasattr(db_data, 'video'):
  media_file = os.path.join(data_dir, db_data.video.path)
  manifest = VideoManifestManager(manifest_path=upload_dir)
- logger.info('Preparing of the video meta information has begun')
- meta_info = manifest.prepare_meta(media_file=media_file, force=True)
+ manifest.link(media_file=media_file, force=True)
  logger.info('Manifest creating has begun')
- manifest.create(meta_info)
- logger.info('Index creating has begun')
- manifest.init_index()
+ manifest.create()
  else:
  manifest = ImageManifestManager(manifest_path=upload_dir)
  sources = []
@@ -105,36 +103,21 @@ def migrate2manifest(apps, shema_editor):
  sources = [os.path.join(data_dir, db_image.path) for db_image in db_data.images.all().order_by('frame')]
  if any(list(filter(lambda x: x.dimension==DimensionType.DIM_3D, db_data.tasks.all()))):
  logger.info('Preparing of images 3d meta information has begun')
- content = []
- for source in sources:
- name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
- content.append({
- 'name': name,
- 'extension': ext
- })
+ manifest.link(sources=sources, data_dir=data_dir, DIM_3D=True)
  else:
  logger.info('Preparing of 2d images meta information has begun')
- meta_info = manifest.prepare_meta(sources=sources, data_dir=data_dir)
- content = meta_info.content
+ manifest.link(sources=sources, data_dir=data_dir)
 
  if db_data.storage == StorageChoice.SHARE:
  def _get_frame_step(str_):
  match = search("step\s*=\s*([1-9]\d*)", str_)
  return int(match.group(1)) if match else 1
  logger.info('Data is located on the share, metadata update has been started')
- step = _get_frame_step(db_data.frame_filter)
- start = db_data.start_frame
- stop = db_data.stop_frame + 1
- images_range = range(start, stop, step)
- result_content = []
- for i in range(stop):
- item = content.pop(0) if i in images_range else dict()
- result_content.append(item)
- content = result_content
+ manifest.step = _get_frame_step(db_data.frame_filter)
+ manifest.start = db_data.start_frame
+ manifest.stop = db_data.stop_frame + 1
  logger.info('Manifest creating has begun')
- manifest.create(content)
- logger.info('Index creating has begun')
- manifest.init_index()
+ manifest.create()
  logger.info('Succesfull migration for the data({})'.format(db_data.id))
  except Exception as ex:
  logger.error(str(ex))

diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
@@ -265,7 +265,6 @@ def _create_thread(tid, data, isImport=False):
  media_files = sorted(media['image'])
  content = cloud_storage_manifest.get_subset(media_files)
  manifest.create(content)
- manifest.init_index()
 
  av_scan_paths(upload_dir)
 
@@ -424,8 +423,7 @@ def _update_status(msg):
  video_size = manifest.video_resolution
  manifest_is_prepared = True
  except Exception as ex:
- if os.path.exists(db_data.get_index_path()):
- os.remove(db_data.get_index_path())
+ manifest.remove()
  if isinstance(ex, AssertionError):
  base_msg = str(ex)
  else:
@@ -436,53 +434,41 @@ def _update_status(msg):
  if not manifest_is_prepared:
  _update_status('Start prepare a manifest file')
  manifest = VideoManifestManager(db_data.get_manifest_path())
- meta_info = manifest.prepare_meta(
+ manifest.link(
  media_file=media_files[0],
  upload_dir=upload_dir,
  chunk_size=db_data.chunk_size
  )
- manifest.create(meta_info)
- manifest.init_index()
+ manifest.create()
  _update_status('A manifest had been created')
 
- all_frames = meta_info.get_size()
- video_size = meta_info.frame_sizes
+ all_frames = len(manifest.reader)
+ video_size = manifest.reader.resolution
  manifest_is_prepared = True
 
  db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
  if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
  video_path = os.path.join(upload_dir, media_files[0])
  except Exception as ex:
  db_data.storage_method = models.StorageMethodChoice.FILE_SYSTEM
- if os.path.exists(db_data.get_manifest_path()):
- os.remove(db_data.get_manifest_path())
- if os.path.exists(db_data.get_index_path()):
- os.remove(db_data.get_index_path())
+ manifest.remove()
+ del manifest
  base_msg = str(ex) if isinstance(ex, AssertionError) \
  else "Uploaded video does not support a quick way of task creating."
  _update_status("{} The task will be created using the old method".format(base_msg))
  else: # images, archive, pdf
  db_data.size = len(extractor)
  manifest = ImageManifestManager(db_data.get_manifest_path())
  if not manifest_file:
- if db_task.dimension == models.DimensionType.DIM_2D:
- meta_info = manifest.prepare_meta(
- sources=extractor.absolute_source_paths,
- meta={ k: {'related_images': related_images[k] } for k in related_images },
- data_dir=upload_dir
- )
- content = meta_info.content
- else:
- content = []
- for source in extractor.absolute_source_paths:
- name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
- content.append({
- 'name': name,
- 'meta': { 'related_images': related_images[''.join((name, ext))] },
- 'extension': ext
- })
- manifest.create(content)
- manifest.init_index()
+ manifest.link(
+ sources=extractor.absolute_source_paths,
+ meta={ k: {'related_images': related_images[k] } for k in related_images },
+ data_dir=upload_dir,
+ DIM_3D=(db_task.dimension == models.DimensionType.DIM_3D),
+ )
+ manifest.create()
+ else:
+ manifest.init_index()
  counter = itertools.count()
  for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
  chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]

diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py
@@ -2512,11 +2512,11 @@ def generate_manifest_file(data_type, manifest_path, sources):
  }
 
  if data_type == 'video':
- manifest = VideoManifestManager(manifest_path)
+ manifest = VideoManifestManager(manifest_path, create_index=False)
  else:
- manifest = ImageManifestManager(manifest_path)
- prepared_meta = manifest.prepare_meta(**kwargs[data_type])
- manifest.create(prepared_meta)
+ manifest = ImageManifestManager(manifest_path, create_index=False)
+ manifest.link(**kwargs[data_type])
+ manifest.create()
 
 class TaskDataAPITestCase(APITestCase):
  _image_sizes = {}