Skip to content

Commit

Permalink
Hotfix missing manifest file (#116)
Browse files Browse the repository at this point in the history
  • Loading branch information
Marishka17 authored Aug 18, 2022
1 parent 5e2eda7 commit 182c39a
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 26 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Annotation window might have a top offset when trying to move a locked object
- Image search in cloud storage (<https://github.com/cvat-ai/cvat/pull/8>)
- Reset password functionality (<https://github.com/cvat-ai/cvat/pull/52>)
- Creating task with cloud storage data (<https://github.com/cvat-ai/cvat/pull/116>)

### Security
- TBD
Expand Down
44 changes: 23 additions & 21 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

# Copyright (C) 2018-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

Expand All @@ -18,9 +19,11 @@
import ipaddress
import dns.resolver
import django_rq
import pytz

from django.conf import settings
from django.db import transaction
from datetime import datetime

from cvat.apps.engine import models
from cvat.apps.engine.log import slogger
Expand All @@ -30,7 +33,7 @@
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest
from utils.dataset_manifest.core import VideoManifestValidator
from utils.dataset_manifest.utils import detect_related_images
from .cloud_provider import get_cloud_storage_instance, Credentials
from .cloud_provider import db_storage_to_storage_instance

############################# Low Level server API

Expand Down Expand Up @@ -207,13 +210,21 @@ def _validate_data(counter, manifest_files=None):

return counter, task_modes[0]

def _validate_manifest(manifests, root_dir):
def _validate_manifest(manifests, root_dir, is_in_cloud, db_cloud_storage):
if manifests:
if len(manifests) != 1:
raise Exception('Only one manifest file can be attached with data')
manifest_file = manifests[0]
full_manifest_path = os.path.join(root_dir, manifests[0])
if is_in_cloud:
cloud_storage_instance = db_storage_to_storage_instance(db_cloud_storage)
# check that cloud storage manifest file exists and is up to date
if not os.path.exists(full_manifest_path) or \
datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) \
< cloud_storage_instance.get_file_last_modified(manifest_file):
cloud_storage_instance.download_file(manifest_file, full_manifest_path)
if is_manifest(full_manifest_path):
return manifests[0]
return manifest_file
raise Exception('Invalid manifest was uploaded')
return None

Expand Down Expand Up @@ -293,6 +304,7 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):

db_data = db_task.data
upload_dir = db_data.get_upload_dirname()
is_data_in_cloud = db_data.storage == models.StorageChoice.CLOUD_STORAGE

if data['remote_files'] and not isDatasetImport:
data['remote_files'] = _download_data(data['remote_files'], upload_dir)
Expand All @@ -310,28 +322,18 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
manifest_root = None
if db_data.storage in {models.StorageChoice.LOCAL, models.StorageChoice.SHARE}:
manifest_root = upload_dir
elif db_data.storage == models.StorageChoice.CLOUD_STORAGE:
elif is_data_in_cloud:
manifest_root = db_data.cloud_storage.get_storage_dirname()

manifest_file = _validate_manifest(manifest_files, manifest_root)
manifest_file = _validate_manifest(
manifest_files, manifest_root,
is_data_in_cloud, db_data.cloud_storage if is_data_in_cloud else None
)
if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
raise Exception("File with meta information can be uploaded if 'Use cache' option is also selected")

if data['server_files'] and db_data.storage == models.StorageChoice.CLOUD_STORAGE:
if not manifest_file: raise Exception('A manifest file not found')
db_cloud_storage = db_data.cloud_storage
credentials = Credentials()
credentials.convert_from_db({
'type': db_cloud_storage.credentials_type,
'value': db_cloud_storage.credentials,
})

details = {
'resource': db_cloud_storage.resource,
'credentials': credentials,
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
if data['server_files'] and is_data_in_cloud:
cloud_storage_instance = db_storage_to_storage_instance(db_data.cloud_storage)
sorted_media = sort(media['image'], data['sorting_method'])

data_size = len(sorted_media)
Expand Down Expand Up @@ -516,7 +518,7 @@ def update_progress(progress):
# calculate chunk size if it isn't specified
if db_data.chunk_size is None:
if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
if not db_data.storage == models.StorageChoice.CLOUD_STORAGE:
if not is_data_in_cloud:
w, h = extractor.get_image_size(0)
else:
img_properties = manifest[0]
Expand Down
13 changes: 8 additions & 5 deletions tests/python/rest_api/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,12 +320,15 @@ def test_can_create_task_with_defined_start_and_stop_frames(self):
(task, _) = api_client.tasks_api.retrieve(task_id)
assert task.size == 4

@pytest.mark.parametrize('cloud_storage_id, manifest, org', [
(1, 'manifest.jsonl', ''), # public bucket
(2, 'sub/manifest.jsonl', 'org2'), # private bucket
@pytest.mark.parametrize('cloud_storage_id, manifest, use_bucket_content, org', [
(1, 'manifest.jsonl', False, ''), # public bucket
(2, 'sub/manifest.jsonl', True, 'org2'), # private bucket
])
def test_create_task_with_cloud_storage_files(self, cloud_storage_id, manifest, org):
cloud_storage_content = get_cloud_storage_content(self._USERNAME, cloud_storage_id, manifest)
def test_create_task_with_cloud_storage_files(self, cloud_storage_id, manifest, use_bucket_content, org):
if use_bucket_content:
cloud_storage_content = get_cloud_storage_content(self._USERNAME, cloud_storage_id, manifest)
else:
cloud_storage_content = ['image_case_65_1.png', 'image_case_65_2.png']
cloud_storage_content.append(manifest)

task_spec = {
Expand Down

0 comments on commit 182c39a

Please sign in to comment.