From 8fa9caf8ba1a78966c6bbc4e7662bd9305aad057 Mon Sep 17 00:00:00 2001 From: Maya Date: Sat, 3 Jul 2021 18:36:25 +0300 Subject: [PATCH 01/24] Add preview && some fixes --- cvat/apps/engine/models.py | 3 ++ cvat/apps/engine/serializers.py | 10 ++++++- cvat/apps/engine/views.py | 53 +++++++++++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 78ec751abd25..ef199d0268cc 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -608,6 +608,9 @@ def get_storage_logs_dirname(self): def get_log_path(self): return os.path.join(self.get_storage_dirname(), "storage.log") + def get_preview_path(self): + return os.path.join(self.get_storage_dirname(), 'preview.jpeg') + def get_specific_attributes(self): specific_attributes = self.specific_attributes return { diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 0c0b1130e115..7ed8311a98cd 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -790,7 +790,7 @@ class Meta: fields = ( 'provider_type', 'resource', 'display_name', 'owner', 'credentials_type', 'created_date', 'updated_date', 'session_token', 'account_name', 'key', - 'secret_key', 'specific_attributes', 'description' + 'secret_key', 'specific_attributes', 'description', 'id' ) read_only_fields = ('created_date', 'updated_date', 'owner') @@ -841,6 +841,12 @@ def create(self, validated_data): **validated_data ) db_storage.save() + + cloud_storage_path = db_storage.get_storage_dirname() + if os.path.isdir(cloud_storage_path): + shutil.rmtree(cloud_storage_path) + + os.makedirs(db_storage.get_storage_logs_dirname(), exist_ok=True) return db_storage # pylint: disable=no-self-use @@ -856,6 +862,8 @@ def update(self, instance, validated_data): instance.credentials_type = validated_data.get('credentials_type', instance.credentials_type) instance.resource = validated_data.get('resource', instance.resource) instance.display_name = validated_data.get('display_name', instance.display_name) + instance.description = validated_data.get('description', instance.description) + instance.specific_attributes = validated_data.get('specific_attributes', instance.specific_attributes) instance.save() return instance diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 3adaa017cde8..7d16c90a4779 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -11,7 +11,8 @@ import uuid from datetime import datetime from distutils.util import strtobool -from tempfile import mkstemp, TemporaryDirectory +from tempfile import mkstemp, NamedTemporaryFile, TemporaryDirectory +from PIL import Image as PILImage import cv2 from django.db.models.query import Prefetch @@ -44,6 +45,8 @@ from cvat.apps.dataset_manager.bindings import CvatImportError from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine.frame_provider import FrameProvider +from cvat.apps.engine.media_extractors import MEDIA_TYPES, ImageListReader +from cvat.apps.engine.mime_types import mimetypes from cvat.apps.engine.models import ( Job, StatusChoice, Task, Project, Review, Issue, Comment, StorageMethodChoice, ReviewStatus, StorageChoice, Image, @@ -206,6 +209,7 @@ def plugins(request): class ProjectFilter(filters.FilterSet): name = filters.CharFilter(field_name="name", lookup_expr="icontains") owner = filters.CharFilter(field_name="owner__username", lookup_expr="icontains") + assignee = filters.CharFilter(field_name="assignee__username", lookup_expr="icontains") status = filters.CharFilter(field_name="status", lookup_expr="icontains") class Meta: @@ -233,7 +237,7 @@ class Meta: @method_decorator(name='partial_update', decorator=swagger_auto_schema(operation_summary='Methods does a partial update of chosen fields in a project')) class ProjectViewSet(auth.ProjectGetQuerySetMixin, viewsets.ModelViewSet): queryset = models.Project.objects.all().order_by('-id') - search_fields = ("name", "owner__username", "status") + search_fields = ("name", "owner__username", "assignee__username", "status") filterset_class = ProjectFilter ordering_fields = ("id", "name", "owner", "status", "assignee") http_method_names = ['get', 'post', 'head', 'patch', 'delete'] @@ -1300,6 +1304,51 @@ def content(self, request, pk): except Exception as ex: return HttpResponseBadRequest(str(ex)) + @swagger_auto_schema( + method='get', + operation_summary='Method returns a preview image from a cloud storage', + responses={ + '200': openapi.Response(description='Preview'), + }, + tags=['cloud storages'] + ) + @action(detail=True, methods=['GET'], url_path='preview') + def preview(self, request, pk): + try: + db_storage = CloudStorageModel.objects.get(pk=pk) + if not os.path.exists(db_storage.get_preview_path()): + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_storage.credentials_type, + 'value': db_storage.credentials, + }) + details = { + 'resource': db_storage.resource, + 'credentials': credentials, + 'specific_attributes': db_storage.get_specific_attributes() + } + storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) + storage.initialize_content() + storage_images = [f for f in storage.content if MEDIA_TYPES['image']['has_mime_type'](f)] + if not len(storage_images): + return HttpResponseBadRequest('Cloud storage {} does not contain any images'.format(pk)) + with NamedTemporaryFile() as temp_image: + storage.download_file(storage_images[0], temp_image.name) + reader = ImageListReader([temp_image.name]) + preview = reader.get_preview() + preview.save(db_storage.get_preview_path()) + buf = io.BytesIO() + PILImage.open(db_storage.get_preview_path()).save(buf, format='JPEG') + buf.seek(0) + content_type = mimetypes.guess_type(db_storage.get_preview_path())[0] + return HttpResponse(buf.getvalue(), content_type) + except CloudStorageModel.DoesNotExist: + message = f"Storage {pk} does not exist" + slogger.glob.error(message) + return HttpResponseNotFound(message) + except Exception as ex: + return HttpResponseBadRequest(str(ex)) + def rq_handler(job, exc_type, exc_value, tb): job.exc_info = "".join( traceback.format_exception_only(exc_type, exc_value)) From 46587a934070ddf8ba4443b303cddff2f3d8c6c6 Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 5 Jul 2021 17:46:57 +0300 Subject: [PATCH 02/24] Fix case with sub dirs on cloud storage --- cvat/apps/engine/cache.py | 23 ++++++++++++++++------- cvat/apps/engine/cloud_provider.py | 7 +++++-- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index 75196ee8403f..be450d4dbca5 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -7,7 +7,7 @@ from diskcache import Cache from django.conf import settings -from tempfile import NamedTemporaryFile +from tempfile import NamedTemporaryFile, gettempdir from cvat.apps.engine.log import slogger from cvat.apps.engine.media_extractors import (Mpeg4ChunkWriter, @@ -84,18 +84,22 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) cloud_storage_instance.initialize_content() for item in reader: - name = f"{item['name']}{item['extension']}" - if name not in cloud_storage_instance: - raise Exception('{} file was not found on a {} storage'.format(name, cloud_storage_instance.name)) - with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=name, delete=False) as temp_file: + # full_name may be 'sub_dir/image.jpeg' + full_name = f"{item['name']}{item['extension']}" + if full_name not in cloud_storage_instance: + raise Exception('{} file was not found on a {} storage'.format(full_name, cloud_storage_instance.name)) + head, file_name = os.path.split(full_name) + abs_head = os.path.join(gettempdir(), head) + os.makedirs(abs_head, exist_ok=True) + with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=file_name, delete=False, dir=abs_head) as temp_file: source_path = temp_file.name - buf = cloud_storage_instance.download_fileobj(name) + buf = cloud_storage_instance.download_fileobj(full_name) temp_file.write(buf.getvalue()) checksum = item.get('checksum', None) if not checksum: slogger.glob.warning('A manifest file does not contain checksum for image {}'.format(item.get('name'))) if checksum and not md5_hash(source_path) == checksum: - slogger.glob.warning('Hash sums of files {} do not match'.format(name)) + slogger.glob.warning('Hash sums of files {} do not match'.format(full_name)) images.append((source_path, source_path, None)) else: for item in reader: @@ -104,9 +108,14 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): writer.save_as_chunk(images, buff) buff.seek(0) if db_data.storage == StorageChoice.CLOUD_STORAGE: + tmp = gettempdir() + created_dirs = set(filter(lambda x: x if x.lstrip(tmp) else None, [os.path.dirname(i[0]) for i in images])) images = [image[0] for image in images if os.path.exists(image[0])] for image_path in images: os.remove(image_path) + for created_dir in created_dirs: + if not os.listdir(created_dir): + os.rmdir(created_dir) return buff, mime_type def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type): diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 017d5f7db9e0..db293cbd637c 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -271,7 +271,7 @@ def convert_to_db(self): CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_SET : \ " ".join([self.key, self.secret_key, self.session_token]), CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR : " ".join([self.account_name, self.session_token]), - CredentialsTypeChoice.ANONYMOUS_ACCESS: "", + CredentialsTypeChoice.ANONYMOUS_ACCESS: "" if not self.account_name else self.account_name, } return converted_credentials[self.credentials_type] @@ -282,7 +282,10 @@ def convert_from_db(self, credentials): elif self.credentials_type == CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR: self.account_name, self.session_token = credentials.get('value').split() else: - self.account_name, self.session_token, self.key, self.secret_key = ('', '', '', '') + # anonymous access + self.session_token, self.key, self.secret_key = ('', '', '') + # account_name will be in [some_value, ''] + self.account_name = credentials.get('value') self.credentials_type = None def mapping_with_new_values(self, credentials): From 93eea3704f140488d7ae1e34d575991401d1e671 Mon Sep 17 00:00:00 2001 From: Maya Date: Tue, 6 Jul 2021 12:55:26 +0300 Subject: [PATCH 03/24] Move server part from ui_support_cloud_storage && fix missing id field --- cvat/apps/engine/views.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 7d16c90a4779..450353b5eb3f 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1118,6 +1118,18 @@ def process_result(self, result, method_name, obj, **kwargs): 'supported: range=aws_range' return result +class CloudStorageFilter(filters.FilterSet): + display_name = filters.CharFilter(field_name='display_name', lookup_expr='icontains') + provider_type = filters.CharFilter(field_name='provider_type', lookup_expr='icontains') + resource = filters.CharFilter(field_name='resource', lookup_expr='icontains') + credentials_type = filters.CharFilter(field_name='credentials_type', lookup_expr='icontains') + description = filters.CharFilter(field_name='description', lookup_expr='icontains') + owner = filters.CharFilter(field_name='owner__username', lookup_expr='icontains') + + class Meta: + model = models.CloudStorage + fields = ('id', 'display_name', 'provider_type', 'resource', 'credentials_type', 'description', 'owner') + @method_decorator( name='retrieve', decorator=swagger_auto_schema( @@ -1157,8 +1169,8 @@ def process_result(self, result, method_name, obj, **kwargs): class CloudStorageViewSet(auth.CloudStorageGetQuerySetMixin, viewsets.ModelViewSet): http_method_names = ['get', 'post', 'patch', 'delete'] queryset = CloudStorageModel.objects.all().prefetch_related('data').order_by('-id') - search_fields = ('provider_type', 'display_name', 'resource', 'owner__username') - filterset_fields = ['provider_type', 'display_name', 'resource', 'credentials_type'] + search_fields = ('provider_type', 'display_name', 'resource', 'credentials_type', 'owner__username', 'description') + filterset_class = CloudStorageFilter def get_permissions(self): http_method = self.request.method From 0aebb6edccf6bc2cf1846138603bba6d7230f5eb Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 9 Jul 2021 11:44:50 +0300 Subject: [PATCH 04/24] Add support_key_secret_key_pair --- cvat/apps/engine/cloud_provider.py | 11 +++++++++++ .../migrations/0041_auto_20210709_0839.py | 17 +++++++++++++++++ cvat/apps/engine/models.py | 1 + 3 files changed, 29 insertions(+) create mode 100644 cvat/apps/engine/migrations/0041_auto_20210709_0839.py diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index db293cbd637c..8b3d63124e6a 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -104,6 +104,13 @@ def __init__(self, aws_session_token=session_token, region_name=region ) + elif access_key_id and secret_key: + self._s3 = boto3.resource( + 's3', + aws_access_key_id=access_key_id, + aws_secret_access_key=secret_key, + region_name=region + ) elif any([access_key_id, secret_key, session_token]): raise Exception('Insufficient data for authorization') # anonymous access @@ -270,6 +277,8 @@ def convert_to_db(self): converted_credentials = { CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_SET : \ " ".join([self.key, self.secret_key, self.session_token]), + CredentialsTypeChoice.KEY_SECRET_KEY_PAIR : \ + " ".join([self.key, self.secret_key]), CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR : " ".join([self.account_name, self.session_token]), CredentialsTypeChoice.ANONYMOUS_ACCESS: "" if not self.account_name else self.account_name, } @@ -279,6 +288,8 @@ def convert_from_db(self, credentials): self.credentials_type = credentials.get('type') if self.credentials_type == CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_SET: self.key, self.secret_key, self.session_token = credentials.get('value').split() + elif self.credentials_type == CredentialsTypeChoice.KEY_SECRET_KEY_PAIR: + self.key, self.secret_key = credentials.get('value').split() elif self.credentials_type == CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR: self.account_name, self.session_token = credentials.get('value').split() else: diff --git a/cvat/apps/engine/migrations/0041_auto_20210709_0839.py b/cvat/apps/engine/migrations/0041_auto_20210709_0839.py new file mode 100644 index 000000000000..19f91847184f --- /dev/null +++ b/cvat/apps/engine/migrations/0041_auto_20210709_0839.py @@ -0,0 +1,17 @@ +# Generated by Django 3.1.10 on 2021-07-09 08:39 + +from django.db import migrations, models + +class Migration(migrations.Migration): + + dependencies = [ + ('engine', '0040_cloud_storage'), + ] + + operations = [ + migrations.AlterField( + model_name='cloudstorage', + name='credentials_type', + field=models.CharField(choices=[('TEMP_KEY_SECRET_KEY_TOKEN_SET', 'TEMP_KEY_SECRET_KEY_TOKEN_SET'), ('KEY_SECRET_KEY_PAIR', 'KEY_SECRET_KEY_PAIR'), ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=29), + ), + ] diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index ef199d0268cc..7b7362438809 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -557,6 +557,7 @@ def __str__(self): class CredentialsTypeChoice(str, Enum): # ignore bandit issues because false positives TEMP_KEY_SECRET_KEY_TOKEN_SET = 'TEMP_KEY_SECRET_KEY_TOKEN_SET' # nosec + KEY_SECRET_KEY_PAIR = 'KEY_SECRET_KEY_PAIR' # nosec ACCOUNT_NAME_TOKEN_PAIR = 'ACCOUNT_NAME_TOKEN_PAIR' # nosec ANONYMOUS_ACCESS = 'ANONYMOUS_ACCESS' From 41aa91d8779c09ed89b3ebc4a037a37201138e6b Mon Sep 17 00:00:00 2001 From: Maya Date: Sun, 18 Jul 2021 14:34:37 +0300 Subject: [PATCH 05/24] Fix several moments --- cvat/apps/engine/cloud_provider.py | 81 +++++++++++++++---- .../migrations/0041_auto_20210718_0931.py | 26 ++++++ cvat/apps/engine/models.py | 10 ++- cvat/apps/engine/serializers.py | 50 +++++++++++- cvat/apps/engine/views.py | 75 ++++++++--------- 5 files changed, 180 insertions(+), 62 deletions(-) create mode 100644 cvat/apps/engine/migrations/0041_auto_20210718_0931.py diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 8b3d63124e6a..103d8cddcc7d 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -27,6 +27,14 @@ def name(self): def create(self): pass + @abstractmethod + def _head_file(self, key): + pass + + @abstractmethod + def get_file_last_modified(self, key): + pass + @abstractmethod def exists(self): pass @@ -81,10 +89,39 @@ def get_cloud_storage_instance(cloud_provider, resource, credentials, specific_a raise NotImplementedError() return instance +def check_cloud_storage_existing(provider_type, + credentials_type, + session_token, + account_name, + key, + secret_key, + resource, + specific_attributes): + credentials = Credentials( + key=key, + secret_key=secret_key, + session_token=session_token, + account_name=account_name, + credentials_type=credentials_type) + details = { + 'resource': resource, + 'credentials': credentials, + 'specific_attributes': { + item.split('=')[0].strip(): item.split('=')[1].strip() + for item in specific_attributes.split('&') + } if len(specific_attributes) + else dict() + } + storage = get_cloud_storage_instance(cloud_provider=provider_type, **details) + if not storage.exists(): + message = str('The resource {} unavailable'.format(resource)) + slogger.glob.error(message) + raise Exception(message) + class AWS_S3(_CloudStorage): waiter_config = { - 'Delay': 5, # The amount of time in seconds to wait between attempts. Default: 5 - 'MaxAttempts': 3, # The maximum number of attempts to be made. Default: 20 + 'Delay': 1, # The amount of time in seconds to wait between attempts. Default: 5 + 'MaxAttempts': 2, # The maximum number of attempts to be made. Default: 20 } transfer_config = { 'max_io_queue': 10, @@ -136,19 +173,30 @@ def exists(self): Bucket=self.name, WaiterConfig=self.waiter_config ) + return True except WaiterError: - raise Exception('A resource {} unavailable'.format(self.name)) + return False def is_object_exist(self, key_object): waiter = self._client_s3.get_waiter('object_exists') try: waiter.wait( - Bucket=self._bucket, + Bucket=self.name, Key=key_object, - WaiterConfig=self.waiter_config + WaiterConfig=self.waiter_config, ) + return True except WaiterError: - raise Exception('A file {} unavailable'.format(key_object)) + return False + + def _head_file(self, key): + return self._client_s3.head_object( + Bucket=self.name, + Key=key + ) + + def get_file_last_modified(self, key): + return self._head_file(key).get('LastModified') def upload_file(self, file_obj, file_name): self._bucket.upload_fileobj( @@ -229,12 +277,19 @@ def create(self): raise Exception(msg) def exists(self): - return self._container_client.exists(timeout=5) + return self._container_client.exists(timeout=2) def is_object_exist(self, file_name): blob_client = self._container_client.get_blob_client(file_name) return blob_client.exists() + def _head_file(self, key): + blob_client = self.container.get_blob_client(key) + return blob_client.get_blob_properties() + + def get_file_last_modified(self, key): + return self._head_file(key).last_modified + def upload_file(self, file_obj, file_name): self._container_client.upload_blob(name=file_name, data=file_obj) @@ -275,8 +330,6 @@ def __init__(self, **credentials): def convert_to_db(self): converted_credentials = { - CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_SET : \ - " ".join([self.key, self.secret_key, self.session_token]), CredentialsTypeChoice.KEY_SECRET_KEY_PAIR : \ " ".join([self.key, self.secret_key]), CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR : " ".join([self.account_name, self.session_token]), @@ -286,18 +339,16 @@ def convert_to_db(self): def convert_from_db(self, credentials): self.credentials_type = credentials.get('type') - if self.credentials_type == CredentialsTypeChoice.TEMP_KEY_SECRET_KEY_TOKEN_SET: - self.key, self.secret_key, self.session_token = credentials.get('value').split() - elif self.credentials_type == CredentialsTypeChoice.KEY_SECRET_KEY_PAIR: + if self.credentials_type == CredentialsTypeChoice.KEY_SECRET_KEY_PAIR: self.key, self.secret_key = credentials.get('value').split() elif self.credentials_type == CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR: self.account_name, self.session_token = credentials.get('value').split() - else: - # anonymous access + elif self.credentials_type == CredentialsTypeChoice.ANONYMOUS_ACCESS: self.session_token, self.key, self.secret_key = ('', '', '') # account_name will be in [some_value, ''] self.account_name = credentials.get('value') - self.credentials_type = None + else: + raise NotImplementedError('Found {} not supported credentials type'.format(self.credentials_type)) def mapping_with_new_values(self, credentials): self.credentials_type = credentials.get('credentials_type', self.credentials_type) diff --git a/cvat/apps/engine/migrations/0041_auto_20210718_0931.py b/cvat/apps/engine/migrations/0041_auto_20210718_0931.py new file mode 100644 index 000000000000..03e16a250f76 --- /dev/null +++ b/cvat/apps/engine/migrations/0041_auto_20210718_0931.py @@ -0,0 +1,26 @@ +# Generated by Django 3.1.10 on 2021-07-18 09:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('engine', '0040_cloud_storage'), + ] + + operations = [ + migrations.AlterField( + model_name='cloudstorage', + name='credentials_type', + field=models.CharField(choices=[('KEY_SECRET_KEY_PAIR', 'KEY_SECRET_KEY_PAIR'), ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=29), + ), + migrations.CreateModel( + name='Manifest', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('filename', models.CharField(default='manifest.jsonl', max_length=1024)), + ('cloud_storages', models.ManyToManyField(to='engine.CloudStorage')), + ], + ), + ] diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 7b7362438809..c0d3a5faeb1f 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -556,7 +556,6 @@ def __str__(self): class CredentialsTypeChoice(str, Enum): # ignore bandit issues because false positives - TEMP_KEY_SECRET_KEY_TOKEN_SET = 'TEMP_KEY_SECRET_KEY_TOKEN_SET' # nosec KEY_SECRET_KEY_PAIR = 'KEY_SECRET_KEY_PAIR' # nosec ACCOUNT_NAME_TOKEN_PAIR = 'ACCOUNT_NAME_TOKEN_PAIR' # nosec ANONYMOUS_ACCESS = 'ANONYMOUS_ACCESS' @@ -572,6 +571,13 @@ def list(cls): def __str__(self): return self.value +class Manifest(models.Model): + filename = models.CharField(max_length=1024, default='manifest.jsonl') + cloud_storages = models.ManyToManyField('CloudStorage') + + def __str__(self): + return '{}'.format(self.filename) + class CloudStorage(models.Model): # restrictions: # AWS bucket name, Azure container name - 63 @@ -607,7 +613,7 @@ def get_storage_logs_dirname(self): return os.path.join(self.get_storage_dirname(), 'logs') def get_log_path(self): - return os.path.join(self.get_storage_dirname(), "storage.log") + return os.path.join(self.get_storage_logs_dirname(), "storage.log") def get_preview_path(self): return os.path.join(self.get_storage_dirname(), 'preview.jpeg') diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 7ed8311a98cd..73507f236d58 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -11,7 +11,7 @@ from cvat.apps.dataset_manager.formats.utils import get_label_color from cvat.apps.engine import models -from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials +from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, check_cloud_storage_existing, Credentials from cvat.apps.engine.log import slogger class BasicUserSerializer(serializers.ModelSerializer): @@ -771,8 +771,22 @@ def create(self, validated_data): return db_review +class ManifestSerializer(serializers.ModelSerializer): + class Meta: + model = models.Manifest + fields = ('filename', ) + + # pylint: disable=no-self-use + def to_internal_value(self, data): + return {'filename': data.get('filename')} + + # pylint: disable=no-self-use + def to_representation(self, instance): + return instance.filename if instance else instance + class BaseCloudStorageSerializer(serializers.ModelSerializer): owner = BasicUserSerializer(required=False) + manifest_set = ManifestSerializer(many=True, default=[]) class Meta: model = models.CloudStorage exclude = ['credentials'] @@ -784,13 +798,14 @@ class CloudStorageSerializer(serializers.ModelSerializer): key = serializers.CharField(max_length=20, allow_blank=True, required=False) secret_key = serializers.CharField(max_length=40, allow_blank=True, required=False) account_name = serializers.CharField(max_length=24, allow_blank=True, required=False) + manifest_set = ManifestSerializer(many=True, default=[]) class Meta: model = models.CloudStorage fields = ( 'provider_type', 'resource', 'display_name', 'owner', 'credentials_type', 'created_date', 'updated_date', 'session_token', 'account_name', 'key', - 'secret_key', 'specific_attributes', 'description', 'id' + 'secret_key', 'specific_attributes', 'description', 'id', 'manifest_set', ) read_only_fields = ('created_date', 'updated_date', 'owner') @@ -836,12 +851,21 @@ def create(self, validated_data): slogger.glob.warning("Failed with creating storage\n{}".format(str(ex))) raise + manifest_set = validated_data.pop('manifest_set') + db_storage = models.CloudStorage.objects.create( credentials=credentials.convert_to_db(), **validated_data ) db_storage.save() + manifest_file_instances = [] + for manifest in manifest_set: + manifest_file = models.Manifest(**manifest) + manifest_file.save() + manifest_file_instances.append(manifest_file) + db_storage.manifest_set.add(*manifest_file_instances) + cloud_storage_path = db_storage.get_storage_dirname() if os.path.isdir(cloud_storage_path): shutil.rmtree(cloud_storage_path) @@ -862,9 +886,27 @@ def update(self, instance, validated_data): instance.credentials_type = validated_data.get('credentials_type', instance.credentials_type) instance.resource = validated_data.get('resource', instance.resource) instance.display_name = validated_data.get('display_name', instance.display_name) - instance.description = validated_data.get('description', instance.description) - instance.specific_attributes = validated_data.get('specific_attributes', instance.specific_attributes) + check_cloud_storage_existing(instance.provider_type, instance.credentials_type, credentials.session_token, + credentials.account_name, credentials.key, credentials.secret_key, instance.resource, instance.specific_attributes) + + new_manifest_names = set(i.get('filename') for i in validated_data.get('manifest_set', [])) + previos_manifest_names = set(i.filename for i in instance.manifest_set.all()) + delta_to_delete = tuple(previos_manifest_names - new_manifest_names) + delta_to_create = tuple(new_manifest_names - previos_manifest_names) + if delta_to_delete: + instance.manifest_set.filter(filename__in=delta_to_delete).delete() + if delta_to_create: + # we cannot use bulk_create because It does not work with many-to-many relationships + manifest_instances = [] + for item in delta_to_create: + if not models.Manifest.objects.filter(filename=item): + manifest = models.Manifest(filename=item) + manifest.save() + else: + manifest = models.Manifest.objects.get(filename=item) + manifest_instances.append(manifest) + instance.manifest_set.add(*manifest_instances) instance.save() return instance diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 450353b5eb3f..936fc06327fb 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -41,7 +41,7 @@ import cvat.apps.dataset_manager as dm import cvat.apps.dataset_manager.views # pylint: disable=unused-import from cvat.apps.authentication import auth -from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials +from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, check_cloud_storage_existing, Credentials from cvat.apps.dataset_manager.bindings import CvatImportError from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine.frame_provider import FrameProvider @@ -1202,29 +1202,16 @@ def get_queryset(self): def perform_create(self, serializer): # check that instance of cloud storage exists provider_type = serializer.validated_data.get('provider_type') - credentials = Credentials( - session_token=serializer.validated_data.get('session_token', ''), - account_name=serializer.validated_data.get('account_name', ''), - key=serializer.validated_data.get('key', ''), - secret_key=serializer.validated_data.get('secret_key', '') - ) - details = { - 'resource': serializer.validated_data.get('resource'), - 'credentials': credentials, - 'specific_attributes': { - item.split('=')[0].strip(): item.split('=')[1].strip() - for item in serializer.validated_data.get('specific_attributes').split('&') - } if len(serializer.validated_data.get('specific_attributes', '')) - else dict() - } - storage = get_cloud_storage_instance(cloud_provider=provider_type, **details) - try: - storage.exists() - except Exception as ex: - message = str(ex) - slogger.glob.error(message) - raise - + credentials_type = serializer.validated_data.get('credentials_type') + session_token = serializer.validated_data.get('session_token', '') + account_name = serializer.validated_data.get('account_name', '') + key = serializer.validated_data.get('key', '') + secret_key = serializer.validated_data.get('secret_key', '') + resource = serializer.validated_data.get('resource') + specific_attributes = serializer.validated_data.get('specific_attributes', '') + + check_cloud_storage_existing(provider_type, credentials_type, session_token, account_name, + key, secret_key, resource, specific_attributes) owner = self.request.data.get('owner') if owner: serializer.save() @@ -1265,14 +1252,14 @@ def create(self, request, *args, **kwargs): @swagger_auto_schema( method='get', - operation_summary='Method returns a mapped names of an available files from a storage and a manifest content', + operation_summary='Method returns a manifest content', manual_parameters=[ openapi.Parameter('manifest_path', openapi.IN_QUERY, description="Path to the manifest file in a cloud storage", type=openapi.TYPE_STRING) ], responses={ - '200': openapi.Response(description='Mapped names of an available files from a storage and a manifest content'), + '200': openapi.Response(description='A manifest content'), }, tags=['cloud storages'] ) @@ -1291,28 +1278,32 @@ def content(self, request, pk): 'specific_attributes': db_storage.get_specific_attributes() } storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) - storage.initialize_content() - storage_files = storage.content - manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl') - with TemporaryDirectory(suffix='manifest', prefix='cvat') as tmp_dir: - tmp_manifest_path = os.path.join(tmp_dir, 'manifest.jsonl') - storage.download_file(manifest_path, tmp_manifest_path) - manifest = ImageManifestManager(tmp_manifest_path) + if not storage.is_object_exist(manifest_path): + import errno + raise FileNotFoundError(errno.ENOENT, + "Not found on the cloud storage {}".format(db_storage.display_name), manifest_path) + + full_manifest_path = os.path.join(db_storage.get_storage_dirname(), manifest_path) + if not os.path.exist(full_manifest_path) or \ + os.path.getmtime(full_manifest_path) < storage.get_file_last_modified(full_manifest_path): + # TODO: create sub dirs + storage.download_file(manifest_path, full_manifest_path) + manifest = ImageManifestManager(full_manifest_path) + # need to reset previon index + manifest.reset_index() manifest.init_index() manifest_files = manifest.data - content = {f:[] for f in set(storage_files) | set(manifest_files)} - for key, _ in content.items(): - if key in storage_files: content[key].append('s') # storage - if key in manifest_files: content[key].append('m') # manifest - - data = json.dumps(content) - return Response(data=data, content_type="aplication/json") + return Response(data=manifest_files, content_type="text/plain") except CloudStorageModel.DoesNotExist: message = f"Storage {pk} does not exist" slogger.glob.error(message) return HttpResponseNotFound(message) + except FileNotFoundError as ex: + msg = f"{ex.strerror} {ex.filename}" + slogger.cloud_storage[pk].info(msg) + return Response(data=msg, status=status.HTTP_404_NOT_FOUND) except Exception as ex: return HttpResponseBadRequest(str(ex)) @@ -1343,7 +1334,9 @@ def preview(self, request, pk): storage.initialize_content() storage_images = [f for f in storage.content if MEDIA_TYPES['image']['has_mime_type'](f)] if not len(storage_images): - return HttpResponseBadRequest('Cloud storage {} does not contain any images'.format(pk)) + msg = 'Cloud storage {} does not contain any images'.format(pk) + slogger.cloud_storage[pk].info(msg) + return HttpResponseBadRequest(msg) with NamedTemporaryFile() as temp_image: storage.download_file(storage_images[0], temp_image.name) reader = ImageListReader([temp_image.name]) From 7e56fa92ca7f8005388bc93699b1cb43bb54fc16 Mon Sep 17 00:00:00 2001 From: Maya Date: Sun, 18 Jul 2021 14:35:05 +0300 Subject: [PATCH 06/24] Add index resetting --- utils/dataset_manifest/core.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index b357daf9b58e..21022597ef50 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -223,6 +223,9 @@ def load(self): self._index = json.load(index_file, object_hook=lambda d: {int(k): v for k, v in d.items()}) + def remove(self): + os.remove(self._path) + def create(self, manifest, skip): assert os.path.exists(manifest), 'A manifest file not exists, index cannot be created' with open(manifest, 'r+') as manifest_file: @@ -290,6 +293,11 @@ def init_index(self): self._index.create(self._manifest.path, 3 if self._manifest.TYPE == 'video' else 2) self._index.dump() + def reset_index(self): + self._index = _Index(os.path.dirname(self._manifest.path)) + if os.path.exists(self._index.path): + self._index.remove() + @abstractmethod def create(self, content, **kwargs): pass From f94ef7c3af8587a1873b03139b216d515d6d6211 Mon Sep 17 00:00:00 2001 From: Maya Date: Sun, 18 Jul 2021 14:39:04 +0300 Subject: [PATCH 07/24] Fix pylint errors --- cvat/apps/engine/views.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 936fc06327fb..5f8f32686357 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: MIT import io -import json import os import os.path as osp import shutil @@ -11,7 +10,7 @@ import uuid from datetime import datetime from distutils.util import strtobool -from tempfile import mkstemp, NamedTemporaryFile, TemporaryDirectory +from tempfile import mkstemp, NamedTemporaryFile from PIL import Image as PILImage import cv2 From 1f2915baebcf9f6030f1f30329f044f326af48d7 Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 2 Aug 2021 10:25:36 +0300 Subject: [PATCH 08/24] Remove excess migration --- .../migrations/0041_auto_20210709_0839.py | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 cvat/apps/engine/migrations/0041_auto_20210709_0839.py diff --git a/cvat/apps/engine/migrations/0041_auto_20210709_0839.py b/cvat/apps/engine/migrations/0041_auto_20210709_0839.py deleted file mode 100644 index 19f91847184f..000000000000 --- a/cvat/apps/engine/migrations/0041_auto_20210709_0839.py +++ /dev/null @@ -1,17 +0,0 @@ -# Generated by Django 3.1.10 on 2021-07-09 08:39 - -from django.db import migrations, models - -class Migration(migrations.Migration): - - dependencies = [ - ('engine', '0040_cloud_storage'), - ] - - operations = [ - migrations.AlterField( - model_name='cloudstorage', - name='credentials_type', - field=models.CharField(choices=[('TEMP_KEY_SECRET_KEY_TOKEN_SET', 'TEMP_KEY_SECRET_KEY_TOKEN_SET'), ('KEY_SECRET_KEY_PAIR', 'KEY_SECRET_KEY_PAIR'), ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=29), - ), - ] From 9a8faf4e1c4f0f1a57b3b07adc2ccbedecc2a7a7 Mon Sep 17 00:00:00 2001 From: Maya Date: Tue, 10 Aug 2021 16:17:23 +0300 Subject: [PATCH 09/24] tmp --- cvat/apps/engine/serializers.py | 2 +- cvat/apps/engine/views.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 73507f236d58..0b31566d075b 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -778,7 +778,7 @@ class Meta: # pylint: disable=no-self-use def to_internal_value(self, data): - return {'filename': data.get('filename')} + return {'filename': data } # pylint: disable=no-self-use def to_representation(self, instance): diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 5f8f32686357..fd0ae0ffb9e6 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1277,6 +1277,8 @@ def content(self, request, pk): 'specific_attributes': db_storage.get_specific_attributes() } storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) + if not db_storage.manifest_set.count(): + raise Exception('There is no manifest file') manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl') if not storage.is_object_exist(manifest_path): import errno @@ -1284,15 +1286,15 @@ def content(self, request, pk): "Not found on the cloud storage {}".format(db_storage.display_name), manifest_path) full_manifest_path = os.path.join(db_storage.get_storage_dirname(), manifest_path) - if not os.path.exist(full_manifest_path) or \ - os.path.getmtime(full_manifest_path) < storage.get_file_last_modified(full_manifest_path): + if not os.path.exists(full_manifest_path): + # or \ os.path.getmtime(full_manifest_path) < storage.get_file_last_modified(full_manifest_path): # TODO: create sub dirs storage.download_file(manifest_path, full_manifest_path) - manifest = ImageManifestManager(full_manifest_path) - # need to reset previon index - manifest.reset_index() - manifest.init_index() - manifest_files = manifest.data + manifest = ImageManifestManager(full_manifest_path) + # need to reset previon index + manifest.reset_index() + manifest.init_index() + manifest_files = manifest.data return Response(data=manifest_files, content_type="text/plain") except CloudStorageModel.DoesNotExist: From 8fb8207d050ec9acba04df83271e65c3523ec383 Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 12 Aug 2021 18:31:07 +0300 Subject: [PATCH 10/24] Some fixes --- cvat/apps/engine/cache.py | 48 ++++++++++++++++++++-------------- cvat/apps/engine/task.py | 9 ++++++- cvat/apps/engine/views.py | 14 ++++++++-- utils/dataset_manifest/core.py | 21 +++++++++++++-- 4 files changed, 67 insertions(+), 25 deletions(-) diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index be450d4dbca5..8c63c54d5185 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -71,6 +71,7 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): step=db_data.get_frame_step()) if db_data.storage == StorageChoice.CLOUD_STORAGE: db_cloud_storage = db_data.cloud_storage + assert db_cloud_storage, 'Cloud storage instance was deleted' credentials = Credentials() credentials.convert_from_db({ 'type': db_cloud_storage.credentials_type, @@ -81,26 +82,33 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): 'credentials': credentials, 'specific_attributes': db_cloud_storage.get_specific_attributes() } - cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) - cloud_storage_instance.initialize_content() - for item in reader: - # full_name may be 'sub_dir/image.jpeg' - full_name = f"{item['name']}{item['extension']}" - if full_name not in cloud_storage_instance: - raise Exception('{} file was not found on a {} storage'.format(full_name, cloud_storage_instance.name)) - head, file_name = os.path.split(full_name) - abs_head = os.path.join(gettempdir(), head) - os.makedirs(abs_head, exist_ok=True) - with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=file_name, delete=False, dir=abs_head) as temp_file: - source_path = temp_file.name - buf = cloud_storage_instance.download_fileobj(full_name) - temp_file.write(buf.getvalue()) - checksum = item.get('checksum', None) - if not checksum: - slogger.glob.warning('A manifest file does not contain checksum for image {}'.format(item.get('name'))) - if checksum and not md5_hash(source_path) == checksum: - slogger.glob.warning('Hash sums of files {} do not match'.format(full_name)) - images.append((source_path, source_path, None)) + try: + cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) + cloud_storage_instance.initialize_content() + for item in reader: + # full_name may be 'sub_dir/image.jpeg' + full_name = f"{item['name']}{item['extension']}" + if full_name not in cloud_storage_instance: + raise Exception('{} file was not found on a {} storage'.format(full_name, cloud_storage_instance.name)) + head, file_name = os.path.split(full_name) + abs_head = os.path.join(gettempdir(), head) + os.makedirs(abs_head, exist_ok=True) + with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=file_name, delete=False, dir=abs_head) as temp_file: + source_path = temp_file.name + buf = cloud_storage_instance.download_fileobj(full_name) + temp_file.write(buf.getvalue()) + checksum = item.get('checksum', None) + if not checksum: + slogger.cloud_storage[db_cloud_storage.id].warning('A manifest file does not contain checksum for image {}'.format(item.get('name'))) + if checksum and not md5_hash(source_path) == checksum: + slogger.cloud_storage[db_cloud_storage.id].warning('Hash sums of files {} do not match'.format(full_name)) + images.append((source_path, source_path, None)) + except Exception as ex: + if not cloud_storage_instance.exists(): + msg = 'The resource {} is no longer available. It may have been deleted'.format(cloud_storage_instance.name) + else: + msg = str(ex) + raise Exception(msg) else: for item in reader: source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}") diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index cc6a5ffa9af2..d9042c26a6ae 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -252,7 +252,6 @@ def _create_thread(tid, data, isImport=False): 'specific_attributes': db_cloud_storage.get_specific_attributes() } cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) - cloud_storage_instance.download_file(manifest_file[0], db_data.get_manifest_path()) first_sorted_media_image = sorted(media['image'])[0] cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image)) @@ -368,6 +367,14 @@ def update_progress(progress): w, h = extractor.get_image_size(0) else: manifest = ImageManifestManager(db_data.get_manifest_path()) + # prepare task manifest file from cloud storage manifest file + cloud_storage_manifest = ImageManifestManager( + os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]) + ) + cloud_storage_manifest.init_index() + media_files = sorted(media['image']) + content = cloud_storage_manifest.get_subset(media_files) + manifest.create(content) manifest.init_index() img_properties = manifest[0] w, h = img_properties['width'], img_properties['height'] diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index fd0ae0ffb9e6..45213e50bf39 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1306,7 +1306,12 @@ def content(self, request, pk): slogger.cloud_storage[pk].info(msg) return Response(data=msg, status=status.HTTP_404_NOT_FOUND) except Exception as ex: - return HttpResponseBadRequest(str(ex)) + # check that cloud storage was not deleted + if not storage.exists(): + msg = 'The resource {} is no longer available. It may have been deleted'.format(storage.name) + else: + msg = str(ex) + return HttpResponseBadRequest(msg) @swagger_auto_schema( method='get', @@ -1353,7 +1358,12 @@ def preview(self, request, pk): slogger.glob.error(message) return HttpResponseNotFound(message) except Exception as ex: - return HttpResponseBadRequest(str(ex)) + # check that cloud storage was not deleted + if not storage.exists(): + msg = 'The resource {} is no longer available. It may have been deleted'.format(storage.name) + else: + msg = str(ex) + return HttpResponseBadRequest(msg) def rq_handler(job, exc_type, exc_value, tb): job.exc_info = "".join( diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index 21022597ef50..0a3829984ff8 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -339,6 +339,10 @@ def index(self): def data(self): pass + @abstractmethod + def get_subset(self, subset_names): + pass + class VideoManifestManager(_ManifestManager): def __init__(self, manifest_path): super().__init__(manifest_path) @@ -402,7 +406,10 @@ def video_length(self): @property def data(self): - return [self.video_name] + return (self.video_name) + + def get_subset(self, subset_names): + raise NotImplementedError() #TODO: add generic manifest structure file validation class ManifestValidator: @@ -484,4 +491,14 @@ def prepare_meta(sources, **kwargs): @property def data(self): - return [f"{image['name']}{image['extension']}" for _, image in self] \ No newline at end of file + return (f"{image['name']}{image['extension']}" for _, image in self) + + def get_subset(self, subset_names): + return ({ + 'name': f"{image['name']}", + 'extension': f"{image['extension']}", + 'width': image['width'], + 'height': image['height'], + 'meta': image['meta'], + 'checksum': f"{image['checksum']}" + } for _, image in self if f"{image['name']}{image['extension']}" in subset_names) \ No newline at end of file From 070dbcff389d7293b13877c5f83a94cf1288f0a6 Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 13 Aug 2021 14:17:24 +0300 Subject: [PATCH 11/24] Fixes --- ...718_0931.py => 0041_auto_20210813_0853.py} | 5 +-- cvat/apps/engine/models.py | 2 +- cvat/apps/engine/serializers.py | 34 ++++++------------- cvat/apps/engine/views.py | 13 ++++--- 4 files changed, 23 insertions(+), 31 deletions(-) rename cvat/apps/engine/migrations/{0041_auto_20210718_0931.py => 0041_auto_20210813_0853.py} (77%) diff --git a/cvat/apps/engine/migrations/0041_auto_20210718_0931.py b/cvat/apps/engine/migrations/0041_auto_20210813_0853.py similarity index 77% rename from cvat/apps/engine/migrations/0041_auto_20210718_0931.py rename to cvat/apps/engine/migrations/0041_auto_20210813_0853.py index 03e16a250f76..c66e76135b49 100644 --- a/cvat/apps/engine/migrations/0041_auto_20210718_0931.py +++ b/cvat/apps/engine/migrations/0041_auto_20210813_0853.py @@ -1,6 +1,7 @@ -# Generated by Django 3.1.10 on 2021-07-18 09:31 +# Generated by Django 3.1.13 on 2021-08-13 08:53 from django.db import migrations, models +import django.db.models.deletion class Migration(migrations.Migration): @@ -20,7 +21,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('filename', models.CharField(default='manifest.jsonl', max_length=1024)), - ('cloud_storages', models.ManyToManyField(to='engine.CloudStorage')), + ('cloud_storage', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='manifests', to='engine.cloudstorage')), ], ), ] diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index c0d3a5faeb1f..544fb0e0b99f 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -573,7 +573,7 @@ def __str__(self): class Manifest(models.Model): filename = models.CharField(max_length=1024, default='manifest.jsonl') - cloud_storages = models.ManyToManyField('CloudStorage') + cloud_storage = models.ForeignKey('CloudStorage', on_delete=models.CASCADE, null=True, related_name='manifests') def __str__(self): return '{}'.format(self.filename) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 0b31566d075b..5b4b44e143ed 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -786,7 +786,7 @@ def to_representation(self, instance): class BaseCloudStorageSerializer(serializers.ModelSerializer): owner = BasicUserSerializer(required=False) - manifest_set = ManifestSerializer(many=True, default=[]) + manifests = ManifestSerializer(many=True, default=[]) class Meta: model = models.CloudStorage exclude = ['credentials'] @@ -798,14 +798,14 @@ class CloudStorageSerializer(serializers.ModelSerializer): key = serializers.CharField(max_length=20, allow_blank=True, required=False) secret_key = serializers.CharField(max_length=40, allow_blank=True, required=False) account_name = serializers.CharField(max_length=24, allow_blank=True, required=False) - manifest_set = ManifestSerializer(many=True, default=[]) + manifests = ManifestSerializer(many=True, default=[]) class Meta: model = models.CloudStorage fields = ( 'provider_type', 'resource', 'display_name', 'owner', 'credentials_type', 'created_date', 'updated_date', 'session_token', 'account_name', 'key', - 'secret_key', 'specific_attributes', 'description', 'id', 'manifest_set', + 'secret_key', 'specific_attributes', 'description', 'id', 'manifests', ) read_only_fields = ('created_date', 'updated_date', 'owner') @@ -851,7 +851,7 @@ def create(self, validated_data): slogger.glob.warning("Failed with creating storage\n{}".format(str(ex))) raise - manifest_set = validated_data.pop('manifest_set') + manifests = validated_data.pop('manifests') db_storage = models.CloudStorage.objects.create( credentials=credentials.convert_to_db(), @@ -859,12 +859,8 @@ def create(self, validated_data): ) db_storage.save() - manifest_file_instances = [] - for manifest in manifest_set: - manifest_file = models.Manifest(**manifest) - manifest_file.save() - manifest_file_instances.append(manifest_file) - db_storage.manifest_set.add(*manifest_file_instances) + manifest_file_instances = [models.Manifest(**manifest, cloud_storage=db_storage) for manifest in manifests] + models.Manifest.objects.bulk_create(manifest_file_instances) cloud_storage_path = db_storage.get_storage_dirname() if os.path.isdir(cloud_storage_path): @@ -890,23 +886,15 @@ def update(self, instance, validated_data): check_cloud_storage_existing(instance.provider_type, instance.credentials_type, credentials.session_token, credentials.account_name, credentials.key, credentials.secret_key, instance.resource, instance.specific_attributes) - new_manifest_names = set(i.get('filename') for i in validated_data.get('manifest_set', [])) - previos_manifest_names = set(i.filename for i in instance.manifest_set.all()) + new_manifest_names = set(i.get('filename') for i in validated_data.get('manifests', [])) + previos_manifest_names = set(i.filename for i in instance.manifests.all()) delta_to_delete = tuple(previos_manifest_names - new_manifest_names) delta_to_create = tuple(new_manifest_names - previos_manifest_names) if delta_to_delete: - instance.manifest_set.filter(filename__in=delta_to_delete).delete() + instance.manifests.filter(filename__in=delta_to_delete).delete() if delta_to_create: - # we cannot use bulk_create because It does not work with many-to-many relationships - manifest_instances = [] - for item in delta_to_create: - if not models.Manifest.objects.filter(filename=item): - manifest = models.Manifest(filename=item) - manifest.save() - else: - manifest = models.Manifest.objects.get(filename=item) - manifest_instances.append(manifest) - instance.manifest_set.add(*manifest_instances) + manifest_instances = [models.Manifest(filename=f, cloud_storage=instance.id) for f in delta_to_create] + models.Manifest.objects.bulk_create(manifest_instances) instance.save() return instance diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 45213e50bf39..3e690d25fa6c 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -2,9 +2,11 @@ # # SPDX-License-Identifier: MIT +import errno import io import os import os.path as osp +import pytz import shutil import traceback import uuid @@ -1277,18 +1279,19 @@ def content(self, request, pk): 'specific_attributes': db_storage.get_specific_attributes() } storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) - if not db_storage.manifest_set.count(): + if not db_storage.manifests.count(): raise Exception('There is no manifest file') manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl') if not storage.is_object_exist(manifest_path): - import errno raise FileNotFoundError(errno.ENOENT, "Not found on the cloud storage {}".format(db_storage.display_name), manifest_path) full_manifest_path = os.path.join(db_storage.get_storage_dirname(), manifest_path) - if not os.path.exists(full_manifest_path): - # or \ os.path.getmtime(full_manifest_path) < storage.get_file_last_modified(full_manifest_path): - # TODO: create sub dirs + if not os.path.exists(full_manifest_path) or \ + datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) < storage.get_file_last_modified(manifest_path): + # create sub dirs + if os.path.dirname(manifest_path): + os.makedirs(os.path.dirname(full_manifest_path), exist_ok=True) storage.download_file(manifest_path, full_manifest_path) manifest = ImageManifestManager(full_manifest_path) # need to reset previon index From b3252b1542a4796c97369863f6ccff80a13f5ace Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 13 Aug 2021 22:27:50 +0300 Subject: [PATCH 12/24] fix --- cvat/apps/engine/serializers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 5b4b44e143ed..1af0ab45ba3a 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -893,7 +893,7 @@ def update(self, instance, validated_data): if delta_to_delete: instance.manifests.filter(filename__in=delta_to_delete).delete() if delta_to_create: - manifest_instances = [models.Manifest(filename=f, cloud_storage=instance.id) for f in delta_to_create] + manifest_instances = [models.Manifest(filename=f, cloud_storage=instance) for f in delta_to_create] models.Manifest.objects.bulk_create(manifest_instances) instance.save() return instance From deab61b3e704987c187be492cd8960f7b53b849e Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 26 Aug 2021 13:45:35 +0300 Subject: [PATCH 13/24] [server] Add cloud storage status && fixes --- cvat/apps/engine/cache.py | 21 ++-- cvat/apps/engine/cloud_provider.py | 166 ++++++++++++++++------------- cvat/apps/engine/serializers.py | 127 +++++++++++++++------- cvat/apps/engine/task.py | 21 ++-- cvat/apps/engine/views.py | 116 +++++++++++++------- utils/dataset_manifest/core.py | 3 +- 6 files changed, 275 insertions(+), 179 deletions(-) diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index 8c63c54d5185..fd29d8211403 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -86,22 +86,18 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details) cloud_storage_instance.initialize_content() for item in reader: - # full_name may be 'sub_dir/image.jpeg' - full_name = f"{item['name']}{item['extension']}" - if full_name not in cloud_storage_instance: - raise Exception('{} file was not found on a {} storage'.format(full_name, cloud_storage_instance.name)) - head, file_name = os.path.split(full_name) - abs_head = os.path.join(gettempdir(), head) - os.makedirs(abs_head, exist_ok=True) - with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=file_name, delete=False, dir=abs_head) as temp_file: + file_name = f"{item['name']}{item['extension']}" + if file_name not in cloud_storage_instance: + raise Exception('{} file was not found on a {} storage'.format(file_name, cloud_storage_instance.name)) + with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=file_name.replace(os.path.sep, '#'), delete=False) as temp_file: source_path = temp_file.name - buf = cloud_storage_instance.download_fileobj(full_name) + buf = cloud_storage_instance.download_fileobj(file_name) temp_file.write(buf.getvalue()) checksum = item.get('checksum', None) if not checksum: slogger.cloud_storage[db_cloud_storage.id].warning('A manifest file does not contain checksum for image {}'.format(item.get('name'))) if checksum and not md5_hash(source_path) == checksum: - slogger.cloud_storage[db_cloud_storage.id].warning('Hash sums of files {} do not match'.format(full_name)) + slogger.cloud_storage[db_cloud_storage.id].warning('Hash sums of files {} do not match'.format(file_name)) images.append((source_path, source_path, None)) except Exception as ex: if not cloud_storage_instance.exists(): @@ -116,14 +112,9 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): writer.save_as_chunk(images, buff) buff.seek(0) if db_data.storage == StorageChoice.CLOUD_STORAGE: - tmp = gettempdir() - created_dirs = set(filter(lambda x: x if x.lstrip(tmp) else None, [os.path.dirname(i[0]) for i in images])) images = [image[0] for image in images if os.path.exists(image[0])] for image_path in images: os.remove(image_path) - for created_dir in created_dirs: - if not os.listdir(created_dir): - os.rmdir(created_dir) return buff, mime_type def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type): diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 103d8cddcc7d..47f8672ba518 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -1,10 +1,16 @@ -#from dataclasses import dataclass +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os +import boto3 + from abc import ABC, abstractmethod, abstractproperty +from enum import Enum from io import BytesIO -import boto3 from boto3.s3.transfer import TransferConfig -from botocore.exceptions import WaiterError +from botocore.exceptions import ClientError from botocore.handlers import disable_signing from azure.storage.blob import BlobServiceClient @@ -14,6 +20,18 @@ from cvat.apps.engine.log import slogger from cvat.apps.engine.models import CredentialsTypeChoice, CloudProviderChoice +class Status(str, Enum): + AVAILABLE = 'AVAILABLE' + NOT_FOUND = 'NOT_FOUND' + FORBIDDEN = 'FORBIDDEN' + + @classmethod + def choices(cls): + return tuple((x.value, x.name) for x in cls) + + def __str__(self): + return self.value + class _CloudStorage(ABC): def __init__(self): @@ -32,11 +50,19 @@ def _head_file(self, key): pass @abstractmethod - def get_file_last_modified(self, key): + def _head(self): + pass + + @abstractmethod + def get_status(self): pass @abstractmethod - def exists(self): + def get_file_status(self, key): + pass + + @abstractmethod + def get_file_last_modified(self, key): pass @abstractmethod @@ -50,6 +76,7 @@ def download_fileobj(self, key): def download_file(self, key, path): file_obj = self.download_fileobj(key) if isinstance(file_obj, BytesIO): + os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, 'wb') as f: f.write(file_obj.getvalue()) else: @@ -89,40 +116,7 @@ def get_cloud_storage_instance(cloud_provider, resource, credentials, specific_a raise NotImplementedError() return instance -def check_cloud_storage_existing(provider_type, - credentials_type, - session_token, - account_name, - key, - secret_key, - resource, - specific_attributes): - credentials = Credentials( - key=key, - secret_key=secret_key, - session_token=session_token, - account_name=account_name, - credentials_type=credentials_type) - details = { - 'resource': resource, - 'credentials': credentials, - 'specific_attributes': { - item.split('=')[0].strip(): item.split('=')[1].strip() - for item in specific_attributes.split('&') - } if len(specific_attributes) - else dict() - } - storage = get_cloud_storage_instance(cloud_provider=provider_type, **details) - if not storage.exists(): - message = str('The resource {} unavailable'.format(resource)) - slogger.glob.error(message) - raise Exception(message) - class AWS_S3(_CloudStorage): - waiter_config = { - 'Delay': 1, # The amount of time in seconds to wait between attempts. Default: 5 - 'MaxAttempts': 2, # The maximum number of attempts to be made. Default: 20 - } transfer_config = { 'max_io_queue': 10, } @@ -166,34 +160,35 @@ def bucket(self): def name(self): return self._bucket.name - def exists(self): - waiter = self._client_s3.get_waiter('bucket_exists') - try: - waiter.wait( - Bucket=self.name, - WaiterConfig=self.waiter_config - ) - return True - except WaiterError: - return False - - def is_object_exist(self, key_object): - waiter = self._client_s3.get_waiter('object_exists') - try: - waiter.wait( - Bucket=self.name, - Key=key_object, - WaiterConfig=self.waiter_config, - ) - return True - except WaiterError: - return False + def _head(self): + return self._client_s3.head_bucket(Bucket=self.name) def _head_file(self, key): - return self._client_s3.head_object( - Bucket=self.name, - Key=key - ) + return self._client_s3.head_object(Bucket=self.name, Key=key) + + def get_status(self): + # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.head_object + # return only 3 codes: 200, 403, 404 + try: + self._head() + return Status.AVAILABLE + except ClientError as ex: + code = ex.response['Error']['Code'] + if code == '403': + return Status.FORBIDDEN + else: + return Status.NOT_FOUND + + def get_file_status(self, key): + try: + self._head_file(key) + return Status.AVAILABLE + except ClientError as ex: + code = ex.response['Error']['Code'] + if code == '403': + return Status.FORBIDDEN + else: + return Status.NOT_FOUND def get_file_last_modified(self, key): return self._head_file(key).get('LastModified') @@ -276,12 +271,24 @@ def create(self): slogger.glob.info(msg) raise Exception(msg) - def exists(self): - return self._container_client.exists(timeout=2) + def _head(self): + #TODO + raise NotImplementedError() + + def get_status(self): + # TODO + raise NotImplementedError() + + def get_file_status(self, key): + # TODO + raise NotImplementedError() + + # def exists(self): + # return self._container_client.exists(timeout=2) - def is_object_exist(self, file_name): - blob_client = self._container_client.get_blob_client(file_name) - return blob_client.exists() + # def is_object_exist(self, file_name): + # blob_client = self._container_client.get_blob_client(file_name) + # return blob_client.exists() def _head_file(self, key): blob_client = self.container.get_blob_client(key) @@ -352,10 +359,23 @@ def convert_from_db(self, credentials): def mapping_with_new_values(self, credentials): self.credentials_type = credentials.get('credentials_type', self.credentials_type) - self.key = credentials.get('key', self.key) - self.secret_key = credentials.get('secret_key', self.secret_key) - self.session_token = credentials.get('session_token', self.session_token) - self.account_name = credentials.get('account_name', self.account_name) + if self.credentials_type == CredentialsTypeChoice.ANONYMOUS_ACCESS: + self.key = '' + self.secret_key = '' + self.session_token = '' + self.account_name = credentials.get('account_name', self.account_name) + elif self.credentials_type == CredentialsTypeChoice.KEY_SECRET_KEY_PAIR: + self.key = credentials.get('key', self.key) + self.secret_key = credentials.get('secret_key', self.secret_key) + self.session_token = '' + self.account_name = '' + elif self.credentials_type == CredentialsTypeChoice.ACCOUNT_NAME_TOKEN_PAIR: + self.session_token = credentials.get('session_token', self.session_token) + self.account_name = credentials.get('account_name', self.account_name) + self.key = '' + self.secret_key = '' + else: + raise NotImplementedError('Mapping credentials: unsupported credentials type') def values(self): return [self.key, self.secret_key, self.session_token, self.account_name] diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 1af0ab45ba3a..aa1a5630d79b 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -11,7 +11,7 @@ from cvat.apps.dataset_manager.formats.utils import get_label_color from cvat.apps.engine import models -from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, check_cloud_storage_existing, Credentials +from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials, Status from cvat.apps.engine.log import slogger class BasicUserSerializer(serializers.ModelSerializer): @@ -454,7 +454,7 @@ def validate(self, attrs): if 'project_id' in attrs.keys() and self.instance is not None: project_id = attrs.get('project_id') if project_id is not None and not models.Project.objects.filter(id=project_id).count(): - raise serializers.ValidationError(f'Cannot find project with ID {project_id}') + raise serializers.ValidationError(f'Cannot find project with ID {project_id}') # Check that all labels can be mapped new_label_names = set() old_labels = self.instance.project.label_set.all() if self.instance.project_id else self.instance.label_set.all() @@ -834,40 +834,58 @@ def create(self, validated_data): session_token=validated_data.pop('session_token', ''), credentials_type = validated_data.get('credentials_type') ) + details = { + 'resource': validated_data.get('resource'), + 'credentials': credentials, + 'specific_attributes': { + item.split('=')[0].strip(): item.split('=')[1].strip() + for item in validated_data.get('specific_attributes').split('&') + } if len(validated_data.get('specific_attributes', '')) + else dict() + } + storage = get_cloud_storage_instance(cloud_provider=provider_type, **details) if should_be_created: - details = { - 'resource': validated_data.get('resource'), - 'credentials': credentials, - 'specific_attributes': { - item.split('=')[0].strip(): item.split('=')[1].strip() - for item in validated_data.get('specific_attributes').split('&') - } if len(validated_data.get('specific_attributes', '')) - else dict() - } - storage = get_cloud_storage_instance(cloud_provider=provider_type, **details) try: storage.create() except Exception as ex: slogger.glob.warning("Failed with creating storage\n{}".format(str(ex))) raise - manifests = validated_data.pop('manifests') - - db_storage = models.CloudStorage.objects.create( - credentials=credentials.convert_to_db(), - **validated_data - ) - db_storage.save() + storage_status = storage.get_status() + if storage_status == Status.AVAILABLE: + + manifests = validated_data.pop('manifests') + # check for existence of manifest files + for manifest in manifests: + if not storage.get_file_status(manifest.get('filename')) == Status.AVAILABLE: + raise serializers.ValidationError({ + 'manifests': "The '{}' file does not exist on '{}' cloud storage" \ + .format(manifest.get('filename'), storage.name) + }) + + db_storage = models.CloudStorage.objects.create( + credentials=credentials.convert_to_db(), + **validated_data + ) + db_storage.save() - manifest_file_instances = [models.Manifest(**manifest, cloud_storage=db_storage) for manifest in manifests] - models.Manifest.objects.bulk_create(manifest_file_instances) + manifest_file_instances = [models.Manifest(**manifest, cloud_storage=db_storage) for manifest in manifests] + models.Manifest.objects.bulk_create(manifest_file_instances) - cloud_storage_path = db_storage.get_storage_dirname() - if os.path.isdir(cloud_storage_path): - shutil.rmtree(cloud_storage_path) + cloud_storage_path = db_storage.get_storage_dirname() + if os.path.isdir(cloud_storage_path): + shutil.rmtree(cloud_storage_path) - os.makedirs(db_storage.get_storage_logs_dirname(), exist_ok=True) - return db_storage + os.makedirs(db_storage.get_storage_logs_dirname(), exist_ok=True) + return db_storage + elif storage_status == Status.FORBIDDEN: + field = 'credentials' + message = 'Cannot create resource {} with specified credentials. Access forbidden.'.format(storage.name) + else: + field = 'recource' + message = 'The resource {} not found. It may have been deleted.'.format(storage.name) + slogger.glob.error(message) + raise serializers.ValidationError({field: message}) # pylint: disable=no-self-use def update(self, instance, validated_data): @@ -882,21 +900,48 @@ def update(self, instance, validated_data): instance.credentials_type = validated_data.get('credentials_type', instance.credentials_type) instance.resource = validated_data.get('resource', instance.resource) instance.display_name = validated_data.get('display_name', instance.display_name) - - check_cloud_storage_existing(instance.provider_type, instance.credentials_type, credentials.session_token, - credentials.account_name, credentials.key, credentials.secret_key, instance.resource, instance.specific_attributes) - - new_manifest_names = set(i.get('filename') for i in validated_data.get('manifests', [])) - previos_manifest_names = set(i.filename for i in instance.manifests.all()) - delta_to_delete = tuple(previos_manifest_names - new_manifest_names) - delta_to_create = tuple(new_manifest_names - previos_manifest_names) - if delta_to_delete: - instance.manifests.filter(filename__in=delta_to_delete).delete() - if delta_to_create: - manifest_instances = [models.Manifest(filename=f, cloud_storage=instance) for f in delta_to_create] - models.Manifest.objects.bulk_create(manifest_instances) - instance.save() - return instance + instance.description = validated_data.get('description', instance.description) + instance.specific_attributes = validated_data.get('specific_attributes', instance.specific_attributes) + + # check cloud storage existing + details = { + 'resource': instance.resource, + 'credentials': credentials, + 'specific_attributes': { + item.split('=')[0].strip(): item.split('=')[1].strip() + for item in instance.specific_attributes.split('&') + } if len(instance.specific_attributes) + else dict() + } + storage = get_cloud_storage_instance(cloud_provider=instance.provider_type, **details) + storage_status = storage.get_status() + if storage_status == Status.AVAILABLE: + new_manifest_names = set(i.get('filename') for i in validated_data.get('manifests', [])) + previos_manifest_names = set(i.filename for i in instance.manifests.all()) + delta_to_delete = tuple(previos_manifest_names - new_manifest_names) + delta_to_create = tuple(new_manifest_names - previos_manifest_names) + if delta_to_delete: + instance.manifests.filter(filename__in=delta_to_delete).delete() + if delta_to_create: + # check manifest files existing + for manifest in delta_to_create: + if not storage.get_file_status(manifest) == Status.AVAILABLE: + raise serializers.ValidationError({ + 'manifests': "The '{}' file does not exist on '{}' cloud storage" + .format(manifest, storage.name) + }) + manifest_instances = [models.Manifest(filename=f, cloud_storage=instance) for f in delta_to_create] + models.Manifest.objects.bulk_create(manifest_instances) + instance.save() + return instance + elif storage_status == Status.FORBIDDEN: + field = 'credentials' + message = 'Cannot update resource {} with specified credentials. Access forbidden.'.format(storage.name) + else: + field = 'recource' + message = 'The resource {} not found. It may have been deleted.'.format(storage.name) + slogger.glob.error(message) + raise serializers.ValidationError({field: message}) class RelatedFileSerializer(serializers.ModelSerializer): diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index d9042c26a6ae..7bda7fd3b97f 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -255,6 +255,17 @@ def _create_thread(tid, data, isImport=False): first_sorted_media_image = sorted(media['image'])[0] cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image)) + # prepare task manifest file from cloud storage manifest file + manifest = ImageManifestManager(db_data.get_manifest_path()) + cloud_storage_manifest = ImageManifestManager( + os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]) + ) + cloud_storage_manifest.init_index() + media_files = sorted(media['image']) + content = cloud_storage_manifest.get_subset(media_files) + manifest.create(content) + manifest.init_index() + av_scan_paths(upload_dir) job = rq.get_current_job() @@ -366,16 +377,6 @@ def update_progress(progress): if not (db_data.storage == models.StorageChoice.CLOUD_STORAGE): w, h = extractor.get_image_size(0) else: - manifest = ImageManifestManager(db_data.get_manifest_path()) - # prepare task manifest file from cloud storage manifest file - cloud_storage_manifest = ImageManifestManager( - os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]) - ) - cloud_storage_manifest.init_index() - media_files = sorted(media['image']) - content = cloud_storage_manifest.get_subset(media_files) - manifest.create(content) - manifest.init_index() img_properties = manifest[0] w, h = img_properties['width'], img_properties['height'] area = h * w diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 3e690d25fa6c..5aae2170c470 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -13,7 +13,6 @@ from datetime import datetime from distutils.util import strtobool from tempfile import mkstemp, NamedTemporaryFile -from PIL import Image as PILImage import cv2 from django.db.models.query import Prefetch @@ -42,11 +41,11 @@ import cvat.apps.dataset_manager as dm import cvat.apps.dataset_manager.views # pylint: disable=unused-import from cvat.apps.authentication import auth -from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, check_cloud_storage_existing, Credentials +from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials, Status from cvat.apps.dataset_manager.bindings import CvatImportError from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine.frame_provider import FrameProvider -from cvat.apps.engine.media_extractors import MEDIA_TYPES, ImageListReader +from cvat.apps.engine.media_extractors import ImageListReader from cvat.apps.engine.mime_types import mimetypes from cvat.apps.engine.models import ( Job, StatusChoice, Task, Project, Review, Issue, @@ -1201,23 +1200,7 @@ def get_queryset(self): return queryset def perform_create(self, serializer): - # check that instance of cloud storage exists - provider_type = serializer.validated_data.get('provider_type') - credentials_type = serializer.validated_data.get('credentials_type') - session_token = serializer.validated_data.get('session_token', '') - account_name = serializer.validated_data.get('account_name', '') - key = serializer.validated_data.get('key', '') - secret_key = serializer.validated_data.get('secret_key', '') - resource = serializer.validated_data.get('resource') - specific_attributes = serializer.validated_data.get('specific_attributes', '') - - check_cloud_storage_existing(provider_type, credentials_type, session_token, account_name, - key, secret_key, resource, specific_attributes) - owner = self.request.data.get('owner') - if owner: - serializer.save() - else: - serializer.save(owner=self.request.user) + serializer.save(owner=self.request.user) def perform_destroy(self, instance): cloud_storage_dirname = instance.get_storage_dirname() @@ -1282,16 +1265,17 @@ def content(self, request, pk): if not db_storage.manifests.count(): raise Exception('There is no manifest file') manifest_path = request.query_params.get('manifest_path', 'manifest.jsonl') - if not storage.is_object_exist(manifest_path): + file_status = storage.get_file_status(manifest_path) + if file_status == Status.NOT_FOUND: raise FileNotFoundError(errno.ENOENT, "Not found on the cloud storage {}".format(db_storage.display_name), manifest_path) + elif file_status == Status.FORBIDDEN: + raise PermissionError(errno.EACCES, + "Access to the file on the '{}' cloud storage is denied".format(db_storage.display_name), manifest_path) full_manifest_path = os.path.join(db_storage.get_storage_dirname(), manifest_path) if not os.path.exists(full_manifest_path) or \ datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) < storage.get_file_last_modified(manifest_path): - # create sub dirs - if os.path.dirname(manifest_path): - os.makedirs(os.path.dirname(full_manifest_path), exist_ok=True) storage.download_file(manifest_path, full_manifest_path) manifest = ImageManifestManager(full_manifest_path) # need to reset previon index @@ -1310,8 +1294,11 @@ def content(self, request, pk): return Response(data=msg, status=status.HTTP_404_NOT_FOUND) except Exception as ex: # check that cloud storage was not deleted - if not storage.exists(): - msg = 'The resource {} is no longer available. It may have been deleted'.format(storage.name) + storage_status = storage.get_status() + if storage_status == Status.FORBIDDEN: + msg = 'The resource {} is no longer available. Access forbidden.'.format(storage.name) + elif storage_status == Status.NOT_FOUND: + msg = 'The resource {} not found. It may have been deleted.'.format(storage.name) else: msg = str(ex) return HttpResponseBadRequest(msg) @@ -1340,34 +1327,89 @@ def preview(self, request, pk): 'specific_attributes': db_storage.get_specific_attributes() } storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) - storage.initialize_content() - storage_images = [f for f in storage.content if MEDIA_TYPES['image']['has_mime_type'](f)] - if not len(storage_images): + if not db_storage.manifests.count(): + raise Exception('Cannot get the cloud storage preview. There is no manifest file') + preview_path = None + for manifest_model in db_storage.manifests.all(): + full_manifest_path = os.path.join(db_storage.get_storage_dirname(), manifest_model.filename) + if not os.path.exists(full_manifest_path) or \ + datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) < storage.get_file_last_modified(manifest_model.filename): + storage.download_file(manifest_model.filename, full_manifest_path) + manifest = ImageManifestManager(os.path.join(db_storage.get_storage_dirname(), manifest_model.filename)) + manifest.reset_index() + manifest.init_index() + if not len(manifest): + continue + preview_info = manifest[0] + preview_path = ''.join([preview_info['name'], preview_info['extension']]) + break + if not preview_path: msg = 'Cloud storage {} does not contain any images'.format(pk) slogger.cloud_storage[pk].info(msg) return HttpResponseBadRequest(msg) + + file_status = storage.get_file_status(preview_path) + if file_status == Status.NOT_FOUND: + raise FileNotFoundError(errno.ENOENT, + "Not found on the cloud storage {}".format(db_storage.display_name), preview_path) + elif file_status == Status.FORBIDDEN: + raise PermissionError(errno.EACCES, + "Access to the file on the '{}' cloud storage is denied".format(db_storage.display_name), preview_path) with NamedTemporaryFile() as temp_image: - storage.download_file(storage_images[0], temp_image.name) + storage.download_file(preview_path, temp_image.name) reader = ImageListReader([temp_image.name]) preview = reader.get_preview() preview.save(db_storage.get_preview_path()) - buf = io.BytesIO() - PILImage.open(db_storage.get_preview_path()).save(buf, format='JPEG') - buf.seek(0) content_type = mimetypes.guess_type(db_storage.get_preview_path())[0] - return HttpResponse(buf.getvalue(), content_type) + return HttpResponse(open(db_storage.get_preview_path(), 'rb').read(), content_type) except CloudStorageModel.DoesNotExist: message = f"Storage {pk} does not exist" slogger.glob.error(message) return HttpResponseNotFound(message) except Exception as ex: # check that cloud storage was not deleted - if not storage.exists(): - msg = 'The resource {} is no longer available. It may have been deleted'.format(storage.name) + storage_status = storage.get_status() + if storage_status == Status.FORBIDDEN: + msg = 'The resource {} is no longer available. Access forbidden.'.format(storage.name) + elif storage_status == Status.NOT_FOUND: + msg = 'The resource {} not found. It may have been deleted.'.format(storage.name) else: msg = str(ex) return HttpResponseBadRequest(msg) + @swagger_auto_schema( + method='get', + operation_summary='Method returns a cloud storage status', + responses={ + '200': openapi.Response(description='Status'), + }, + tags=['cloud storages'] + ) + @action(detail=True, methods=['GET'], url_path='status') + def status(self, request, pk): + try: + db_storage = CloudStorageModel.objects.get(pk=pk) + credentials = Credentials() + credentials.convert_from_db({ + 'type': db_storage.credentials_type, + 'value': db_storage.credentials, + }) + details = { + 'resource': db_storage.resource, + 'credentials': credentials, + 'specific_attributes': db_storage.get_specific_attributes() + } + storage = get_cloud_storage_instance(cloud_provider=db_storage.provider_type, **details) + storage_status = storage.get_status() + return HttpResponse(storage_status) + except CloudStorageModel.DoesNotExist: + message = f"Storage {pk} does not exist" + slogger.glob.error(message) + return HttpResponseNotFound(message) + except Exception as ex: + msg = str(ex) + return HttpResponseBadRequest(msg) + def rq_handler(job, exc_type, exc_value, tb): job.exc_info = "".join( traceback.format_exception_only(exc_type, exc_value)) @@ -1501,5 +1543,3 @@ def _export_annotations(db_task, rq_id, request, format_name, action, callback, meta={ 'request_time': timezone.localtime() }, result_ttl=ttl, failure_ttl=ttl) return Response(status=status.HTTP_202_ACCEPTED) - - diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index 0a3829984ff8..5bbefe537afc 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -268,6 +268,7 @@ class _ManifestManager(ABC): } def __init__(self, path, *args, **kwargs): self._manifest = _Manifest(path) + self._index = _Index(os.path.dirname(self._manifest.path)) def _parse_line(self, line): """ Getting a random line from the manifest file """ @@ -286,7 +287,6 @@ def _parse_line(self, line): return json.loads(properties) def init_index(self): - self._index = _Index(os.path.dirname(self._manifest.path)) if os.path.exists(self._index.path): self._index.load() else: @@ -294,7 +294,6 @@ def init_index(self): self._index.dump() def reset_index(self): - self._index = _Index(os.path.dirname(self._manifest.path)) if os.path.exists(self._index.path): self._index.remove() From ea6a0d9e338305166343479f6bee22a226cb0826 Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 26 Aug 2021 14:15:00 +0300 Subject: [PATCH 14/24] Remove unused import --- cvat/apps/engine/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index fd29d8211403..b69ffc7f7c1c 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -7,7 +7,7 @@ from diskcache import Cache from django.conf import settings -from tempfile import NamedTemporaryFile, gettempdir +from tempfile import NamedTemporaryFile from cvat.apps.engine.log import slogger from cvat.apps.engine.media_extractors import (Mpeg4ChunkWriter, From 3d01a282f5681d0ec84a37347297f993ebb54777 Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 26 Aug 2021 19:27:53 +0300 Subject: [PATCH 15/24] Add manifest set_index method --- cvat/apps/engine/task.py | 2 +- cvat/apps/engine/views.py | 9 ++++----- utils/dataset_manifest/core.py | 4 ++++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 8a386c9b28fa..e3cb293e80dc 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -260,7 +260,7 @@ def _create_thread(tid, data, isImport=False): cloud_storage_manifest = ImageManifestManager( os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0]) ) - cloud_storage_manifest.init_index() + cloud_storage_manifest.set_index() media_files = sorted(media['image']) content = cloud_storage_manifest.get_subset(media_files) manifest.create(content) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 1c91d22d5576..d728bd5bbc8a 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1350,9 +1350,8 @@ def content(self, request, pk): datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) < storage.get_file_last_modified(manifest_path): storage.download_file(manifest_path, full_manifest_path) manifest = ImageManifestManager(full_manifest_path) - # need to reset previon index - manifest.reset_index() - manifest.init_index() + # need to update index + manifest.set_index() manifest_files = manifest.data return Response(data=manifest_files, content_type="text/plain") @@ -1408,8 +1407,8 @@ def preview(self, request, pk): datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) < storage.get_file_last_modified(manifest_model.filename): storage.download_file(manifest_model.filename, full_manifest_path) manifest = ImageManifestManager(os.path.join(db_storage.get_storage_dirname(), manifest_model.filename)) - manifest.reset_index() - manifest.init_index() + # need to update index + manifest.set_index() if not len(manifest): continue preview_info = manifest[0] diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index 5bbefe537afc..02d099255a0b 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -297,6 +297,10 @@ def reset_index(self): if os.path.exists(self._index.path): self._index.remove() + def set_index(self): + self.reset_index() + self.init_index() + @abstractmethod def create(self, content, **kwargs): pass From 32761b6b689bff99d37f65ee3e2ed806937b8412 Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 26 Aug 2021 19:28:28 +0300 Subject: [PATCH 16/24] Implement status support for Azure blob container --- cvat/apps/engine/cloud_provider.py | 40 ++++++++++++++++-------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 47f8672ba518..a95da5eb2a49 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -14,7 +14,7 @@ from botocore.handlers import disable_signing from azure.storage.blob import BlobServiceClient -from azure.core.exceptions import ResourceExistsError +from azure.core.exceptions import ResourceExistsError, HttpResponseError from azure.storage.blob import PublicAccess from cvat.apps.engine.log import slogger @@ -272,23 +272,7 @@ def create(self): raise Exception(msg) def _head(self): - #TODO - raise NotImplementedError() - - def get_status(self): - # TODO - raise NotImplementedError() - - def get_file_status(self, key): - # TODO - raise NotImplementedError() - - # def exists(self): - # return self._container_client.exists(timeout=2) - - # def is_object_exist(self, file_name): - # blob_client = self._container_client.get_blob_client(file_name) - # return blob_client.exists() + return self._container_client.get_container_properties() def _head_file(self, key): blob_client = self.container.get_blob_client(key) @@ -297,6 +281,26 @@ def _head_file(self, key): def get_file_last_modified(self, key): return self._head_file(key).last_modified + def get_status(self): + try: + self._head() + return Status.AVAILABLE + except HttpResponseError as ex: + if ex.status_code == 403: + return Status.FORBIDDEN + else: + return Status.NOT_FOUND + + def get_file_status(self, key): + try: + self._head_file(key) + return Status.AVAILABLE + except HttpResponseError as ex: + if ex.status_code == 403: + return Status.FORBIDDEN + else: + return Status.NOT_FOUND + def upload_file(self, file_obj, file_name): self._container_client.upload_blob(name=file_name, data=file_obj) From f574fee89d56c7e409bc2d7ea2269905daa3606a Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 26 Aug 2021 19:29:26 +0300 Subject: [PATCH 17/24] Move specific attributes parsing into utils --- cvat/apps/engine/models.py | 7 ++----- cvat/apps/engine/serializers.py | 14 +++----------- cvat/apps/engine/utils.py | 9 ++++++++- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 544fb0e0b99f..fce84cf86d83 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -12,6 +12,7 @@ from django.db import models from django.utils.translation import gettext_lazy as _ +from cvat.apps.engine.utils import parse_specific_attributes class SafeCharField(models.CharField): def get_prep_value(self, value): @@ -619,8 +620,4 @@ def get_preview_path(self): return os.path.join(self.get_storage_dirname(), 'preview.jpeg') def get_specific_attributes(self): - specific_attributes = self.specific_attributes - return { - item.split('=')[0].strip(): item.split('=')[1].strip() - for item in specific_attributes.split('&') - } if specific_attributes else dict() + return parse_specific_attributes(self.specific_attributes) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index d74d85ae694d..0bb40601bc87 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -13,6 +13,7 @@ from cvat.apps.engine import models from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials, Status from cvat.apps.engine.log import slogger +from cvat.apps.engine.utils import parse_specific_attributes class BasicUserSerializer(serializers.ModelSerializer): def validate(self, data): @@ -846,11 +847,7 @@ def create(self, validated_data): details = { 'resource': validated_data.get('resource'), 'credentials': credentials, - 'specific_attributes': { - item.split('=')[0].strip(): item.split('=')[1].strip() - for item in validated_data.get('specific_attributes').split('&') - } if len(validated_data.get('specific_attributes', '')) - else dict() + 'specific_attributes': parse_specific_attributes(validate_data.get('specific_attributes')) } storage = get_cloud_storage_instance(cloud_provider=provider_type, **details) if should_be_created: @@ -862,7 +859,6 @@ def create(self, validated_data): storage_status = storage.get_status() if storage_status == Status.AVAILABLE: - manifests = validated_data.pop('manifests') # check for existence of manifest files for manifest in manifests: @@ -916,11 +912,7 @@ def update(self, instance, validated_data): details = { 'resource': instance.resource, 'credentials': credentials, - 'specific_attributes': { - item.split('=')[0].strip(): item.split('=')[1].strip() - for item in instance.specific_attributes.split('&') - } if len(instance.specific_attributes) - else dict() + 'specific_attributes': parse_specific_attributes(instance.specific_attributes) } storage = get_cloud_storage_instance(cloud_provider=instance.provider_type, **details) storage_status = storage.get_status() diff --git a/cvat/apps/engine/utils.py b/cvat/apps/engine/utils.py index 87b7b856e301..c7d8ed49ccd8 100644 --- a/cvat/apps/engine/utils.py +++ b/cvat/apps/engine/utils.py @@ -98,4 +98,11 @@ def md5_hash(frame): frame = frame.to_image() elif isinstance(frame, str): frame = Image.open(frame, 'r') - return hashlib.md5(frame.tobytes()).hexdigest() # nosec \ No newline at end of file + return hashlib.md5(frame.tobytes()).hexdigest() # nosec + +def parse_specific_attributes(specific_attributes): + assert isinstance(specific_attributes, str), 'Specific attributes must be a string' + return { + item.split('=')[0].strip(): item.split('=')[1].strip() + for item in specific_attributes.split('&') + } if specific_attributes else dict() \ No newline at end of file From 3ce5bcd3c36034422a611c66c791634e011fd614 Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 26 Aug 2021 19:30:01 +0300 Subject: [PATCH 18/24] Fix missing in migration --- cvat/apps/engine/migrations/0041_auto_20210813_0853.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/migrations/0041_auto_20210813_0853.py b/cvat/apps/engine/migrations/0041_auto_20210813_0853.py index c66e76135b49..ecf416367a4e 100644 --- a/cvat/apps/engine/migrations/0041_auto_20210813_0853.py +++ b/cvat/apps/engine/migrations/0041_auto_20210813_0853.py @@ -21,7 +21,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('filename', models.CharField(default='manifest.jsonl', max_length=1024)), - ('cloud_storage', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='manifests', to='engine.cloudstorage')), + ('cloud_storage', models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='manifests', to='engine.cloudstorage')), ], ), ] From d354b1887473c44c673981b4c1d6d36b3d0a99f4 Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 26 Aug 2021 19:34:24 +0300 Subject: [PATCH 19/24] Fix error display --- cvat/apps/engine/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index d728bd5bbc8a..61688290befc 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1297,7 +1297,7 @@ def create(self, request, *args, **kwargs): msg_body = "" for ex in exceptions.args: for field, ex_msg in ex.items(): - msg_body += ": ".join([field, str(ex_msg[0])]) + msg_body += ': '.join([field, ex_msg if isinstance(ex_msg, str) else str(ex_msg[0])]) msg_body += '\n' return HttpResponseBadRequest(msg_body) except APIException as ex: From c1f68a770032bf7d7417575bd52a61c5701ccd62 Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 27 Aug 2021 10:23:47 +0300 Subject: [PATCH 20/24] some fix --- cvat/apps/engine/cache.py | 15 ++++++++++++--- cvat/apps/engine/serializers.py | 24 ++++++++++++++++++------ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index b69ffc7f7c1c..71380f281a7f 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -15,7 +15,7 @@ ImageDatasetManifestReader, VideoDatasetManifestReader) from cvat.apps.engine.models import DataChoice, StorageChoice from cvat.apps.engine.models import DimensionType -from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials +from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials, Status from cvat.apps.engine.utils import md5_hash class CacheInteraction: def __init__(self, dimension=DimensionType.DIM_2D): @@ -100,9 +100,18 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number): slogger.cloud_storage[db_cloud_storage.id].warning('Hash sums of files {} do not match'.format(file_name)) images.append((source_path, source_path, None)) except Exception as ex: - if not cloud_storage_instance.exists(): - msg = 'The resource {} is no longer available. It may have been deleted'.format(cloud_storage_instance.name) + storage_status = cloud_storage_instance.get_status() + if storage_status == Status.FORBIDDEN: + msg = 'The resource {} is no longer available. Access forbidden.'.format(cloud_storage_instance.name) + elif storage_status == Status.NOT_FOUND: + msg = 'The resource {} not found. It may have been deleted.'.format(cloud_storage_instance.name) else: + # check status of last file + file_status = cloud_storage_instance.get_file_status(file_name) + if file_status == Status.NOT_FOUND: + raise Exception("'{}' not found on the cloud storage '{}'".format(file_name, cloud_storage_instance.name)) + elif file_status == Status.FORBIDDEN: + raise Exception("Access to the file '{}' on the '{}' cloud storage is denied".format(file_name, cloud_storage_instance.name)) msg = str(ex) raise Exception(msg) else: diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 0bb40601bc87..029ef3520023 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -847,7 +847,7 @@ def create(self, validated_data): details = { 'resource': validated_data.get('resource'), 'credentials': credentials, - 'specific_attributes': parse_specific_attributes(validate_data.get('specific_attributes')) + 'specific_attributes': parse_specific_attributes(validated_data.get('specific_attributes', '')) } storage = get_cloud_storage_instance(cloud_provider=provider_type, **details) if should_be_created: @@ -860,13 +860,19 @@ def create(self, validated_data): storage_status = storage.get_status() if storage_status == Status.AVAILABLE: manifests = validated_data.pop('manifests') - # check for existence of manifest files + # check manifest files availability for manifest in manifests: - if not storage.get_file_status(manifest.get('filename')) == Status.AVAILABLE: + file_status = storage.get_file_status(manifest.get('filename')) + if file_status == Status.NOT_FOUND: raise serializers.ValidationError({ 'manifests': "The '{}' file does not exist on '{}' cloud storage" \ .format(manifest.get('filename'), storage.name) - }) + }) + elif file_status == Status.FORBIDDEN: + raise serializers.ValidationError({ + 'manifests': "The '{}' file does not available on '{}' cloud storage. Access denied" \ + .format(manifest.get('filename'), storage.name) + }) db_storage = models.CloudStorage.objects.create( credentials=credentials.convert_to_db(), @@ -926,11 +932,17 @@ def update(self, instance, validated_data): if delta_to_create: # check manifest files existing for manifest in delta_to_create: - if not storage.get_file_status(manifest) == Status.AVAILABLE: + file_status = storage.get_file_status(manifest) + if file_status == Status.NOT_FOUND: raise serializers.ValidationError({ 'manifests': "The '{}' file does not exist on '{}' cloud storage" .format(manifest, storage.name) - }) + }) + elif file_status == Status.FORBIDDEN: + raise serializers.ValidationError({ + 'manifests': "The '{}' file does not available on '{}' cloud storage. Access denied" \ + .format(manifest.get('filename'), storage.name) + }) manifest_instances = [models.Manifest(filename=f, cloud_storage=instance) for f in delta_to_create] models.Manifest.objects.bulk_create(manifest_instances) instance.save() From b0f42afdc2953bdcc54e724b5b48d6918d54224d Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 30 Aug 2021 13:11:22 +0300 Subject: [PATCH 21/24] Update migration dependency --- cvat/apps/engine/migrations/0041_auto_20210827_0258.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/migrations/0041_auto_20210827_0258.py b/cvat/apps/engine/migrations/0041_auto_20210827_0258.py index 0e089c0e630a..ff1f60be94f7 100644 --- a/cvat/apps/engine/migrations/0041_auto_20210827_0258.py +++ b/cvat/apps/engine/migrations/0041_auto_20210827_0258.py @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('engine', '0040_cloud_storage'), + ('engine', '0041_auto_20210813_0853'), ] operations = [ From 5f94b32a38d531656f3805954739ba4825db43ec Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 30 Aug 2021 13:11:46 +0300 Subject: [PATCH 22/24] Update google cloud storage status --- cvat/apps/engine/cloud_provider.py | 37 +++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 6f3eadee0a8f..869b2ad1385f 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -8,8 +8,6 @@ from abc import ABC, abstractmethod, abstractproperty from enum import Enum from io import BytesIO -import os -import os.path from boto3.s3.transfer import TransferConfig from botocore.exceptions import ClientError @@ -20,6 +18,7 @@ from azure.storage.blob import PublicAccess from google.cloud import storage +from google.cloud.exceptions import NotFound as GoogleCloudNotFound, Forbidden as GoogleCloudForbidden from cvat.apps.engine.log import slogger from cvat.apps.engine.models import CredentialsTypeChoice, CloudProviderChoice @@ -341,6 +340,20 @@ def download_fileobj(self, key): class GOOGLE_DRIVE(_CloudStorage): pass +def _define_gcs_status(func): + def wrapper(self, key=None): + try: + if not key: + func(self) + else: + func(self, key) + return Status.AVAILABLE + except GoogleCloudNotFound: + return Status.NOT_FOUND + except GoogleCloudForbidden: + return Status.FORBIDDEN + return wrapper + class GoogleCloudStorage(_CloudStorage): def __init__(self, bucket_name, prefix=None, service_account_json=None, project=None, location=None): @@ -366,8 +379,20 @@ def bucket(self): def name(self): return self._bucket.name - def exists(self): - return self._storage_client.lookup_bucket(self.name) is not None + def _head(self): + return self._storage_client.get_bucket(bucket_or_name=self.name) + + def _head_file(self, key): + blob = self.bucket.blob(key) + return self._storage_client._get_resource(blob.path) + + @_define_gcs_status + def get_status(self): + self._head() + + @_define_gcs_status + def get_file_status(self, key): + self._head_file(key) def initialize_content(self): self._files = [ @@ -386,9 +411,6 @@ def download_fileobj(self, key): buf.seek(0) return buf - def is_object_exist(self, key): - return self.bucket.blob(key).exists() - def upload_file(self, file_obj, file_name): self.bucket.blob(file_name).upload_from_file(file_obj) @@ -414,7 +436,6 @@ def get_file_last_modified(self, key): blob.reload() return blob.updated - class Credentials: __slots__ = ('key', 'secret_key', 'session_token', 'account_name', 'key_file_path', 'credentials_type') From 1cff0915d43d9910cfac89fe054981bde092879d Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 30 Aug 2021 13:59:28 +0300 Subject: [PATCH 23/24] Update migrtaions --- cvat/apps/engine/migrations/0041_auto_20210827_0258.py | 2 +- ...041_auto_20210813_0853.py => 0042_auto_20210830_1056.py} | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename cvat/apps/engine/migrations/{0041_auto_20210813_0853.py => 0042_auto_20210830_1056.py} (80%) diff --git a/cvat/apps/engine/migrations/0041_auto_20210827_0258.py b/cvat/apps/engine/migrations/0041_auto_20210827_0258.py index ff1f60be94f7..0e089c0e630a 100644 --- a/cvat/apps/engine/migrations/0041_auto_20210827_0258.py +++ b/cvat/apps/engine/migrations/0041_auto_20210827_0258.py @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('engine', '0041_auto_20210813_0853'), + ('engine', '0040_cloud_storage'), ] operations = [ diff --git a/cvat/apps/engine/migrations/0041_auto_20210813_0853.py b/cvat/apps/engine/migrations/0042_auto_20210830_1056.py similarity index 80% rename from cvat/apps/engine/migrations/0041_auto_20210813_0853.py rename to cvat/apps/engine/migrations/0042_auto_20210830_1056.py index ecf416367a4e..7b5a496af97c 100644 --- a/cvat/apps/engine/migrations/0041_auto_20210813_0853.py +++ b/cvat/apps/engine/migrations/0042_auto_20210830_1056.py @@ -1,4 +1,4 @@ -# Generated by Django 3.1.13 on 2021-08-13 08:53 +# Generated by Django 3.1.13 on 2021-08-30 10:56 from django.db import migrations, models import django.db.models.deletion @@ -7,14 +7,14 @@ class Migration(migrations.Migration): dependencies = [ - ('engine', '0040_cloud_storage'), + ('engine', '0041_auto_20210827_0258'), ] operations = [ migrations.AlterField( model_name='cloudstorage', name='credentials_type', - field=models.CharField(choices=[('KEY_SECRET_KEY_PAIR', 'KEY_SECRET_KEY_PAIR'), ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=29), + field=models.CharField(choices=[('KEY_SECRET_KEY_PAIR', 'KEY_SECRET_KEY_PAIR'), ('ACCOUNT_NAME_TOKEN_PAIR', 'ACCOUNT_NAME_TOKEN_PAIR'), ('KEY_FILE_PATH', 'KEY_FILE_PATH'), ('ANONYMOUS_ACCESS', 'ANONYMOUS_ACCESS')], max_length=29), ), migrations.CreateModel( name='Manifest', From 39881bb7a85b91a1abb5edb5dca8e69cb1b883e8 Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 30 Aug 2021 17:46:03 +0300 Subject: [PATCH 24/24] Update CHANGELOG --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ffa5a216297..f5e168ec4f2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,11 +15,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 and project with 3D tasks () - Additional inline tips in interactors with demo gifs () - Added intelligent scissors blocking feature () +- Support cloud storage status () +- Support cloud storage preview () ### Changed - Non-blocking UI when using interactors () - "Selected opacity" slider now defines opacity level for shapes being drawnSelected opacity () +- Cloud storage creating and updating () +- Way of working with cloud storage content () ### Deprecated @@ -27,7 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed -- TDB +- Support TEMP_KEY_SECRET_KEY_TOKEN_SET for AWS S3 cloud storage () ### Fixed