Handle task creation with unsupported data types from cloud storage with cache enabled (#7087)
cvat-ai · Nov 7, 2023 · 92f5181 · 92f5181
1 parent 40cb454
commit 92f5181
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 10 deletions.
diff --git a/...log.d/20231107_145655_maria_handle_task_creation_withvideo_from_cs_and_cache.md b/...log.d/20231107_145655_maria_handle_task_creation_withvideo_from_cs_and_cache.md
@@ -0,0 +1,4 @@
+### Changed
+
+- Ignore the "use cache" option on the server when creating a task with cloud storage data (except images)
+  (<https://github.com/opencv/cvat/pull/7087>)
diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
@@ -513,6 +513,12 @@ def _create_thread(
     upload_dir = db_data.get_upload_dirname() if db_data.storage != models.StorageChoice.SHARE else settings.SHARE_ROOT
     is_data_in_cloud = db_data.storage == models.StorageChoice.CLOUD_STORAGE
 
+    job = rq.get_current_job()
+
+    def _update_status(msg: str) -> None:
+        job.meta['status'] = msg
+        job.save_meta()
+
     if data['remote_files'] and not isDatasetImport:
         data['remote_files'] = _download_data(data['remote_files'], upload_dir)
 
@@ -641,17 +647,28 @@ def _create_thread(
                 filtered_files.append(f)
             data['server_files'] = filtered_files
 
+    # count and validate uploaded files
+    media = _count_files(data)
+    media, task_mode = _validate_data(media, manifest_files)
+
+    if is_data_in_cloud:
+        # first we need to filter files and keep only supported ones
+        if any([v for k, v in media.items() if k != 'image']) and db_data.storage_method == models.StorageMethodChoice.CACHE:
+            # FUTURE-FIXME: This is a temporary workaround for creating tasks
+            # with unsupported cloud storage data (video, archive, pdf) when use_cache is enabled
+            db_data.storage_method = models.StorageMethodChoice.FILE_SYSTEM
+            _update_status("The 'use cache' option is ignored")
+
         if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
-            _download_data_from_cloud_storage(db_data.cloud_storage, data['server_files'], upload_dir)
+            filtered_data = []
+            for files in (i for i in media.values() if i):
+                filtered_data.extend(files)
+            _download_data_from_cloud_storage(db_data.cloud_storage, filtered_data, upload_dir)
             is_data_in_cloud = False
             db_data.storage = models.StorageChoice.LOCAL
         else:
             manifest = ImageManifestManager(db_data.get_manifest_path())
 
-    # count and validate uploaded files
-    media = _count_files(data)
-    media, task_mode = _validate_data(media, manifest_files)
-
     if job_file_mapping is not None and task_mode != 'annotation':
         raise ValidationError("job_file_mapping can't be used with sequence-based data like videos")
 
@@ -679,7 +696,6 @@ def _create_thread(
 
     av_scan_paths(upload_dir)
 
-    job = rq.get_current_job()
     job.meta['status'] = 'Media files are being extracted...'
     job.save_meta()
 
@@ -913,10 +929,6 @@ def update_progress(progress):
     video_path = ""
     video_size = (0, 0)
 
-    def _update_status(msg):
-        job.meta['status'] = msg
-        job.save_meta()
-
     db_images = []
 
     if settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE: