Skip to content

Commit

Permalink
Fix task mode in tasks restored from backup (#6216)
Browse files Browse the repository at this point in the history
- Fixes the problem with the task mode described in "CVAT corrupts annotations when exported with CVAT for video" (#5668)
- Fixes the backup import failure caused by the presence of a manifest file, reported in "Cannot export dataset (KeyError: 'outside') / can't import backup from older version" (#5971)
- Cleaned test assets (removed extra directories from nonexistent tasks and jobs)
  • Loading branch information
zhiltsov-max committed Jun 19, 2023
1 parent 24013a6 commit 38df1cf
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 4 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Issues with running serverless models for EXIF-rotated images. (<https://github.com/opencv/cvat/pull/6275/>)
- File uploading issues when using https configuration. (<https://github.com/opencv/cvat/pull/6308>)
- The problem with the manifest file in tasks restored from backup (<https://github.com/opencv/cvat/issues/5971>)
- The problem with task mode in a task restored from backup (<https://github.com/opencv/cvat/issues/5668>)

### Security
- TBD
Expand Down Expand Up @@ -56,6 +58,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Broken logging in the TransT serverless function
(<https://github.com/opencv/cvat/pull/6290>)

### Security
- TBD

## \[2.4.5] - 2023-06-02
### Added
- Integrated support for sharepoint and cloud storage files, along with
Expand Down
16 changes: 12 additions & 4 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def _validate_manifest(
db_cloud_storage: Optional[Any],
data_storage_method: str,
data_sorting_method: str,
isBackupRestore: bool,
) -> Optional[str]:
if manifests:
if len(manifests) != 1:
Expand All @@ -331,15 +332,21 @@ def _validate_manifest(
if is_manifest(full_manifest_path):
if not (
data_sorting_method == models.SortingMethod.PREDEFINED or
data_storage_method == models.StorageMethodChoice.CACHE and settings.USE_CACHE
(settings.USE_CACHE and data_storage_method == models.StorageMethodChoice.CACHE) or
isBackupRestore
):
cache_disabled_message = ""
if data_storage_method == models.StorageMethodChoice.CACHE and not settings.USE_CACHE:
slogger.glob.warning("This server doesn't allow to use cache for data. "
"Please turn 'use cache' off and try to recreate the task")
cache_disabled_message = (
"This server doesn't allow to use cache for data. "
"Please turn 'use cache' off and try to recreate the task"
)
slogger.glob.warning(cache_disabled_message)

raise ValidationError(
"A manifest file can only be used with the 'use cache' option "
"or when the 'sorting_method' == 'predefined'"
"or when 'sorting_method' is 'predefined'" + \
(". " + cache_disabled_message if cache_disabled_message else "")
)
return manifest_file

Expand Down Expand Up @@ -547,6 +554,7 @@ def _create_thread(
db_cloud_storage=db_data.cloud_storage if is_data_in_cloud else None,
data_storage_method=db_data.storage_method,
data_sorting_method=data['sorting_method'],
isBackupRestore=isBackupRestore,
)

manifest = None
Expand Down
112 changes: 112 additions & 0 deletions tests/python/rest_api/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1527,6 +1527,118 @@ def test_work_with_task_containing_non_stable_cloud_storage_files(
assert image_name in ex.body


class TestTaskBackups:
    """REST API tests for exporting a task backup and restoring a task from it."""

    def _make_client(self) -> Client:
        """Create a client for BASE_URL with a 0.01 status check period."""
        return Client(BASE_URL, config=Config(status_check_period=0.01))

    @pytest.fixture(autouse=True)
    def setup(self, restore_db_per_function, restore_cvat_data, tmp_path: Path, admin_user: str):
        """Prepare a logged-in client and a temporary directory for each test.

        The client is logged in as ``admin_user``; ``self.tmp_dir`` is where
        the tests store downloaded backup archives.
        """
        self.tmp_dir = tmp_path

        self.client = self._make_client()
        self.user = admin_user

        with self.client:
            self.client.login((self.user, USER_PASS))

            # Yield inside the context manager so that the client stays open
            # while the test body runs and is closed only at teardown.
            yield

    @pytest.mark.parametrize("mode", ["annotation", "interpolation"])
    def test_can_export_backup(self, tasks, mode):
        """A task backup can be downloaded and the resulting file is not empty."""
        task_id = next(t for t in tasks if t["mode"] == mode)["id"]
        task = self.client.tasks.retrieve(task_id)

        filename = self.tmp_dir / f"task_{task.id}_backup.zip"
        task.download_backup(filename)

        assert filename.is_file()
        assert filename.stat().st_size > 0

    @pytest.mark.parametrize("mode", ["annotation", "interpolation"])
    def test_can_import_backup(self, tasks, mode):
        """A task of the given mode can be exported and restored from its backup."""
        task_json = next(t for t in tasks if t["mode"] == mode)
        self._test_can_restore_backup_task(task_json["id"])

    @pytest.mark.parametrize("mode", ["annotation", "interpolation"])
    def test_can_import_backup_for_task_in_nondefault_state(self, tasks, mode):
        """A task whose jobs were moved to the 'validation' stage can be restored."""
        # Reproduces the problem with empty 'mode' in a restored task,
        # described in the reproduction steps https://github.com/opencv/cvat/issues/5668

        task_json = next(t for t in tasks if t["mode"] == mode and t["jobs"]["count"])

        task = self.client.tasks.retrieve(task_json["id"])
        jobs = task.get_jobs()
        for j in jobs:
            j.update({"stage": "validation"})

        self._test_can_restore_backup_task(task_json["id"])

    def _test_can_restore_backup_task(self, task_id: int):
        """Export the task, restore it from the backup and compare both tasks.

        Checks that the jobs of the restored task match the original ones and
        that the task descriptions are equal except for the fields that are
        expected to differ or to be reset after a restore.
        """
        task = self.client.tasks.retrieve(task_id)
        (_, response) = self.client.api_client.tasks_api.retrieve(task_id)
        task_json = json.loads(response.data)

        filename = self.tmp_dir / f"task_{task.id}_backup.zip"
        task.download_backup(filename)

        restored_task = self.client.tasks.create_from_backup(filename)

        old_jobs = task.get_jobs()
        new_jobs = restored_task.get_jobs()
        assert len(old_jobs) == len(new_jobs)

        # NOTE(review): the pairwise comparison assumes both job lists are
        # returned in the same order - confirm against the server API.
        for old_job, new_job in zip(old_jobs, new_jobs):
            assert old_job.status == new_job.status
            assert old_job.start_frame == new_job.start_frame
            assert old_job.stop_frame == new_job.stop_frame

        (_, response) = self.client.api_client.tasks_api.retrieve(restored_task.id)
        restored_task_json = json.loads(response.data)

        # Fields that are expected to be reset or to differ in the restored task
        assert restored_task_json["assignee"] is None
        assert restored_task_json["owner"]["username"] == self.user
        assert restored_task_json["id"] != task_json["id"]
        assert restored_task_json["data"] != task_json["data"]
        assert restored_task_json["organization"] is None
        assert restored_task_json["data_compressed_chunk_type"] in ["imageset", "video"]
        if task_json["jobs"]["count"] == 1:
            assert restored_task_json["overlap"] == 0
        else:
            assert restored_task_json["overlap"] == task_json["overlap"]
        assert restored_task_json["jobs"]["completed"] == 0
        assert restored_task_json["jobs"]["validation"] == 0
        assert restored_task_json["source_storage"] is None
        assert restored_task_json["target_storage"] is None
        assert restored_task_json["project_id"] is None

        # Everything else must be identical in both task descriptions
        assert (
            DeepDiff(
                task_json,
                restored_task_json,
                ignore_order=True,
                exclude_regex_paths=[
                    r"root\['id'\]",  # id, must be different
                    r"root\['created_date'\]",  # must be different
                    r"root\['updated_date'\]",  # must be different
                    r"root\['assignee'\]",  # id, depends on the situation
                    r"root\['owner'\]",  # id, depends on the situation
                    r"root\['data'\]",  # id, must be different
                    r"root\['organization'\]",  # depends on the task setup
                    r"root\['project_id'\]",  # should be dropped
                    r"root(\['.*'\])*\['url'\]",  # depends on the task id
                    r"root\['data_compressed_chunk_type'\]",  # depends on the server configuration
                    r"root\['source_storage'\]",  # should be dropped
                    r"root\['target_storage'\]",  # should be dropped
                    r"root\['jobs'\]\['completed'\]",  # job statuses should be renewed
                    r"root\['jobs'\]\['validation'\]",  # job statuses should be renewed
                    # depends on the actual job configuration,
                    # unlike what is obtained from the regular task creation,
                    # where the requested number is recorded
                    r"root\['overlap'\]",
                ],
            )
            == {}
        )


@pytest.mark.usefixtures("restore_db_per_function")
class TestWorkWithGtJobs:
def test_normal_and_gt_job_annotations_are_not_merged(
Expand Down
Binary file modified tests/python/shared/assets/cvat_db/cvat_data.tar.bz2
Binary file not shown.

0 comments on commit 38df1cf

Please sign in to comment.