From a3caa9e7775d3bca18de8eeb1a0f8c2e6fc32e78 Mon Sep 17 00:00:00 2001
From: Maria Khrustaleva
Date: Mon, 14 Oct 2024 12:40:57 +0200
Subject: [PATCH] Add REST API test for task creation with validation and
 cloud data

---
 tests/python/rest_api/test_tasks.py  | 95 ++++++++++++++++++++++++++--
 tests/python/shared/utils/helpers.py | 12 +++-
 2 files changed, 98 insertions(+), 9 deletions(-)

diff --git a/tests/python/rest_api/test_tasks.py b/tests/python/rest_api/test_tasks.py
index 408e74d88959..c57dec13f639 100644
--- a/tests/python/rest_api/test_tasks.py
+++ b/tests/python/rest_api/test_tasks.py
@@ -32,6 +32,7 @@
     ClassVar,
     Dict,
     Generator,
+    Iterable,
     List,
     Optional,
     Sequence,
@@ -1529,12 +1530,13 @@ def _create_task_with_cloud_data(
         server_files: List[str],
         use_cache: bool = True,
         sorting_method: str = "lexicographical",
-        spec: Optional[Dict[str, Any]] = None,
         data_type: str = "image",
         video_frame_count: int = 10,
         server_files_exclude: Optional[List[str]] = None,
-        org: Optional[str] = None,
+        org: str = "",
         filenames: Optional[List[str]] = None,
+        task_spec_kwargs: Optional[Dict[str, Any]] = None,
+        data_spec_kwargs: Optional[Dict[str, Any]] = None,
     ) -> Tuple[int, Any]:
         s3_client = s3.make_client(bucket=cloud_storage["resource"])
         if data_type == "video":
@@ -1551,7 +1553,9 @@ def _create_task_with_cloud_data(
             )
         else:
             images = generate_image_files(
-                3, **({"prefixes": ["img_"] * 3} if not filenames else {"filenames": filenames})
+                3,
+                sizes=[(100, 50) if i % 2 else (50, 100) for i in range(3)],
+                **({"prefixes": ["img_"] * 3} if not filenames else {"filenames": filenames}),
             )
 
             for image in images:
@@ -1598,6 +1602,7 @@
                     "name": "car",
                 }
             ],
+            **(task_spec_kwargs or {}),
         }
 
         data_spec = {
@@ -1608,9 +1613,8 @@
                 server_files if not use_manifest else server_files + ["test/manifest.jsonl"]
             ),
             "sorting_method": sorting_method,
+            **(data_spec_kwargs or {}),
         }
-        if spec is not None:
-            data_spec.update(spec)
 
         if server_files_exclude:
             data_spec["server_files_exclude"] = server_files_exclude
@@ -1984,7 +1988,7 @@ def test_create_task_with_cloud_storage_and_check_retrieve_data_meta(
             use_cache=False,
             server_files=["test/video/video.avi"],
             org=org,
-            spec=data_spec,
+            data_spec_kwargs=data_spec,
             data_type="video",
         )
 
@@ -2550,6 +2554,85 @@ def test_can_create_task_with_gt_job_from_video(
         else:
             assert len(validation_frames) == validation_frames_count
 
+    @pytest.mark.with_external_services
+    @pytest.mark.parametrize("cloud_storage_id", [2])
+    @pytest.mark.parametrize(
+        "validation_mode",
+        [
+            models.ValidationMode("gt"),
+            models.ValidationMode("gt_pool"),
+        ],
+    )
+    def test_can_create_task_with_validation_and_cloud_data(
+        self,
+        cloud_storage_id: int,
+        validation_mode: models.ValidationMode,
+        request: pytest.FixtureRequest,
+        admin_user: str,
+        cloud_storages: Iterable,
+    ):
+        cloud_storage = cloud_storages[cloud_storage_id]
+        server_files = [f"test/sub_0/img_{i}.jpeg" for i in range(3)]
+        validation_frames = ["test/sub_0/img_1.jpeg"]
+
+        (task_id, _) = self._create_task_with_cloud_data(
+            request,
+            cloud_storage,
+            use_manifest=False,
+            server_files=server_files,
+            sorting_method=models.SortingMethod(
+                "random"
+            ),  # only random sorting can be used with gt_pool
+            data_spec_kwargs={
+                "validation_params": models.DataRequestValidationParams._from_openapi_data(
+                    mode=validation_mode,
+                    frames=validation_frames,
+                    frame_selection_method=models.FrameSelectionMethod("manual"),
+                    frames_per_job_count=1,
+                )
+            },
+            task_spec_kwargs={
+                # in case of gt_pool: each regular job will contain 1 regular and 1 validation frame
+                # (the number of validation frames is not included in segment_size)
+                "segment_size": 1,
+            },
+        )
+
+        with make_api_client(admin_user) as api_client:
+            # check that a GT job was created
+            (paginated_jobs, _) = api_client.jobs_api.list(task_id=task_id, type="ground_truth")
+            assert 1 == len(paginated_jobs["results"])
+
+            (paginated_jobs, _) = api_client.jobs_api.list(task_id=task_id, type="annotation")
+            jobs_count = (
+                len(server_files) - len(validation_frames)
+                if validation_mode == models.ValidationMode("gt_pool")
+                else len(server_files)
+            )
+            assert jobs_count == len(paginated_jobs["results"])
+            # check that the returned image meta corresponds to the chunk data
+            # Note: the meta is based on the order of images in the database,
+            # while a chunk with CS data is based on the order of images in the manifest
+            for job in paginated_jobs["results"]:
+                (job_meta, _) = api_client.jobs_api.retrieve_data_meta(job["id"])
+                (_, response) = api_client.jobs_api.retrieve_data(
+                    job["id"], type="chunk", quality="compressed", index=0
+                )
+                chunk_file = io.BytesIO(response.data)
+                assert zipfile.is_zipfile(chunk_file)
+
+                with zipfile.ZipFile(chunk_file, "r") as chunk_archive:
+                    chunk_images = {
+                        int(os.path.splitext(name)[0]): np.array(
+                            Image.open(io.BytesIO(chunk_archive.read(name)))
+                        )
+                        for name in chunk_archive.namelist()
+                    }
+                    chunk_images = dict(sorted(chunk_images.items(), key=lambda e: e[0]))
+
+                for img, img_meta in zip(chunk_images.values(), job_meta.frames):
+                    assert (img.shape[0], img.shape[1]) == (img_meta.height, img_meta.width)
+
 
 class _SourceDataType(str, Enum):
     images = "images"
diff --git a/tests/python/shared/utils/helpers.py b/tests/python/shared/utils/helpers.py
index ac5948182d78..14015f4b2ad3 100644
--- a/tests/python/shared/utils/helpers.py
+++ b/tests/python/shared/utils/helpers.py
@@ -5,7 +5,7 @@
 import subprocess
 from contextlib import closing
 from io import BytesIO
-from typing import Generator, List, Optional
+from typing import Generator, List, Optional, Tuple
 
 import av
 import av.video.reformatter
@@ -25,7 +25,11 @@ def generate_image_file(filename="image.png", size=(100, 50), color=(0, 0, 0)):
 
 
 def generate_image_files(
-    count, prefixes=None, *, filenames: Optional[List[str]] = None
+    count: int,
+    *,
+    prefixes: Optional[List[str]] = None,
+    filenames: Optional[List[str]] = None,
+    sizes: Optional[List[Tuple[int, int]]] = None,
 ) -> List[BytesIO]:
     assert not (prefixes and filenames), "prefixes cannot be used together with filenames"
     assert not prefixes or len(prefixes) == count
@@ -35,7 +39,9 @@
     for i in range(count):
         prefix = prefixes[i] if prefixes else ""
         filename = f"{prefix}{i}.jpeg" if not filenames else filenames[i]
-        image = generate_image_file(filename, color=(i, i, i))
+        image = generate_image_file(
+            filename, color=(i, i, i), **({"size": sizes[i]} if sizes else {})
+        )
         images.append(image)
 
     return images
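
Reviewer note: below is a minimal standalone sketch of the new sizes behavior in
generate_image_files(), runnable without the test suite. The helpers _make_image and
make_images are illustrative stand-ins (not code from this patch), assuming only Pillow:

    # Standalone sketch, not part of the patch: emulates generate_image_files(sizes=...)
    # to show that per-image sizes land on the generated in-memory files.
    from io import BytesIO
    from typing import List, Optional, Tuple

    from PIL import Image


    def _make_image(filename: str, size: Tuple[int, int], color=(0, 0, 0)) -> BytesIO:
        # simplified stand-in for tests/python/shared/utils/helpers.generate_image_file
        buf = BytesIO()
        Image.new("RGB", size, color).save(buf, "jpeg")
        buf.name = filename
        buf.seek(0)
        return buf


    def make_images(count: int, sizes: Optional[List[Tuple[int, int]]] = None) -> List[BytesIO]:
        # mirrors the patched generate_image_files: use the per-image size when
        # sizes is given, otherwise fall back to the helper's default (100, 50)
        return [
            _make_image(f"img_{i}.jpeg", sizes[i] if sizes else (100, 50), color=(i, i, i))
            for i in range(count)
        ]


    if __name__ == "__main__":
        # the same alternating landscape/portrait pattern the test uploads to the bucket
        images = make_images(3, sizes=[(100, 50) if i % 2 else (50, 100) for i in range(3)])
        for buf in images:
            print(buf.name, Image.open(buf).size)  # prints (width, height) per image

The alternating sizes matter for the test above: identical dimensions would let a
frame-order mix-up between the DB-ordered meta and the manifest-ordered chunk go unnoticed,
since every frame would trivially match every (height, width) pair.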