From a3caa9e7775d3bca18de8eeb1a0f8c2e6fc32e78 Mon Sep 17 00:00:00 2001
From: Maria Khrustaleva
Date: Mon, 14 Oct 2024 12:40:57 +0200
Subject: [PATCH] Add REST API test for task creation with validation and
 cloud data

---
 tests/python/rest_api/test_tasks.py  | 95 ++++++++++++++++++++++++++--
 tests/python/shared/utils/helpers.py | 12 +++-
 2 files changed, 98 insertions(+), 9 deletions(-)

diff --git a/tests/python/rest_api/test_tasks.py b/tests/python/rest_api/test_tasks.py
index 408e74d88959..c57dec13f639 100644
--- a/tests/python/rest_api/test_tasks.py
+++ b/tests/python/rest_api/test_tasks.py
@@ -32,6 +32,7 @@
     ClassVar,
     Dict,
     Generator,
+    Iterable,
     List,
     Optional,
     Sequence,
@@ -1529,12 +1530,13 @@ def _create_task_with_cloud_data(
         server_files: List[str],
         use_cache: bool = True,
         sorting_method: str = "lexicographical",
-        spec: Optional[Dict[str, Any]] = None,
         data_type: str = "image",
         video_frame_count: int = 10,
         server_files_exclude: Optional[List[str]] = None,
-        org: Optional[str] = None,
+        org: str = "",
         filenames: Optional[List[str]] = None,
+        task_spec_kwargs: Optional[Dict[str, Any]] = None,
+        data_spec_kwargs: Optional[Dict[str, Any]] = None,
     ) -> Tuple[int, Any]:
         s3_client = s3.make_client(bucket=cloud_storage["resource"])
         if data_type == "video":
@@ -1551,7 +1553,9 @@ def _create_task_with_cloud_data(
             )
         else:
             images = generate_image_files(
-                3, **({"prefixes": ["img_"] * 3} if not filenames else {"filenames": filenames})
+                3,
+                sizes=[(100, 50) if i % 2 else (50, 100) for i in range(3)],
+                **({"prefixes": ["img_"] * 3} if not filenames else {"filenames": filenames}),
             )
 
             for image in images:
@@ -1598,6 +1602,7 @@
                     "name": "car",
                 }
             ],
+            **(task_spec_kwargs or {}),
         }
 
         data_spec = {
@@ -1608,9 +1613,8 @@
                 server_files if not use_manifest else server_files + ["test/manifest.jsonl"]
             ),
             "sorting_method": sorting_method,
+            **(data_spec_kwargs or {}),
         }
-        if spec is not None:
-            data_spec.update(spec)
 
         if server_files_exclude:
             data_spec["server_files_exclude"] = server_files_exclude
@@ -1984,7 +1988,7 @@ def test_create_task_with_cloud_storage_and_check_retrieve_data_meta(
             use_cache=False,
             server_files=["test/video/video.avi"],
             org=org,
-            spec=data_spec,
+            data_spec_kwargs=data_spec,
             data_type="video",
         )
 
@@ -2550,6 +2554,85 @@ def test_can_create_task_with_gt_job_from_video(
         else:
             assert len(validation_frames) == validation_frames_count
 
+    @pytest.mark.with_external_services
+    @pytest.mark.parametrize("cloud_storage_id", [2])
+    @pytest.mark.parametrize(
+        "validation_mode",
+        [
+            models.ValidationMode("gt"),
+            models.ValidationMode("gt_pool"),
+        ],
+    )
+    def test_can_create_task_with_validation_and_cloud_data(
+        self,
+        cloud_storage_id: int,
+        validation_mode: models.ValidationMode,
+        request: pytest.FixtureRequest,
+        admin_user: str,
+        cloud_storages: Iterable,
+    ):
+        cloud_storage = cloud_storages[cloud_storage_id]
+        server_files = [f"test/sub_0/img_{i}.jpeg" for i in range(3)]
+        validation_frames = ["test/sub_0/img_1.jpeg"]
+
+        (task_id, _) = self._create_task_with_cloud_data(
+            request,
+            cloud_storage,
+            use_manifest=False,
+            server_files=server_files,
+            sorting_method=models.SortingMethod(
+                "random"
+            ),  # only random sorting can be used with gt_pool
+            data_spec_kwargs={
+                "validation_params": models.DataRequestValidationParams._from_openapi_data(
+                    mode=validation_mode,
+                    frames=validation_frames,
+                    frame_selection_method=models.FrameSelectionMethod("manual"),
+                    frames_per_job_count=1,
+                )
+            },
+            task_spec_kwargs={
+                # in case of gt_pool: each regular job will contain 1 regular and 1 validation frame
+                # (the number of validation frames is not included in segment_size)
+                "segment_size": 1,
+            },
+        )
+
+        with make_api_client(admin_user) as api_client:
+            # check that a GT job was created
+            (paginated_jobs, _) = api_client.jobs_api.list(task_id=task_id, type="ground_truth")
+            assert 1 == len(paginated_jobs["results"])
+
+            (paginated_jobs, _) = api_client.jobs_api.list(task_id=task_id, type="annotation")
+            jobs_count = (
+                len(server_files) - len(validation_frames)
+                if validation_mode == models.ValidationMode("gt_pool")
+                else len(server_files)
+            )
+            assert jobs_count == len(paginated_jobs["results"])
+            # check that the returned image meta corresponds to the chunk data
+            # Note: the meta is based on the order of images in the database,
+            # while a chunk with CS data is based on the order of images in the manifest
+            for job in paginated_jobs["results"]:
+                (job_meta, _) = api_client.jobs_api.retrieve_data_meta(job["id"])
+                (_, response) = api_client.jobs_api.retrieve_data(
+                    job["id"], type="chunk", quality="compressed", index=0
+                )
+                chunk_file = io.BytesIO(response.data)
+                assert zipfile.is_zipfile(chunk_file)
+
+                with zipfile.ZipFile(chunk_file, "r") as chunk_archive:
+                    chunk_images = {
+                        int(os.path.splitext(name)[0]): np.array(
+                            Image.open(io.BytesIO(chunk_archive.read(name)))
+                        )
+                        for name in chunk_archive.namelist()
+                    }
+                    chunk_images = dict(sorted(chunk_images.items(), key=lambda e: e[0]))
+
+                for img, img_meta in zip(chunk_images.values(), job_meta.frames):
+                    assert (img.shape[0], img.shape[1]) == (img_meta.height, img_meta.width)
+
 
 class _SourceDataType(str, Enum):
     images = "images"
diff --git a/tests/python/shared/utils/helpers.py b/tests/python/shared/utils/helpers.py
index ac5948182d78..14015f4b2ad3 100644
--- a/tests/python/shared/utils/helpers.py
+++ b/tests/python/shared/utils/helpers.py
@@ -5,7 +5,7 @@
 import subprocess
 from contextlib import closing
 from io import BytesIO
-from typing import Generator, List, Optional
+from typing import Generator, List, Optional, Tuple
 
 import av
 import av.video.reformatter
@@ -25,7 +25,11 @@ def generate_image_file(filename="image.png", size=(100, 50), color=(0, 0, 0)):
 
 
 def generate_image_files(
-    count, prefixes=None, *, filenames: Optional[List[str]] = None
+    count: int,
+    *,
+    prefixes: Optional[List[str]] = None,
+    filenames: Optional[List[str]] = None,
+    sizes: Optional[List[Tuple[int, int]]] = None,
 ) -> List[BytesIO]:
     assert not (prefixes and filenames), "prefixes cannot be used together with filenames"
     assert not prefixes or len(prefixes) == count
@@ -35,7 +39,9 @@
     for i in range(count):
         prefix = prefixes[i] if prefixes else ""
         filename = f"{prefix}{i}.jpeg" if not filenames else filenames[i]
-        image = generate_image_file(filename, color=(i, i, i))
+        image = generate_image_file(
+            filename, color=(i, i, i), **({"size": sizes[i]} if sizes else {})
+        )
         images.append(image)
 
     return images
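
Reviewer note: below is a minimal standalone sketch of the new sizes behavior in
generate_image_files(), runnable without the test suite. The helpers _make_image and
make_images are illustrative stand-ins (not code from this patch), assuming only Pillow:

    # Standalone sketch, not part of the patch: emulates generate_image_files(sizes=...)
    # to show that per-image sizes land on the generated in-memory files.
    from io import BytesIO
    from typing import List, Optional, Tuple

    from PIL import Image


    def _make_image(filename: str, size: Tuple[int, int], color=(0, 0, 0)) -> BytesIO:
        # simplified stand-in for tests/python/shared/utils/helpers.generate_image_file
        buf = BytesIO()
        Image.new("RGB", size, color).save(buf, "jpeg")
        buf.name = filename
        buf.seek(0)
        return buf


    def make_images(count: int, sizes: Optional[List[Tuple[int, int]]] = None) -> List[BytesIO]:
        # mirrors the patched generate_image_files: use the per-image size when
        # sizes is given, otherwise fall back to the helper's default (100, 50)
        return [
            _make_image(f"img_{i}.jpeg", sizes[i] if sizes else (100, 50), color=(i, i, i))
            for i in range(count)
        ]


    if __name__ == "__main__":
        # the same alternating landscape/portrait pattern the test uploads to the bucket
        images = make_images(3, sizes=[(100, 50) if i % 2 else (50, 100) for i in range(3)])
        for buf in images:
            print(buf.name, Image.open(buf).size)  # prints (width, height) per image

The alternating sizes matter for the test above: identical dimensions would let a
frame-order mix-up between the DB-ordered meta and the manifest-ordered chunk go unnoticed,
since every frame would trivially match every (height, width) pair.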