Don't fetch existing annotations in `cvat_sdk.auto_annotation.annotate_task` (cvat-ai#7019)

We don't need existing annotations in order to re-annotate a task, but they were being fetched anyway, because that's how the underlying `TaskDataset` class works. Add an option to `TaskDataset` to disable annotation loading, and use it in `annotate_task` to prevent those unnecessary fetches.
retailnext · Oct 25, 2023 · 92fd1c3 · 92fd1c3
1 parent ae04c28
commit 92fd1c3
Show file tree

Hide file tree

Showing 5 changed files with 59 additions and 21 deletions.
diff --git a/changelog.d/20231017_194639_roman_auto_annotate_no_load.md b/changelog.d/20231017_194639_roman_auto_annotate_no_load.md
@@ -0,0 +1,9 @@
+### Added
+
+- \[SDK\] A parameter to `TaskDataset` that allows you to disable annotation loading
+  (<https://github.com/opencv/cvat/pull/7019>)
+
+### Fixed
+- \[SDK\] `cvat_sdk.auto_annotation.annotate_task` no longer performs
+  unnecessary fetches of existing annotations
+  (<https://github.com/opencv/cvat/pull/7019>)
diff --git a/cvat-sdk/cvat_sdk/auto_annotation/driver.py b/cvat-sdk/cvat_sdk/auto_annotation/driver.py
@@ -268,7 +268,7 @@ def annotate_task(
     if pbar is None:
         pbar = NullProgressReporter()
 
-    dataset = TaskDataset(client, task_id)
+    dataset = TaskDataset(client, task_id, load_annotations=False)
 
     assert isinstance(function.spec, DetectionFunctionSpec)
 

diff --git a/cvat-sdk/cvat_sdk/datasets/common.py b/cvat-sdk/cvat_sdk/datasets/common.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: MIT
 
 import abc
-from typing import List
+from typing import List, Optional
 
 import attrs
 import attrs.validators
@@ -53,8 +53,12 @@ class Sample:
     frame_name: str
     """File name of the frame in its task."""
 
-    annotations: FrameAnnotations
-    """Annotations belonging to the frame."""
+    annotations: Optional[FrameAnnotations]
+    """
+    Annotations belonging to the frame.
+
+    Will be None if the dataset was created without loading annotations.
+    """
 
     media: MediaElement
     """Media data of the frame."""
diff --git a/cvat-sdk/cvat_sdk/datasets/task_dataset.py b/cvat-sdk/cvat_sdk/datasets/task_dataset.py
@@ -6,14 +6,14 @@
 
 import zipfile
 from concurrent.futures import ThreadPoolExecutor
-from typing import Sequence
+from typing import Iterable, Sequence
 
 import PIL.Image
 
 import cvat_sdk.core
 import cvat_sdk.core.exceptions
 import cvat_sdk.models as models
-from cvat_sdk.datasets.caching import UpdatePolicy, make_cache_manager
+from cvat_sdk.datasets.caching import CacheManager, UpdatePolicy, make_cache_manager
 from cvat_sdk.datasets.common import FrameAnnotations, MediaElement, Sample, UnsupportedDatasetError
 
 _NUM_DOWNLOAD_THREADS = 4
@@ -49,12 +49,17 @@ def __init__(
         task_id: int,
         *,
         update_policy: UpdatePolicy = UpdatePolicy.IF_MISSING_OR_STALE,
+        load_annotations: bool = True,
     ) -> None:
         """
         Creates a dataset corresponding to the task with ID `task_id` on the
         server that `client` is connected to.
 
         `update_policy` determines when and if the local cache will be updated.
+
+        `load_annotations` determines whether annotations will be loaded from
+        the server. If set to False, the `annotations` field in the samples will
+        be set to None.
         """
 
         self._logger = client.logger
@@ -102,6 +107,26 @@ def ensure_chunk(chunk_index):
 
         self._logger.info("All chunks downloaded")
 
+        if load_annotations:
+            self._load_annotations(cache_manager, sorted(active_frame_indexes))
+        else:
+            self._frame_annotations = {
+                frame_index: None for frame_index in sorted(active_frame_indexes)
+            }
+
+        # TODO: tracks?
+
+        self._samples = [
+            Sample(
+                frame_index=k,
+                frame_name=data_meta.frames[k].name,
+                annotations=v,
+                media=self._TaskMediaElement(self, k),
+            )
+            for k, v in self._frame_annotations.items()
+        ]
+
+    def _load_annotations(self, cache_manager: CacheManager, frame_indexes: Iterable[int]) -> None:
         annotations = cache_manager.ensure_task_model(
             self._task.id,
             "annotations.json",
@@ -110,9 +135,7 @@ def ensure_chunk(chunk_index):
             "annotations",
         )
 
-        self._frame_annotations = {
-            frame_index: FrameAnnotations() for frame_index in sorted(active_frame_indexes)
-        }
+        self._frame_annotations = {frame_index: FrameAnnotations() for frame_index in frame_indexes}
 
         for tag in annotations.tags:
             # Some annotations may belong to deleted frames; skip those.
@@ -123,18 +146,6 @@ def ensure_chunk(chunk_index):
             if shape.frame in self._frame_annotations:
                 self._frame_annotations[shape.frame].shapes.append(shape)
 
-        # TODO: tracks?
-
-        self._samples = [
-            Sample(
-                frame_index=k,
-                frame_name=data_meta.frames[k].name,
-                annotations=v,
-                media=self._TaskMediaElement(self, k),
-            )
-            for k, v in self._frame_annotations.items()
-        ]
-
     @property
     def labels(self) -> Sequence[models.ILabel]:
         """

diff --git a/tests/python/sdk/test_datasets.py b/tests/python/sdk/test_datasets.py
@@ -206,3 +206,17 @@ def test_update(self, monkeypatch: pytest.MonkeyPatch):
         )
 
         assert dataset.samples[6].annotations.shapes[0].label_id == self.expected_labels[0].id
+
+    def test_no_annotations(self):
+        dataset = cvatds.TaskDataset(self.client, self.task.id, load_annotations=False)
+
+        for index, sample in enumerate(dataset.samples):
+            assert sample.frame_index == index
+            assert sample.frame_name == self.images[index].name
+
+            actual_image = sample.media.load_image()
+            expected_image = PIL.Image.open(self.images[index])
+
+            assert actual_image == expected_image
+
+            assert sample.annotations is None