From 6cf02805358682fdc88439e45f6dc70d0f421062 Mon Sep 17 00:00:00 2001
From: Eric Hofesmann <ehofesmann@gmail.com>
Date: Wed, 1 Dec 2021 15:10:17 -0500
Subject: [PATCH 1/3] add task_size parameter

---
 docs/source/integrations/cvat.rst | 60 +++++++++++++++++++++++++++++++
 fiftyone/utils/cvat.py            | 22 ++++++++----
 2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/docs/source/integrations/cvat.rst b/docs/source/integrations/cvat.rst
index df63d18f5b..329cf6f794 100644
--- a/docs/source/integrations/cvat.rst
+++ b/docs/source/integrations/cvat.rst
@@ -489,6 +489,10 @@ provided:
 -   **occluded_attr** (*None*): an optional attribute name containing existing
     occluded values and/or in which to store downloaded occluded values for all
     objects in the annotation run
+-   **task_size** (*None*): an optional integer specifying the maximum number of
+    images to upload per CVAT task. Videos are always uploaded one per
+    task
+
 
 .. _cvat-label-schema:
 
@@ -1851,6 +1855,62 @@ attributes between annotation runs.
    :alt: cvat-occ-widget
    :align: center
 
+Large annotation runs
+---------------------
+
+The CVAT REST API imposes a limit on the size of requests. This can cause
+errors when uploading annotation runs for sample collections with many
+images or labels to CVAT.
+
+.. note::
+
+    The CVAT maintainers are working on 
+    `an update <https://github.com/openvinotoolkit/cvat/pull/3692>`_
+    to resolve this issue natively. For the time being, the following workflow
+    is our recommended approach to circumvent this issue.
+
+In order to break an annotation run into smaller segments, we provide a
+`task_size` parameter that limits the number of samples that can be uploaded to
+a single task in CVAT. Due to the creation of multiple tasks when `task_size`
+is provided, we recommend providing a `project_name` to group the created tasks
+together.
+
+The `task_size` parameter can be used in conjunction with `segment_size` to
+both define the number of images per task and the number of images per job
+within each task.
+
+.. code:: python
+    :linenos:
+
+    import fiftyone.zoo as foz
+    import fiftyone as fo
+    
+    dataset = foz.load_zoo_dataset("quickstart", max_samples=20).clone()
+    
+    anno_key = "batch_upload"
+    project_name = "batch_example"
+
+    task_size = 6  # 6 images per task
+    segment_size = 2 # 2 images per job
+    
+    results = dataset.annotate(
+        anno_key,
+        label_field="ground_truth",
+        task_size=task_size,
+        segment_size=segment_size,
+        project_name=project_name,
+        launch_editor=True,
+    )
+    
+    # Annotate in CVAT
+    
+    dataset.load_annotations(anno_key, cleanup=True) 
+
+.. note::
+
+    The `task_size` parameter only applies to image datasets since videos are
+    always uploaded one per task.
+
 .. _cvat-annotating-videos:
 
 Annotating videos
diff --git a/fiftyone/utils/cvat.py b/fiftyone/utils/cvat.py
index 0761767678..1d446b9e52 100644
--- a/fiftyone/utils/cvat.py
+++ b/fiftyone/utils/cvat.py
@@ -2525,6 +2525,9 @@ class CVATBackendConfig(foua.AnnotationBackendConfig):
         occluded_attr (None): an optional attribute name containing existing
             occluded values and/or in which to store downloaded occluded values
             for all objects in the annotation run
+        task_size (None): an optional integer specifying the maximum number of
+            images to upload per CVAT task. Videos are always uploaded one per
+            task
     """
 
     def __init__(
@@ -2547,6 +2550,7 @@ def __init__(
         project_name=None,
         project_id=None,
         occluded_attr=None,
+        task_size=None,
         **kwargs,
     ):
         super().__init__(name, label_schema, media_field=media_field, **kwargs)
@@ -2562,6 +2566,7 @@ def __init__(
         self.project_name = project_name
         self.project_id = project_id
         self.occluded_attr = occluded_attr
+        self.task_size = task_size
 
         # store privately so these aren't serialized
         self._username = username
@@ -3443,6 +3448,7 @@ def upload_samples(self, samples, backend):
         config = backend.config
         label_schema = config.label_schema
         occluded_attr = config.occluded_attr
+        task_size = config.task_size
         project_name, project_id = self._parse_project_details(
             config.project_name, config.project_id
         )
@@ -3461,7 +3467,7 @@ def upload_samples(self, samples, backend):
         labels_task_map = {}
 
         num_samples = len(samples)
-        batch_size = self._get_batch_size(samples)
+        batch_size = self._get_batch_size(samples, task_size)
 
         (
             cvat_schema,
@@ -3885,7 +3891,9 @@ def _ensure_one_field_per_type(self, label_schema, verbose=True):
                     label_field,
                 )
 
-    def _get_batch_size(self, samples):
+    def _get_batch_size(self, samples, task_size):
+        samples.compute_metadata()
+
         if samples.media_type == fom.VIDEO:
             # The current implementation (both upload and download) requires
             # frame IDs for all frames that might get labels
@@ -3894,10 +3902,12 @@ def _get_batch_size(self, samples):
             # CVAT only allows for one video per task
             return 1
 
-        samples.compute_metadata()
-
-        # Put all image samples in one task
-        return len(samples)
+        num_samples = len(samples)
+        if task_size is None:
+            # Put all image samples in one task
+            return num_samples
+        else:
+            return min(task_size, num_samples)
 
     def _create_task_upload_data(
         self,

From 4ce410d1ab181177fc4e9a45d2319dda71060f87 Mon Sep 17 00:00:00 2001
From: brimoor <brimoor@umich.edu>
Date: Tue, 28 Dec 2021 11:32:32 -0600
Subject: [PATCH 2/3] linting

---
 docs/source/integrations/cvat.rst | 55 ++++++++++++++-----------------
 fiftyone/utils/cvat.py            | 14 ++++----
 2 files changed, 32 insertions(+), 37 deletions(-)

diff --git a/docs/source/integrations/cvat.rst b/docs/source/integrations/cvat.rst
index 329cf6f794..9d46de6668 100644
--- a/docs/source/integrations/cvat.rst
+++ b/docs/source/integrations/cvat.rst
@@ -464,6 +464,8 @@ In addition, the following CVAT-specific parameters from
 :class:`CVATBackendConfig <fiftyone.utils.cvat.CVATBackendConfig>` can also be
 provided:
 
+-   **task_size** (*None*): an optional maximum number of images to upload per
+    task. Videos are always uploaded one per task
 -   **segment_size** (*None*): the maximum number of images to upload per job.
     Not applicable to videos
 -   **image_quality** (*75*): an int in `[0, 100]` determining the image
@@ -489,10 +491,6 @@ provided:
 -   **occluded_attr** (*None*): an optional attribute name containing existing
     occluded values and/or in which to store downloaded occluded values for all
     objects in the annotation run
--   **task_size** (*None*): an optional integer specifying the maximum number of
-    images to upload per CVAT task. Videos are always uploaded one per
-    task
-
 
 .. _cvat-label-schema:
 
@@ -1855,60 +1853,57 @@ attributes between annotation runs.
    :alt: cvat-occ-widget
    :align: center
 
+.. _cvat-large-runs:
+
 Large annotation runs
 ---------------------
 
-The CVAT REST API imposes a limit on the size of requests. This can cause
-errors when uploading annotation runs for sample collections with many
-images or labels to CVAT.
+The CVAT API imposes a limit on the size of all requests. By default, all
+images are uploaded to a single CVAT task, which can result in errors when
+uploading annotation runs for large sample collections.
 
 .. note::
 
-    The CVAT maintainers are working on 
+    The CVAT maintainers are working on
     `an update <https://github.com/openvinotoolkit/cvat/pull/3692>`_
-    to resolve this issue natively. For the time being, the following workflow
-    is our recommended approach to circumvent this issue.
+    to resolve this issue natively. In the meantime, the following workflow is
+    our recommended approach to circumvent this issue.
 
-In order to break an annotation run into smaller segments, we provide a
-`task_size` parameter that limits the number of samples that can be uploaded to
-a single task in CVAT. Due to the creation of multiple tasks when `task_size`
-is provided, we recommend providing a `project_name` to group the created tasks
-together.
+You can use the `task_size` parameter to break image annotation runs into
+multiple CVAT tasks, each with a specified maximum number of images. Note that
+we recommend providing a `project_name` whenever you use the `task_size`
+pararmeter so that the created tasks will be grouped together.
 
-The `task_size` parameter can be used in conjunction with `segment_size` to
-both define the number of images per task and the number of images per job
-within each task.
+The `task_size` parameter can also be used in conjunction with the
+`segment_size` parameter to configure both the number of images per task as
+well as the number of images per job within each task.
 
 .. code:: python
     :linenos:
 
-    import fiftyone.zoo as foz
     import fiftyone as fo
+    import fiftyone.zoo as foz
     
     dataset = foz.load_zoo_dataset("quickstart", max_samples=20).clone()
     
     anno_key = "batch_upload"
-    project_name = "batch_example"
 
-    task_size = 6  # 6 images per task
-    segment_size = 2 # 2 images per job
-    
     results = dataset.annotate(
         anno_key,
         label_field="ground_truth",
-        task_size=task_size,
-        segment_size=segment_size,
-        project_name=project_name,
+        task_size=6,  # 6 images per task
+        segment_size=2,  # 2 images per job
+        project_name="batch_example",
         launch_editor=True,
     )
-    
-    # Annotate in CVAT
-    
+
+    # Annotate in CVAT...
+
     dataset.load_annotations(anno_key, cleanup=True) 
 
 .. note::
 
-    The `task_size` parameter only applies to image datasets since videos are
+    The `task_size` parameter only applies to image datasets, since videos are
     always uploaded one per task.
 
 .. _cvat-annotating-videos:
diff --git a/fiftyone/utils/cvat.py b/fiftyone/utils/cvat.py
index 1d446b9e52..b3cbf1d60b 100644
--- a/fiftyone/utils/cvat.py
+++ b/fiftyone/utils/cvat.py
@@ -2498,6 +2498,8 @@ class CVATBackendConfig(foua.AnnotationBackendConfig):
         password (None): the CVAT password
         headers (None): an optional dict of headers to add to all CVAT API
             requests
+        task_size (None): an optional maximum number of images to upload per
+            task. Videos are always uploaded one per task
         segment_size (None): maximum number of images per job. Not applicable
             to videos
         image_quality (75): an int in `[0, 100]` determining the image quality
@@ -2525,9 +2527,6 @@ class CVATBackendConfig(foua.AnnotationBackendConfig):
         occluded_attr (None): an optional attribute name containing existing
             occluded values and/or in which to store downloaded occluded values
             for all objects in the annotation run
-        task_size (None): an optional integer specifying the maximum number of
-            images to upload per CVAT task. Videos are always uploaded one per
-            task
     """
 
     def __init__(
@@ -2539,6 +2538,7 @@ def __init__(
         username=None,
         password=None,
         headers=None,
+        task_size=None,
         segment_size=None,
         image_quality=75,
         use_cache=True,
@@ -2550,11 +2550,11 @@ def __init__(
         project_name=None,
         project_id=None,
         occluded_attr=None,
-        task_size=None,
         **kwargs,
     ):
         super().__init__(name, label_schema, media_field=media_field, **kwargs)
         self.url = url
+        self.task_size = task_size
         self.segment_size = segment_size
         self.image_quality = image_quality
         self.use_cache = use_cache
@@ -2566,7 +2566,6 @@ def __init__(
         self.project_name = project_name
         self.project_id = project_id
         self.occluded_attr = occluded_attr
-        self.task_size = task_size
 
         # store privately so these aren't serialized
         self._username = username
@@ -3903,11 +3902,12 @@ def _get_batch_size(self, samples, task_size):
             return 1
 
         num_samples = len(samples)
+
         if task_size is None:
             # Put all image samples in one task
             return num_samples
-        else:
-            return min(task_size, num_samples)
+
+        return min(task_size, num_samples)
 
     def _create_task_upload_data(
         self,

From 79454be7559d48b5e7ece92846b891410c1f7a30 Mon Sep 17 00:00:00 2001
From: brimoor <brimoor@umich.edu>
Date: Tue, 28 Dec 2021 11:41:21 -0600
Subject: [PATCH 3/3] organizing examples

---
 docs/source/integrations/cvat.rst | 106 +++++++++++++++---------------
 1 file changed, 53 insertions(+), 53 deletions(-)

diff --git a/docs/source/integrations/cvat.rst b/docs/source/integrations/cvat.rst
index 9d46de6668..ad60cc394e 100644
--- a/docs/source/integrations/cvat.rst
+++ b/docs/source/integrations/cvat.rst
@@ -1646,6 +1646,59 @@ will be assigned using a round-robin strategy.
     results.cleanup()
     dataset.delete_annotation_run(anno_key)
 
+.. _cvat-large-runs:
+
+Large annotation runs
+---------------------
+
+The CVAT API imposes a limit on the size of all requests. By default, all
+images are uploaded to a single CVAT task, which can result in errors when
+uploading annotation runs for large sample collections.
+
+.. note::
+
+    The CVAT maintainers are working on
+    `an update <https://github.com/openvinotoolkit/cvat/pull/3692>`_
+    to resolve this issue natively. In the meantime, the following workflow is
+    our recommended approach to circumvent this issue.
+
+You can use the `task_size` parameter to break image annotation runs into
+multiple CVAT tasks, each with a specified maximum number of images. Note that
+we recommend providing a `project_name` whenever you use the `task_size`
+parameter so that the created tasks will be grouped together.
+
+The `task_size` parameter can also be used in conjunction with the
+`segment_size` parameter to configure both the number of images per task and
+the number of images per job within each task.
+
+.. code:: python
+    :linenos:
+
+    import fiftyone as fo
+    import fiftyone.zoo as foz
+
+    dataset = foz.load_zoo_dataset("quickstart", max_samples=20).clone()
+
+    anno_key = "batch_upload"
+
+    results = dataset.annotate(
+        anno_key,
+        label_field="ground_truth",
+        task_size=6,  # 6 images per task
+        segment_size=2,  # 2 images per job
+        project_name="batch_example",
+        launch_editor=True,
+    )
+
+    # Annotate in CVAT...
+
+    dataset.load_annotations(anno_key, cleanup=True)
+
+.. note::
+
+    The `task_size` parameter only applies to image datasets, since videos are
+    always uploaded one per task.
+
 Scalar labels
 -------------
 
@@ -1853,59 +1906,6 @@ attributes between annotation runs.
    :alt: cvat-occ-widget
    :align: center
 
-.. _cvat-large-runs:
-
-Large annotation runs
----------------------
-
-The CVAT API imposes a limit on the size of all requests. By default, all
-images are uploaded to a single CVAT task, which can result in errors when
-uploading annotation runs for large sample collections.
-
-.. note::
-
-    The CVAT maintainers are working on
-    `an update <https://github.com/openvinotoolkit/cvat/pull/3692>`_
-    to resolve this issue natively. In the meantime, the following workflow is
-    our recommended approach to circumvent this issue.
-
-You can use the `task_size` parameter to break image annotation runs into
-multiple CVAT tasks, each with a specified maximum number of images. Note that
-we recommend providing a `project_name` whenever you use the `task_size`
-pararmeter so that the created tasks will be grouped together.
-
-The `task_size` parameter can also be used in conjunction with the
-`segment_size` parameter to configure both the number of images per task as
-well as the number of images per job within each task.
-
-.. code:: python
-    :linenos:
-
-    import fiftyone as fo
-    import fiftyone.zoo as foz
-    
-    dataset = foz.load_zoo_dataset("quickstart", max_samples=20).clone()
-    
-    anno_key = "batch_upload"
-
-    results = dataset.annotate(
-        anno_key,
-        label_field="ground_truth",
-        task_size=6,  # 6 images per task
-        segment_size=2,  # 2 images per job
-        project_name="batch_example",
-        launch_editor=True,
-    )
-
-    # Annotate in CVAT...
-
-    dataset.load_annotations(anno_key, cleanup=True) 
-
-.. note::
-
-    The `task_size` parameter only applies to image datasets, since videos are
-    always uploaded one per task.
-
 .. _cvat-annotating-videos:
 
 Annotating videos