From 6cf02805358682fdc88439e45f6dc70d0f421062 Mon Sep 17 00:00:00 2001 From: Eric Hofesmann Date: Wed, 1 Dec 2021 15:10:17 -0500 Subject: [PATCH 1/3] add task_size parameter --- docs/source/integrations/cvat.rst | 60 +++++++++++++++++++++++++++++++ fiftyone/utils/cvat.py | 22 ++++++++---- 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/docs/source/integrations/cvat.rst b/docs/source/integrations/cvat.rst index df63d18f5b..329cf6f794 100644 --- a/docs/source/integrations/cvat.rst +++ b/docs/source/integrations/cvat.rst @@ -489,6 +489,10 @@ provided: - **occluded_attr** (*None*): an optional attribute name containing existing occluded values and/or in which to store downloaded occluded values for all objects in the annotation run +- **task_size** (*None*): an optional integer specifying the maximum number of + images to upload per CVAT task. Videos are always uploaded one per + task + .. _cvat-label-schema: @@ -1851,6 +1855,62 @@ attributes between annotation runs. :alt: cvat-occ-widget :align: center +Large annotation runs +--------------------- + +The CVAT REST API imposes a limit on the size of requests. This can cause +errors when uploading annotation runs for sample collections with many +images or labels to CVAT. + +.. note:: + + The CVAT maintainers are working on + `an update `_ + to resolve this issue natively. For the time being, the following workflow + is our recommended approach to circumvent this issue. + +In order to break an annotation run into smaller segments, we provide a +`task_size` parameter that limits the number of samples that can be uploaded to +a single task in CVAT. Due to the creation of multiple tasks when `task_size` +is provided, we recommend providing a `project_name` to group the created tasks +together. + +The `task_size` parameter can be used in conjunction with `segment_size` to +both define the number of images per task and the number of images per job +within each task. + +.. 
code:: python + :linenos: + + import fiftyone.zoo as foz + import fiftyone as fo + + dataset = foz.load_zoo_dataset("quickstart", max_samples=20).clone() + + anno_key = "batch_upload" + project_name = "batch_example" + + task_size = 6 # 6 images per task + segment_size = 2 # 2 images per job + + results = dataset.annotate( + anno_key, + label_field="ground_truth", + task_size=task_size, + segment_size=segment_size, + project_name=project_name, + launch_editor=True, + ) + + # Annotate in CVAT + + dataset.load_annotations(anno_key, cleanup=True) + +.. note:: + + The `task_size` parameter only applies to image datasets since videos are + always uploaded one per task. + .. _cvat-annotating-videos: Annotating videos diff --git a/fiftyone/utils/cvat.py b/fiftyone/utils/cvat.py index 0761767678..1d446b9e52 100644 --- a/fiftyone/utils/cvat.py +++ b/fiftyone/utils/cvat.py @@ -2525,6 +2525,9 @@ class CVATBackendConfig(foua.AnnotationBackendConfig): occluded_attr (None): an optional attribute name containing existing occluded values and/or in which to store downloaded occluded values for all objects in the annotation run + task_size (None): an optional integer specifying the maximum number of + images to upload per CVAT task. 
Videos are always uploaded one per + task """ def __init__( @@ -2547,6 +2550,7 @@ def __init__( project_name=None, project_id=None, occluded_attr=None, + task_size=None, **kwargs, ): super().__init__(name, label_schema, media_field=media_field, **kwargs) @@ -2562,6 +2566,7 @@ def __init__( self.project_name = project_name self.project_id = project_id self.occluded_attr = occluded_attr + self.task_size = task_size # store privately so these aren't serialized self._username = username @@ -3443,6 +3448,7 @@ def upload_samples(self, samples, backend): config = backend.config label_schema = config.label_schema occluded_attr = config.occluded_attr + task_size = config.task_size project_name, project_id = self._parse_project_details( config.project_name, config.project_id ) @@ -3461,7 +3467,7 @@ def upload_samples(self, samples, backend): labels_task_map = {} num_samples = len(samples) - batch_size = self._get_batch_size(samples) + batch_size = self._get_batch_size(samples, task_size) ( cvat_schema, @@ -3885,7 +3891,9 @@ def _ensure_one_field_per_type(self, label_schema, verbose=True): label_field, ) - def _get_batch_size(self, samples): + def _get_batch_size(self, samples, task_size): + samples.compute_metadata() + if samples.media_type == fom.VIDEO: # The current implementation (both upload and download) requires # frame IDs for all frames that might get labels @@ -3894,10 +3902,12 @@ def _get_batch_size(self, samples): # CVAT only allows for one video per task return 1 - samples.compute_metadata() - - # Put all image samples in one task - return len(samples) + num_samples = len(samples) + if task_size is None: + # Put all image samples in one task + return num_samples + else: + return min(task_size, num_samples) def _create_task_upload_data( self, From 4ce410d1ab181177fc4e9a45d2319dda71060f87 Mon Sep 17 00:00:00 2001 From: brimoor Date: Tue, 28 Dec 2021 11:32:32 -0600 Subject: [PATCH 2/3] linting --- docs/source/integrations/cvat.rst | 55 
++++++++++++++----------------- fiftyone/utils/cvat.py | 14 ++++---- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/docs/source/integrations/cvat.rst b/docs/source/integrations/cvat.rst index 329cf6f794..9d46de6668 100644 --- a/docs/source/integrations/cvat.rst +++ b/docs/source/integrations/cvat.rst @@ -464,6 +464,8 @@ In addition, the following CVAT-specific parameters from :class:`CVATBackendConfig ` can also be provided: +- **task_size** (*None*): an optional maximum number of images to upload per + task. Videos are always uploaded one per task - **segment_size** (*None*): the maximum number of images to upload per job. Not applicable to videos - **image_quality** (*75*): an int in `[0, 100]` determining the image @@ -489,10 +491,6 @@ provided: - **occluded_attr** (*None*): an optional attribute name containing existing occluded values and/or in which to store downloaded occluded values for all objects in the annotation run -- **task_size** (*None*): an optional integer specifying the maximum number of - images to upload per CVAT task. Videos are always uploaded one per - task - .. _cvat-label-schema: @@ -1855,60 +1853,57 @@ attributes between annotation runs. :alt: cvat-occ-widget :align: center +.. _cvat-large-runs: + Large annotation runs --------------------- -The CVAT REST API imposes a limit on the size of requests. This can cause -errors when uploading annotation runs for sample collections with many -images or labels to CVAT. +The CVAT API imposes a limit on the size of all requests. By default, all +images are uploaded to a single CVAT task, which can result in errors when +uploading annotation runs for large sample collections. .. note:: - The CVAT maintainers are working on + The CVAT maintainers are working on `an update `_ - to resolve this issue natively. For the time being, the following workflow - is our recommended approach to circumvent this issue. + to resolve this issue natively. 
In the meantime, the following workflow is + our recommended approach to circumvent this issue. -In order to break an annotation run into smaller segments, we provide a -`task_size` parameter that limits the number of samples that can be uploaded to -a single task in CVAT. Due to the creation of multiple tasks when `task_size` -is provided, we recommend providing a `project_name` to group the created tasks -together. +You can use the `task_size` parameter to break image annotation runs into +multiple CVAT tasks, each with a specified maximum number of images. Note that +we recommend providing a `project_name` whenever you use the `task_size` +pararmeter so that the created tasks will be grouped together. -The `task_size` parameter can be used in conjunction with `segment_size` to -both define the number of images per task and the number of images per job -within each task. +The `task_size` parameter can also be used in conjunction with the +`segment_size` parameter to configure both the number of images per task as +well as the number of images per job within each task. .. code:: python :linenos: - import fiftyone.zoo as foz import fiftyone as fo + import fiftyone.zoo as foz dataset = foz.load_zoo_dataset("quickstart", max_samples=20).clone() anno_key = "batch_upload" - project_name = "batch_example" - task_size = 6 # 6 images per task - segment_size = 2 # 2 images per job - results = dataset.annotate( anno_key, label_field="ground_truth", - task_size=task_size, - segment_size=segment_size, - project_name=project_name, + task_size=6, # 6 images per task + segment_size=2, # 2 images per job + project_name="batch_example", launch_editor=True, ) - - # Annotate in CVAT - + + # Annotate in CVAT... + dataset.load_annotations(anno_key, cleanup=True) .. note:: - The `task_size` parameter only applies to image datasets since videos are + The `task_size` parameter only applies to image datasets, since videos are always uploaded one per task. .. 
_cvat-annotating-videos: diff --git a/fiftyone/utils/cvat.py b/fiftyone/utils/cvat.py index 1d446b9e52..b3cbf1d60b 100644 --- a/fiftyone/utils/cvat.py +++ b/fiftyone/utils/cvat.py @@ -2498,6 +2498,8 @@ class CVATBackendConfig(foua.AnnotationBackendConfig): password (None): the CVAT password headers (None): an optional dict of headers to add to all CVAT API requests + task_size (None): an optional maximum number of images to upload per + task. Videos are always uploaded one per task segment_size (None): maximum number of images per job. Not applicable to videos image_quality (75): an int in `[0, 100]` determining the image quality @@ -2525,9 +2527,6 @@ class CVATBackendConfig(foua.AnnotationBackendConfig): occluded_attr (None): an optional attribute name containing existing occluded values and/or in which to store downloaded occluded values for all objects in the annotation run - task_size (None): an optional integer specifying the maximum number of - images to upload per CVAT task. Videos are always uploaded one per - task """ def __init__( @@ -2539,6 +2538,7 @@ def __init__( username=None, password=None, headers=None, + task_size=None, segment_size=None, image_quality=75, use_cache=True, @@ -2550,11 +2550,11 @@ def __init__( project_name=None, project_id=None, occluded_attr=None, - task_size=None, **kwargs, ): super().__init__(name, label_schema, media_field=media_field, **kwargs) self.url = url + self.task_size = task_size self.segment_size = segment_size self.image_quality = image_quality self.use_cache = use_cache @@ -2566,7 +2566,6 @@ def __init__( self.project_name = project_name self.project_id = project_id self.occluded_attr = occluded_attr - self.task_size = task_size # store privately so these aren't serialized self._username = username @@ -3903,11 +3902,12 @@ def _get_batch_size(self, samples, task_size): return 1 num_samples = len(samples) + if task_size is None: # Put all image samples in one task return num_samples - else: - return min(task_size, 
num_samples) + + return min(task_size, num_samples) def _create_task_upload_data( self, From 79454be7559d48b5e7ece92846b891410c1f7a30 Mon Sep 17 00:00:00 2001 From: brimoor Date: Tue, 28 Dec 2021 11:41:21 -0600 Subject: [PATCH 3/3] organizing examples --- docs/source/integrations/cvat.rst | 106 +++++++++++++++--------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/docs/source/integrations/cvat.rst b/docs/source/integrations/cvat.rst index 9d46de6668..ad60cc394e 100644 --- a/docs/source/integrations/cvat.rst +++ b/docs/source/integrations/cvat.rst @@ -1646,6 +1646,59 @@ will be assigned using a round-robin strategy. results.cleanup() dataset.delete_annotation_run(anno_key) +.. _cvat-large-runs: + +Large annotation runs +--------------------- + +The CVAT API imposes a limit on the size of all requests. By default, all +images are uploaded to a single CVAT task, which can result in errors when +uploading annotation runs for large sample collections. + +.. note:: + + The CVAT maintainers are working on + `an update `_ + to resolve this issue natively. In the meantime, the following workflow is + our recommended approach to circumvent this issue. + +You can use the `task_size` parameter to break image annotation runs into +multiple CVAT tasks, each with a specified maximum number of images. Note that +we recommend providing a `project_name` whenever you use the `task_size` +parameter so that the created tasks will be grouped together. + +The `task_size` parameter can also be used in conjunction with the +`segment_size` parameter to configure both the number of images per task as +well as the number of images per job within each task. + +.. 
code:: python + :linenos: + + import fiftyone as fo + import fiftyone.zoo as foz + + dataset = foz.load_zoo_dataset("quickstart", max_samples=20).clone() + + anno_key = "batch_upload" + + results = dataset.annotate( + anno_key, + label_field="ground_truth", + task_size=6, # 6 images per task + segment_size=2, # 2 images per job + project_name="batch_example", + launch_editor=True, + ) + + # Annotate in CVAT... + + dataset.load_annotations(anno_key, cleanup=True) + +.. note:: + + The `task_size` parameter only applies to image datasets, since videos are + always uploaded one per task. + Scalar labels ------------- @@ -1853,59 +1906,6 @@ attributes between annotation runs. :alt: cvat-occ-widget :align: center -.. _cvat-large-runs: - -Large annotation runs ---------------------- - -The CVAT API imposes a limit on the size of all requests. By default, all -images are uploaded to a single CVAT task, which can result in errors when -uploading annotation runs for large sample collections. - -.. note:: - - The CVAT maintainers are working on - `an update `_ - to resolve this issue natively. In the meantime, the following workflow is - our recommended approach to circumvent this issue. - -You can use the `task_size` parameter to break image annotation runs into -multiple CVAT tasks, each with a specified maximum number of images. Note that -we recommend providing a `project_name` whenever you use the `task_size` -pararmeter so that the created tasks will be grouped together. - -The `task_size` parameter can also be used in conjunction with the -`segment_size` parameter to configure both the number of images per task as -well as the number of images per job within each task. - -.. 
code:: python - :linenos: - - import fiftyone as fo - import fiftyone.zoo as foz - - dataset = foz.load_zoo_dataset("quickstart", max_samples=20).clone() - - anno_key = "batch_upload" - - results = dataset.annotate( - anno_key, - label_field="ground_truth", - task_size=6, # 6 images per task - segment_size=2, # 2 images per job - project_name="batch_example", - launch_editor=True, - ) - - # Annotate in CVAT... - - dataset.load_annotations(anno_key, cleanup=True) - -.. note:: - - The `task_size` parameter only applies to image datasets, since videos are - always uploaded one per task. - .. _cvat-annotating-videos: Annotating videos