not prefetching images when not needed #8676

Merged 21 commits on Dec 4, 2024

Changes from 6 commits
@@ -0,0 +1,4 @@
+### Fixed
+
+- Optimized memory consumption and reduced the number of database queries when importing annotations into a task with many jobs and images
+  (<https://github.com/cvat-ai/cvat/pull/8676>)
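The change makes image prefetching opt-in for `JobAnnotation`. A minimal sketch of the resulting call pattern, using only names that appear in the diff below:

```python
# Default (prefetch_images=False): the per-frame image prefetch is skipped.
# Suits callers that only touch annotations, e.g. the PUT/PATCH/DELETE
# job data paths in cvat/apps/dataset_manager/task.py.
annotation = JobAnnotation(job_id)

# Callers that need frame information (export_job, import_job_annotations,
# the quality report comparison) now opt in explicitly.
annotation = JobAnnotation(job_id, prefetch_images=True)
```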
32 changes: 22 additions & 10 deletions cvat/apps/dataset_manager/task.py
@@ -13,7 +13,7 @@
 from datumaro.components.errors import DatasetError, DatasetImportError, DatasetNotFoundError

 from django.db import transaction
-from django.db.models.query import Prefetch
+from django.db.models.query import Prefetch, QuerySet
 from django.conf import settings
 from rest_framework.exceptions import ValidationError

@@ -81,9 +81,10 @@ def merge_table_rows(rows, keys_for_merge, field_id):

     return list(merged_rows.values())

+
 class JobAnnotation:
     @classmethod
-    def add_prefetch_info(cls, queryset):
+    def add_prefetch_info(cls, queryset: QuerySet, prefetch_images: bool = True):
         assert issubclass(queryset.model, models.Job)

         label_qs = add_prefetch_fields(models.Label.objects.all(), [
@@ -93,6 +94,12 @@ def add_prefetch_info(cls, queryset):
         ])
         label_qs = JobData.add_prefetch_info(label_qs)

+        task_data_queryset = models.Data.objects.all()
+        if prefetch_images:
+            task_data_queryset = task_data_queryset.select_related('video').prefetch_related(
+                Prefetch('images', queryset=models.Image.objects.order_by('frame'))
+            )
+
         return queryset.select_related(
             'segment',
             'segment__task',
Comment on lines 103 to 105
@zhiltsov-max (Contributor), Nov 26, 2024

FYI, I don't think this impacts memory use heavily at the moment. It seems that using select_related results in different Segment and Task objects in Python, even if they are actually the same DB row. prefetch_related, however, results in the same objects with the same ids. As there are many segments using the same task, it makes sense to use prefetch_related instead in such cases if memory use is the concern. prefetch_related will result in separate queries, though.
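A minimal sketch of the identity difference described here, assuming a configured Django environment with the CVAT models and two jobs whose segments belong to the same task (illustrative only, not part of this PR):

```python
from cvat.apps.engine import models

# select_related joins in SQL and builds a fresh related instance per
# result row, so jobs sharing a task still hold distinct Task objects.
a, b = models.Job.objects.select_related('segment__task')[:2]
assert a.segment.task.id == b.segment.task.id  # same DB row (assumed)
assert a.segment.task is not b.segment.task    # but duplicated in memory

# prefetch_related runs a separate query per relation and maps results
# by primary key, so the shared row becomes one shared Python object.
a, b = models.Job.objects.prefetch_related('segment__task')[:2]
assert a.segment.task is b.segment.task        # one object, shared
```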

@@ -103,18 +110,15 @@ def add_prefetch_info(cls, queryset):
             'segment__task__project__owner',
             'segment__task__project__assignee',

-            Prefetch('segment__task__data',
-                queryset=models.Data.objects.select_related('video').prefetch_related(
-                    Prefetch('images', queryset=models.Image.objects.order_by('frame'))
-                )),
+            Prefetch('segment__task__data', queryset=task_data_queryset),

             Prefetch('segment__task__label_set', queryset=label_qs),
             Prefetch('segment__task__project__label_set', queryset=label_qs),
         )

-    def __init__(self, pk, *, is_prefetched=False, queryset=None):
+    def __init__(self, pk, *, is_prefetched: bool = False, queryset: QuerySet = None, prefetch_images: bool = False):
         if queryset is None:
-            queryset = self.add_prefetch_info(models.Job.objects)
+            queryset = self.add_prefetch_info(models.Job.objects, prefetch_images=prefetch_images)

         if is_prefetched:
             self.db_job: models.Job = queryset.select_related(
@@ -1006,6 +1010,7 @@ def get_job_data(pk):

     return annotation.data

+
 @silk_profile(name="POST job data")
 @transaction.atomic
 def put_job_data(pk, data):
@@ -1014,6 +1019,7 @@

     return annotation.data

+
 @silk_profile(name="UPDATE job data")
 @plugin_decorator
 @transaction.atomic
@@ -1028,26 +1034,29 @@ def patch_job_data(pk, data, action):

     return annotation.data

+
 @silk_profile(name="DELETE job data")
 @transaction.atomic
 def delete_job_data(pk):
     annotation = JobAnnotation(pk)
     annotation.delete()

+
 def export_job(job_id, dst_file, format_name, server_url=None, save_images=False):
     # For big tasks the dump function may run for a long time, and
     # we don't need to hold the lock after the task has been initialized from the DB.
     # But there is a bug with a corrupted dump file when two or
     # more dump requests are received at the same time:
     # https://github.com/cvat-ai/cvat/issues/217
     with transaction.atomic():
-        job = JobAnnotation(job_id)
+        job = JobAnnotation(job_id, prefetch_images=True)
         job.init_from_db()

     exporter = make_exporter(format_name)
     with open(dst_file, 'wb') as f:
         job.export(f, exporter, host=server_url, save_images=save_images)

+
 @silk_profile(name="GET task data")
 @transaction.atomic
 def get_task_data(pk):
@@ -1056,6 +1065,7 @@

     return annotation.data

+
 @silk_profile(name="POST task data")
 @transaction.atomic
 def put_task_data(pk, data):
@@ -1064,6 +1074,7 @@

     return annotation.data

+
 @silk_profile(name="UPDATE task data")
 @transaction.atomic
 def patch_task_data(pk, data, action):
@@ -1108,9 +1119,10 @@ def import_task_annotations(src_file, task_id, format_name, conv_mask_to_poly):
     except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex:
         raise CvatImportError(str(ex))

+
 @transaction.atomic
 def import_job_annotations(src_file, job_id, format_name, conv_mask_to_poly):
-    job = JobAnnotation(job_id)
+    job = JobAnnotation(job_id, prefetch_images=True)

     importer = make_importer(format_name)
     with open(src_file, 'rb') as f:
2 changes: 1 addition & 1 deletion cvat/apps/quality_control/quality_reports.py
@@ -572,7 +572,7 @@ def add_prefetch_info(cls, queryset):
     @transaction.atomic
     def __init__(self, job_id: int, *, queryset=None, included_frames=None) -> None:
         self.job_id = job_id
-        self.job_annotation = JobAnnotation(job_id, queryset=queryset)
+        self.job_annotation = JobAnnotation(job_id, queryset=queryset, prefetch_images=True)
         self.job_annotation.init_from_db()
         self.job_data = JobData(
             annotation_ir=self.job_annotation.ir_data,