Skip to content

Commit

Permalink
Optimized database requests when removing resources (#8192)
Browse files Browse the repository at this point in the history
  • Loading branch information
bsekachev authored Jul 31, 2024
1 parent c53743e commit e0da3a2
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 30 deletions.
58 changes: 46 additions & 12 deletions cvat/apps/dataset_manager/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from cvat.apps.engine import models, serializers
from cvat.apps.engine.plugins import plugin_decorator
from cvat.apps.engine.log import DatasetLogManager
from cvat.apps.engine.utils import chunked_list
from cvat.apps.events.handlers import handle_annotations_change
from cvat.apps.profiler import silk_profile

Expand Down Expand Up @@ -437,24 +438,52 @@ def update(self, data):
if not self._data_is_empty(self.data):
self._set_updated_date()

def _delete_job_labeledimages(self, ids__UNSAFE: list[int]) -> None:
    """Delete labeled images of this job, plus their attribute values.

    ``ids__UNSAFE`` is received from the user, so it is never applied to a
    query directly: it is first narrowed to the labeled images that belong
    to ``self.db_job``.
    """
    job_images = self.db_job.labeledimage_set
    owned_ids = job_images.filter(pk__in=ids__UNSAFE).values_list('id', flat=True)

    # Attribute values are removed before the rows they reference.
    models.LabeledImageAttributeVal.objects.filter(image_id__in=owned_ids).delete()
    job_images.filter(pk__in=owned_ids).delete()

def _delete_job_labeledshapes(self, ids__UNSAFE: list[int], *, is_subcall: bool = False) -> None:
    """Delete labeled shapes of this job, their child shapes and attribute values.

    ``ids__UNSAFE`` is received from the user, so on a top-level call it is
    first narrowed to the shapes that belong to ``self.db_job``. When
    ``is_subcall`` is true the ids are used as given, without that filtering.
    """
    job_shapes = self.db_job.labeledshape_set

    if is_subcall:
        owned_ids = ids__UNSAFE
    else:
        owned_ids = job_shapes.filter(pk__in=ids__UNSAFE).values_list('id', flat=True)
        # NOTE(review): indentation was lost in the reviewed text; the child
        # lookup is assumed to belong to the non-subcall branch - confirm.
        nested_ids = job_shapes.filter(parent_id__in=owned_ids).values_list('id', flat=True)
        if nested_ids:
            # Shapes that reference a parent are removed before that parent.
            self._delete_job_labeledshapes(nested_ids, is_subcall=True)

    # Attribute values are removed before the shapes they reference.
    models.LabeledShapeAttributeVal.objects.filter(shape_id__in=owned_ids).delete()
    job_shapes.filter(pk__in=owned_ids).delete()

def _delete_job_labeledtracks(self, ids__UNSAFE: list[int], *, is_subcall: bool = False) -> None:
    """Delete labeled tracks of this job, their child tracks and attribute values.

    ``ids__UNSAFE`` is received from the user, so on a top-level call it is
    first narrowed to the tracks that belong to ``self.db_job``. When
    ``is_subcall`` is true the ids are used as given, without that filtering.
    """
    job_tracks = self.db_job.labeledtrack_set

    if is_subcall:
        owned_ids = ids__UNSAFE
    else:
        owned_ids = job_tracks.filter(pk__in=ids__UNSAFE).values_list('id', flat=True)
        # NOTE(review): indentation was lost in the reviewed text; the child
        # lookup is assumed to belong to the non-subcall branch - confirm.
        nested_ids = job_tracks.filter(parent_id__in=owned_ids).values_list('id', flat=True)
        if nested_ids:
            # Tracks that reference a parent are removed before that parent.
            self._delete_job_labeledtracks(nested_ids, is_subcall=True)

    # Attribute values (of the tracked shapes and of the tracks themselves)
    # are removed before the track rows.
    models.TrackedShapeAttributeVal.objects.filter(shape__track_id__in=owned_ids).delete()
    models.LabeledTrackAttributeVal.objects.filter(track_id__in=owned_ids).delete()
    job_tracks.filter(pk__in=owned_ids).delete()

def _delete(self, data=None):
deleted_data = {}
if data is None:
self.init_from_db()
deleted_data = self.data
self.db_job.labeledimage_set.all().delete()
self.db_job.labeledshape_set.all().delete()
self.db_job.labeledtrack_set.all().delete()
models.clear_annotations_in_jobs([self.db_job.id])
else:
labeledimage_ids = [image["id"] for image in data["tags"]]
labeledshape_ids = [shape["id"] for shape in data["shapes"]]
labeledtrack_ids = [track["id"] for track in data["tracks"]]
labeledimage_set = self.db_job.labeledimage_set
labeledimage_set = labeledimage_set.filter(pk__in=labeledimage_ids)
labeledshape_set = self.db_job.labeledshape_set
labeledshape_set = labeledshape_set.filter(pk__in=labeledshape_ids)
labeledtrack_set = self.db_job.labeledtrack_set
labeledtrack_set = labeledtrack_set.filter(pk__in=labeledtrack_ids)

# It is not important for us that data had some "invalid" objects
# which were skipped (not actually deleted). The main idea is to
Expand All @@ -463,9 +492,14 @@ def _delete(self, data=None):
self.ir_data.shapes = data['shapes']
self.ir_data.tracks = data['tracks']

labeledimage_set.delete()
labeledshape_set.delete()
labeledtrack_set.delete()
for labeledimage_ids_chunk in chunked_list(labeledimage_ids, chunk_size=1000):
self._delete_job_labeledimages(labeledimage_ids_chunk)

for labeledshape_ids_chunk in chunked_list(labeledshape_ids, chunk_size=1000):
self._delete_job_labeledshapes(labeledshape_ids_chunk)

for labeledtrack_ids_chunk in chunked_list(labeledtrack_ids, chunk_size=1000):
self._delete_job_labeledtracks(labeledtrack_ids_chunk)

deleted_data = {
"tags": data["tags"],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Generated by Django 4.2.14 on 2024-07-22 07:27

from django.db import migrations, models
import django.db.models.deletion


def _alter_fk_to_do_nothing(model_name, name, to, **fk_options):
    """Build an ``AlterField`` that sets ``on_delete=DO_NOTHING`` on a foreign key."""
    return migrations.AlterField(
        model_name=model_name,
        name=name,
        field=models.ForeignKey(
            on_delete=django.db.models.deletion.DO_NOTHING,
            to=to,
            **fk_options,
        ),
    )


class Migration(migrations.Migration):
    """Set ``on_delete=DO_NOTHING`` on the annotation-related foreign keys below."""

    dependencies = [
        ("engine", "0081_job_assignee_updated_date_and_more"),
    ]

    operations = [
        # Labeled images
        _alter_fk_to_do_nothing("labeledimage", "job", "engine.job"),
        _alter_fk_to_do_nothing(
            "labeledimageattributeval",
            "image",
            "engine.labeledimage",
            related_name="attributes",
            related_query_name="attribute",
        ),
        # Labeled shapes
        _alter_fk_to_do_nothing("labeledshape", "job", "engine.job"),
        _alter_fk_to_do_nothing(
            "labeledshape",
            "parent",
            "engine.labeledshape",
            null=True,
            related_name="elements",
        ),
        _alter_fk_to_do_nothing(
            "labeledshapeattributeval",
            "shape",
            "engine.labeledshape",
            related_name="attributes",
            related_query_name="attribute",
        ),
        # Labeled tracks
        _alter_fk_to_do_nothing("labeledtrack", "job", "engine.job"),
        _alter_fk_to_do_nothing(
            "labeledtrack",
            "parent",
            "engine.labeledtrack",
            null=True,
            related_name="elements",
        ),
        _alter_fk_to_do_nothing(
            "labeledtrackattributeval",
            "track",
            "engine.labeledtrack",
            related_name="attributes",
            related_query_name="attribute",
        ),
        # Tracked shape attribute values
        _alter_fk_to_do_nothing(
            "trackedshapeattributeval",
            "shape",
            "engine.trackedshape",
            related_name="attributes",
            related_query_name="attribute",
        ),
    ]
63 changes: 45 additions & 18 deletions cvat/apps/engine/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import extend_schema_field

from cvat.apps.engine.utils import parse_specific_attributes
from cvat.apps.engine.utils import parse_specific_attributes, chunked_list
from cvat.apps.events.utils import cache_deleted

class SafeCharField(models.CharField):
Expand Down Expand Up @@ -325,6 +325,18 @@ class Meta:
def touch(self) -> None:
    """Save this instance, writing only its ``updated_date`` field.

    NOTE(review): presumably ``updated_date`` is refreshed automatically on
    save - confirm against the field definition.
    """
    fields_to_write = ["updated_date"]
    self.save(update_fields=fields_to_write)

@transaction.atomic(savepoint=False)
def clear_annotations_in_jobs(job_ids):
    """Delete every annotation row that belongs to the given jobs.

    ``job_ids`` is processed in batches of 1000 ids. Within a batch the
    tables are cleared in a fixed order in which each attribute-value table
    is emptied before the table it points at.
    """
    # Resolved at call time on purpose: these model classes are looked up
    # when the function runs, exactly as in a sequence of direct statements.
    deletion_plan = (
        (TrackedShapeAttributeVal, "shape__track__job_id__in"),
        (TrackedShape, "track__job_id__in"),
        (LabeledTrackAttributeVal, "track__job_id__in"),
        (LabeledTrack, "job_id__in"),
        (LabeledShapeAttributeVal, "shape__job_id__in"),
        (LabeledShape, "job_id__in"),
        (LabeledImageAttributeVal, "image__job_id__in"),
        (LabeledImage, "job_id__in"),
    )

    for id_batch in chunked_list(job_ids, chunk_size=1000):
        for model_cls, lookup in deletion_plan:
            model_cls.objects.filter(**{lookup: id_batch}).delete()

class Project(TimestampedModel):
name = SafeCharField(max_length=256)
owner = models.ForeignKey(User, null=True, blank=True,
Expand Down Expand Up @@ -367,7 +379,15 @@ def is_job_staff(self, user_id):
).count() > 0

@cache_deleted
@transaction.atomic(savepoint=False)
def delete(self, using=None, keep_parents=False):
    """Delete this object after explicitly removing its labels.

    Removing labels (and so their attributes) first is a quicker way to get
    rid of annotations and reduces the number of queries: the annotations
    are then removed by cascade.
    """
    # Labels that have a parent must be removed before the parent labels.
    nested_labels = self.label_set.exclude(parent=None)
    if nested_labels.count():
        nested_labels.delete()

    top_level_labels = self.label_set.filter(parent=None)
    top_level_labels.delete()

    super().delete(using, keep_parents)

# Extend default permission model
Expand Down Expand Up @@ -477,7 +497,19 @@ def __str__(self):
return self.name

@cache_deleted
@transaction.atomic(savepoint=False)
def delete(self, using=None, keep_parents=False):
    """Delete this object, clearing its annotations with as few queries as possible.

    With a project set, the annotations of all jobs reachable through
    ``segment_set`` are cleared explicitly. Without a project, the labels are
    removed first, which is a quicker way to remove annotations: they are
    then removed by cascade.
    """
    if self.project:
        segment_job_ids = self.segment_set.values_list('job__id', flat=True)
        clear_annotations_in_jobs(list(segment_job_ids))
    else:
        # Labels that have a parent must be removed before the parent labels.
        nested_labels = self.label_set.exclude(parent=None)
        if nested_labels.count():
            nested_labels.delete()
        self.label_set.filter(parent=None).delete()

    super().delete(using, keep_parents)

# Redefined a couple of operation for FileSystemStorage to avoid renaming
Expand Down Expand Up @@ -756,18 +788,13 @@ def clean(self) -> None:
return super().clean()

@cache_deleted
@transaction.atomic(savepoint=False)
def delete(self, using=None, keep_parents=False):
if self.segment:
self.segment.delete(using=using, keep_parents=keep_parents)

clear_annotations_in_jobs([self.id])
segment = self.segment
super().delete(using, keep_parents)

self.delete_dirs()

def delete_dirs(self):
job_path = self.get_dirname()
if os.path.isdir(job_path):
shutil.rmtree(job_path)
if segment:
segment.delete()

def make_dirs(self):
job_path = self.get_dirname()
Expand Down Expand Up @@ -919,7 +946,7 @@ def __str__(self):

class Annotation(models.Model):
id = models.BigAutoField(primary_key=True)
job = models.ForeignKey(Job, on_delete=models.CASCADE)
job = models.ForeignKey(Job, on_delete=models.DO_NOTHING)
label = models.ForeignKey(Label, on_delete=models.CASCADE)
frame = models.PositiveIntegerField()
group = models.PositiveIntegerField(null=True)
Expand All @@ -946,21 +973,21 @@ class LabeledImage(Annotation):
pass

class LabeledImageAttributeVal(AttributeVal):
image = models.ForeignKey(LabeledImage, on_delete=models.CASCADE,
image = models.ForeignKey(LabeledImage, on_delete=models.DO_NOTHING,
related_name='attributes', related_query_name='attribute')

class LabeledShape(Annotation, Shape):
parent = models.ForeignKey('self', on_delete=models.CASCADE, null=True, related_name='elements')
parent = models.ForeignKey('self', on_delete=models.DO_NOTHING, null=True, related_name='elements')

class LabeledShapeAttributeVal(AttributeVal):
shape = models.ForeignKey(LabeledShape, on_delete=models.CASCADE,
shape = models.ForeignKey(LabeledShape, on_delete=models.DO_NOTHING,
related_name='attributes', related_query_name='attribute')

class LabeledTrack(Annotation):
parent = models.ForeignKey('self', on_delete=models.CASCADE, null=True, related_name='elements')
parent = models.ForeignKey('self', on_delete=models.DO_NOTHING, null=True, related_name='elements')

class LabeledTrackAttributeVal(AttributeVal):
track = models.ForeignKey(LabeledTrack, on_delete=models.CASCADE,
track = models.ForeignKey(LabeledTrack, on_delete=models.DO_NOTHING,
related_name='attributes', related_query_name='attribute')

class TrackedShape(Shape):
Expand All @@ -970,7 +997,7 @@ class TrackedShape(Shape):
frame = models.PositiveIntegerField()

class TrackedShapeAttributeVal(AttributeVal):
shape = models.ForeignKey(TrackedShape, on_delete=models.CASCADE,
shape = models.ForeignKey(TrackedShape, on_delete=models.DO_NOTHING,
related_name='attributes', related_query_name='attribute')

class Profile(models.Model):
Expand Down
4 changes: 4 additions & 0 deletions cvat/apps/engine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,3 +412,7 @@ def directory_tree(path, max_depth=None) -> str:

def is_dataset_export(request: HttpRequest) -> bool:
    """Return the ``save_images`` query parameter converted with ``to_bool``.

    A missing parameter is treated as ``False``.
    """
    save_images_flag = request.query_params.get('save_images', False)
    return to_bool(save_images_flag)

def chunked_list(lst, chunk_size):
    """Yield consecutive slices of ``lst`` with at most ``chunk_size`` items each.

    Args:
        lst: a sized, sliceable sequence (list, tuple, str, ...).
        chunk_size: maximum number of items per yielded slice; must be positive.

    Yields:
        ``lst[i:i + chunk_size]`` for ``i = 0, chunk_size, 2 * chunk_size, ...``.
        The last slice may be shorter. Nothing is yielded for an empty ``lst``.

    Raises:
        ValueError: if ``chunk_size`` is not positive. As this is a generator,
            the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently yield no chunks,
    # so callers would skip all their work; fail loudly instead (zero already
    # raised ValueError from range()).
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    for start in range(0, len(lst), chunk_size):
        yield lst[start:start + chunk_size]

0 comments on commit e0da3a2

Please sign in to comment.