Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimized database requests when removing resources #8192

Merged
merged 32 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
33da3f3
Optimized removing jobs, tasks, projects
bsekachev Jul 17, 2024
b5639d8
Minor refactoring
bsekachev Jul 17, 2024
d7cedba
Merge branch 'develop' into bs/requests_optimization
bsekachev Jul 18, 2024
32acdf6
Minimized changes
bsekachev Jul 18, 2024
d9f22f0
Implemented updating annotations in a job
bsekachev Jul 18, 2024
9fb262c
Aborted extra change
bsekachev Jul 18, 2024
7efc90f
Minor refactoring
bsekachev Jul 18, 2024
7e4db20
Added chunks
bsekachev Jul 18, 2024
78d38c7
Removed unused import
bsekachev Jul 18, 2024
5da8415
Some refactoring
bsekachev Jul 18, 2024
bb68b7b
renamed queryset
bsekachev Jul 18, 2024
56a57ab
Refactoring
bsekachev Jul 18, 2024
d422482
Minor refactoring
bsekachev Jul 18, 2024
ad3af6e
Optimized task/project removing
bsekachev Jul 18, 2024
2548ea6
Optimized task/project removing
bsekachev Jul 18, 2024
20bd035
Remove before remove
bsekachev Jul 18, 2024
8cc3c67
Updated migration
bsekachev Jul 22, 2024
f3210fa
Minor refactoring
bsekachev Jul 22, 2024
a3ca5d7
Cascade removing for tracked shapes
bsekachev Jul 22, 2024
702378b
Merge branch 'develop' into bs/requests_optimization
bsekachev Jul 22, 2024
8ef2350
Refactoring
bsekachev Jul 22, 2024
05cd5b1
Removed extra transaction
bsekachev Jul 22, 2024
405aada
Removing child labels first
bsekachev Jul 22, 2024
0ac522a
Aborted part of code
bsekachev Jul 23, 2024
82dc60a
Aborted part of code
bsekachev Jul 23, 2024
c37a549
Added typings
bsekachev Jul 23, 2024
796028c
Merge branch 'develop' into bs/requests_optimization
bsekachev Jul 24, 2024
c5a4704
Changed batch size
bsekachev Jul 24, 2024
a9813da
Update cvat/apps/dataset_manager/task.py
bsekachev Jul 26, 2024
6ef6f89
Update cvat/apps/dataset_manager/task.py
bsekachev Jul 26, 2024
1ef7131
Added named parameter
bsekachev Jul 26, 2024
5d21888
Merge branch 'develop' into bs/requests_optimization
bsekachev Jul 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 46 additions & 12 deletions cvat/apps/dataset_manager/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from cvat.apps.engine import models, serializers
from cvat.apps.engine.plugins import plugin_decorator
from cvat.apps.engine.log import DatasetLogManager
from cvat.apps.engine.utils import chunked_list
from cvat.apps.events.handlers import handle_annotations_change
from cvat.apps.profiler import silk_profile

Expand Down Expand Up @@ -437,24 +438,52 @@ def update(self, data):
if not self._data_is_empty(self.data):
self._set_updated_date()

def _delete_job_labeledimages(self, ids__UNSAFE: list[int]) -> None:
    """Delete this job's tags with the given ids along with their attribute values.

    ``ids__UNSAFE`` is received from the user, so it is first narrowed to
    the tags that belong to ``self.db_job``; only the narrowed ids are
    used in the delete queries.
    """
    job_tags = self.db_job.labeledimage_set
    owned_ids = job_tags.filter(pk__in=ids__UNSAFE).values_list('id', flat=True)

    # Attribute values reference the tags, so they are removed first.
    attribute_values = models.LabeledImageAttributeVal.objects.filter(image_id__in=owned_ids)
    attribute_values.delete()
    job_tags.filter(pk__in=owned_ids).delete()

def _delete_job_labeledshapes(self, ids__UNSAFE: list[int], is_subcall=False) -> None:
    """Delete this job's shapes with the given ids, their child shapes and attribute values.

    Args:
        ids__UNSAFE: Shape ids. On the top-level call they are received
            from the user and are narrowed to shapes of ``self.db_job``
            before being applied to any query.
        is_subcall: True only for the internal recursive call; its ids were
            already selected from this job and are used as is.
    """
    job_shapes = self.db_job.labeledshape_set
    if is_subcall:
        ids = ids__UNSAFE
    else:
        ids = job_shapes.filter(pk__in=ids__UNSAFE).values_list('id', flat=True)

        # Child shapes reference their parent, so they are removed before it.
        child_ids = job_shapes.filter(parent_id__in=ids).values_list('id', flat=True)
        # exists() asks the database for a single row instead of loading
        # every child id only to test for emptiness.
        if child_ids.exists():
            self._delete_job_labeledshapes(child_ids, is_subcall=True)

    models.LabeledShapeAttributeVal.objects.filter(shape_id__in=ids).delete()
    job_shapes.filter(pk__in=ids).delete()

def _delete_job_labeledtracks(self, ids__UNSAFE: list[int], is_subcall=False) -> None:
    """Delete this job's tracks with the given ids, their child tracks and attribute values.

    Args:
        ids__UNSAFE: Track ids. On the top-level call they are received
            from the user and are narrowed to tracks of ``self.db_job``
            before being applied to any query.
        is_subcall: True only for the internal recursive call; its ids were
            already selected from this job and are used as is.
    """
    job_tracks = self.db_job.labeledtrack_set
    if is_subcall:
        ids = ids__UNSAFE
    else:
        ids = job_tracks.filter(pk__in=ids__UNSAFE).values_list('id', flat=True)

        # Child tracks reference their parent, so they are removed before it.
        child_ids = job_tracks.filter(parent_id__in=ids).values_list('id', flat=True)
        # exists() asks the database for a single row instead of loading
        # every child id only to test for emptiness.
        if child_ids.exists():
            self._delete_job_labeledtracks(child_ids, is_subcall=True)

    # Attribute values of the tracks' shapes and of the tracks themselves
    # are removed before the tracks.
    # NOTE(review): tracked shapes are not deleted explicitly here -
    # presumably they go away with their track; confirm.
    models.TrackedShapeAttributeVal.objects.filter(shape__track_id__in=ids).delete()
    models.LabeledTrackAttributeVal.objects.filter(track_id__in=ids).delete()
    job_tracks.filter(pk__in=ids).delete()

def _delete(self, data=None):
deleted_data = {}
if data is None:
self.init_from_db()
deleted_data = self.data
self.db_job.labeledimage_set.all().delete()
self.db_job.labeledshape_set.all().delete()
self.db_job.labeledtrack_set.all().delete()
models.clear_annotations_in_jobs([self.db_job.id])
else:
labeledimage_ids = [image["id"] for image in data["tags"]]
labeledshape_ids = [shape["id"] for shape in data["shapes"]]
labeledtrack_ids = [track["id"] for track in data["tracks"]]
labeledimage_set = self.db_job.labeledimage_set
labeledimage_set = labeledimage_set.filter(pk__in=labeledimage_ids)
labeledshape_set = self.db_job.labeledshape_set
labeledshape_set = labeledshape_set.filter(pk__in=labeledshape_ids)
labeledtrack_set = self.db_job.labeledtrack_set
labeledtrack_set = labeledtrack_set.filter(pk__in=labeledtrack_ids)

# It is not important for us that data had some "invalid" objects
# which were skipped (not actually deleted). The main idea is to
Expand All @@ -463,9 +492,14 @@ def _delete(self, data=None):
self.ir_data.shapes = data['shapes']
self.ir_data.tracks = data['tracks']

labeledimage_set.delete()
labeledshape_set.delete()
labeledtrack_set.delete()
for labeledimage_ids_chunk in chunked_list(labeledimage_ids, chunk_size=1000):
self._delete_job_labeledimages(labeledimage_ids_chunk)

for labeledshape_ids_chunk in chunked_list(labeledshape_ids, chunk_size=1000):
self._delete_job_labeledshapes(labeledshape_ids_chunk)

for labeledtrack_ids_chunk in chunked_list(labeledtrack_ids, chunk_size=1000):
self._delete_job_labeledtracks(labeledtrack_ids_chunk)

deleted_data = {
"tags": data["tags"],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Generated by Django 4.2.14 on 2024-07-22 07:27

from django.db import migrations, models
import django.db.models.deletion


# Switches the on_delete behaviour of the annotation-related foreign keys
# listed below to DO_NOTHING, so Django itself takes no action on the
# referencing rows when the referenced object is deleted.
# NOTE(review): presumably the engine now removes dependent rows explicitly
# (e.g. models.clear_annotations_in_jobs) - verify every delete path.
class Migration(migrations.Migration):

dependencies = [
("engine", "0081_job_assignee_updated_date_and_more"),
]

# Every operation only redefines an existing foreign key; all of them set
# on_delete to DO_NOTHING.
operations = [
# Tags: link to the job, and attribute values' link to the tag.
migrations.AlterField(
model_name="labeledimage",
name="job",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING, to="engine.job"
),
),
migrations.AlterField(
model_name="labeledimageattributeval",
name="image",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="attributes",
related_query_name="attribute",
to="engine.labeledimage",
),
),
# Shapes: link to the job, self-referencing parent link, and attribute
# values' link to the shape.
migrations.AlterField(
model_name="labeledshape",
name="job",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING, to="engine.job"
),
),
migrations.AlterField(
model_name="labeledshape",
name="parent",
field=models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="elements",
to="engine.labeledshape",
),
),
migrations.AlterField(
model_name="labeledshapeattributeval",
name="shape",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="attributes",
related_query_name="attribute",
to="engine.labeledshape",
),
),
# Tracks: link to the job, self-referencing parent link, and attribute
# values' link to the track.
migrations.AlterField(
model_name="labeledtrack",
name="job",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING, to="engine.job"
),
),
migrations.AlterField(
model_name="labeledtrack",
name="parent",
field=models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="elements",
to="engine.labeledtrack",
),
),
migrations.AlterField(
model_name="labeledtrackattributeval",
name="track",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="attributes",
related_query_name="attribute",
to="engine.labeledtrack",
),
),
# Tracked shapes: attribute values' link to the tracked shape.
migrations.AlterField(
model_name="trackedshapeattributeval",
name="shape",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="attributes",
related_query_name="attribute",
to="engine.trackedshape",
),
),
]
63 changes: 45 additions & 18 deletions cvat/apps/engine/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import extend_schema_field

from cvat.apps.engine.utils import parse_specific_attributes
from cvat.apps.engine.utils import parse_specific_attributes, chunked_list
from cvat.apps.events.utils import cache_deleted

class SafeCharField(models.CharField):
Expand Down Expand Up @@ -325,6 +325,18 @@ class Meta:
def touch(self) -> None:
    """Save the instance, writing only its ``updated_date`` field."""
    fields_to_write = ["updated_date"]
    self.save(update_fields=fields_to_write)

@transaction.atomic(savepoint=False)
def clear_annotations_in_jobs(job_ids):
    """Delete all tracks, shapes and tags (with attribute values) of the given jobs.

    Job ids are processed in chunks of 1000. Within a chunk the rows are
    deleted in a fixed order: attribute values before the objects they
    reference, tracked shapes before their tracks.
    """
    # (model, lookup selecting rows by job id), in deletion order.
    deletion_plan = (
        (TrackedShapeAttributeVal, "shape__track__job_id__in"),
        (TrackedShape, "track__job_id__in"),
        (LabeledTrackAttributeVal, "track__job_id__in"),
        (LabeledTrack, "job_id__in"),
        (LabeledShapeAttributeVal, "shape__job_id__in"),
        (LabeledShape, "job_id__in"),
        (LabeledImageAttributeVal, "image__job_id__in"),
        (LabeledImage, "job_id__in"),
    )

    for batch in chunked_list(job_ids, chunk_size=1000):
        for model, lookup in deletion_plan:
            model.objects.filter(**{lookup: batch}).delete()

class Project(TimestampedModel):
name = SafeCharField(max_length=256)
owner = models.ForeignKey(User, null=True, blank=True,
Expand Down Expand Up @@ -364,7 +376,15 @@ def is_job_staff(self, user_id):
).count() > 0

@cache_deleted
@transaction.atomic(savepoint=False)
def delete(self, using=None, keep_parents=False):
    """Delete the project, removing its labels first.

    Removing labels (and their attributes) up front is a quicker way to
    get rid of annotations and reduces the number of queries: annotations
    are removed by cascade together with their labels.
    """
    # Child labels must be removed before their parents.
    child_labels = self.label_set.exclude(parent=None)
    # exists() is a cheaper emptiness check than count().
    if child_labels.exists():
        child_labels.delete()
    self.label_set.filter(parent=None).delete()
    super().delete(using, keep_parents)

# Extend default permission model
Expand Down Expand Up @@ -470,7 +490,19 @@ def __str__(self):
return self.name

@cache_deleted
@transaction.atomic(savepoint=False)
def delete(self, using=None, keep_parents=False):
    """Delete the task after clearing its annotations.

    Without a project, the task's labels are removed first: that is a
    quicker way to get rid of annotations and reduces the number of
    queries, since annotations are removed by cascade with their labels.
    With a project, annotations of the task's jobs are cleared explicitly.
    """
    if not self.project:
        # Child labels must be removed before their parents.
        child_labels = self.label_set.exclude(parent=None)
        # exists() is a cheaper emptiness check than count().
        if child_labels.exists():
            child_labels.delete()
        self.label_set.filter(parent=None).delete()
    else:
        # NOTE(review): presumably the labels belong to the project in this
        # case and must survive, hence the per-job cleanup - confirm.
        job_ids = list(self.segment_set.values_list('job__id', flat=True))
        clear_annotations_in_jobs(job_ids)
    super().delete(using, keep_parents)

# Redefined a couple of operation for FileSystemStorage to avoid renaming
Expand Down Expand Up @@ -749,18 +781,13 @@ def clean(self) -> None:
return super().clean()

@cache_deleted
@transaction.atomic(savepoint=False)
def delete(self, using=None, keep_parents=False):
if self.segment:
self.segment.delete(using=using, keep_parents=keep_parents)

clear_annotations_in_jobs([self.id])
segment = self.segment
super().delete(using, keep_parents)

self.delete_dirs()

def delete_dirs(self):
job_path = self.get_dirname()
if os.path.isdir(job_path):
shutil.rmtree(job_path)
if segment:
segment.delete()

def make_dirs(self):
job_path = self.get_dirname()
Expand Down Expand Up @@ -912,7 +939,7 @@ def __str__(self):

class Annotation(models.Model):
id = models.BigAutoField(primary_key=True)
job = models.ForeignKey(Job, on_delete=models.CASCADE)
job = models.ForeignKey(Job, on_delete=models.DO_NOTHING)
zhiltsov-max marked this conversation as resolved.
Show resolved Hide resolved
label = models.ForeignKey(Label, on_delete=models.CASCADE)
frame = models.PositiveIntegerField()
group = models.PositiveIntegerField(null=True)
Expand All @@ -939,21 +966,21 @@ class LabeledImage(Annotation):
pass

class LabeledImageAttributeVal(AttributeVal):
image = models.ForeignKey(LabeledImage, on_delete=models.CASCADE,
image = models.ForeignKey(LabeledImage, on_delete=models.DO_NOTHING,
related_name='attributes', related_query_name='attribute')

class LabeledShape(Annotation, Shape):
parent = models.ForeignKey('self', on_delete=models.CASCADE, null=True, related_name='elements')
parent = models.ForeignKey('self', on_delete=models.DO_NOTHING, null=True, related_name='elements')

class LabeledShapeAttributeVal(AttributeVal):
shape = models.ForeignKey(LabeledShape, on_delete=models.CASCADE,
shape = models.ForeignKey(LabeledShape, on_delete=models.DO_NOTHING,
related_name='attributes', related_query_name='attribute')

class LabeledTrack(Annotation):
parent = models.ForeignKey('self', on_delete=models.CASCADE, null=True, related_name='elements')
parent = models.ForeignKey('self', on_delete=models.DO_NOTHING, null=True, related_name='elements')

class LabeledTrackAttributeVal(AttributeVal):
track = models.ForeignKey(LabeledTrack, on_delete=models.CASCADE,
track = models.ForeignKey(LabeledTrack, on_delete=models.DO_NOTHING,
related_name='attributes', related_query_name='attribute')

class TrackedShape(Shape):
Expand All @@ -963,7 +990,7 @@ class TrackedShape(Shape):
frame = models.PositiveIntegerField()

class TrackedShapeAttributeVal(AttributeVal):
shape = models.ForeignKey(TrackedShape, on_delete=models.CASCADE,
shape = models.ForeignKey(TrackedShape, on_delete=models.DO_NOTHING,
related_name='attributes', related_query_name='attribute')

class Profile(models.Model):
Expand Down
4 changes: 4 additions & 0 deletions cvat/apps/engine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,3 +412,7 @@ def directory_tree(path, max_depth=None) -> str:

def is_dataset_export(request: HttpRequest) -> bool:
    """Return the boolean value of the request's ``save_images`` query parameter.

    A missing parameter is treated as ``False`` before conversion.
    """
    save_images = request.query_params.get('save_images', False)
    return to_bool(save_images)

def chunked_list(lst, chunk_size):
    """Split ``lst`` into consecutive slices of at most ``chunk_size`` items.

    Args:
        lst: A sequence supporting ``len()`` and slicing.
        chunk_size: Maximum number of items per chunk; must be positive.

    Returns:
        An iterator over slices of ``lst`` in their original order; the
        last slice may be shorter. Nothing is produced for an empty ``lst``.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    # A negative step would make range() empty, so callers would silently
    # process nothing; fail loudly instead.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    return (lst[start:start + chunk_size] for start in range(0, len(lst), chunk_size))
bsekachev marked this conversation as resolved.
Show resolved Hide resolved
Loading