Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Memory optimization for image chunk preparation #8581

Merged
merged 3 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Fixed

- Excessive memory consumption during preparation of image chunks
  (<https://github.com/cvat-ai/cvat/pull/8581>)
12 changes: 5 additions & 7 deletions cvat/apps/engine/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
ZipChunkWriter,
ZipCompressedChunkWriter,
)
from cvat.apps.engine.utils import md5_hash, preload_images
from cvat.apps.engine.utils import md5_hash, load_image
from utils.dataset_manifest import ImageManifestManager

slogger = ServerLogManager(__name__)
Expand Down Expand Up @@ -321,15 +321,13 @@ def _read_raw_images(
cloud_storage_instance.bulk_download_to_dir(
files=files_to_download, upload_dir=tmp_dir
)
media = preload_images(media)

for checksum, (_, fs_filename, _) in zip(checksums, media):
if checksum and not md5_hash(fs_filename) == checksum:
for checksum, media_item in zip(checksums, media):
if checksum and not md5_hash(media_item[1]) == checksum:
slogger.cloud_storage[db_cloud_storage.id].warning(
"Hash sums of files {} do not match".format(file_name)
)

yield from media
yield load_image(media_item)
else:
requested_frame_iter = iter(frame_ids)
next_requested_frame_id = next(requested_frame_iter, None)
Expand Down Expand Up @@ -359,7 +357,7 @@ def _read_raw_images(
assert next_requested_frame_id is None

if db_task.dimension == models.DimensionType.DIM_2D:
media = preload_images(media)
media = map(load_image, media)

yield from media

Expand Down
4 changes: 2 additions & 2 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from cvat.apps.engine.models import RequestAction, RequestTarget
from cvat.apps.engine.utils import (
av_scan_paths, format_list,get_rq_job_meta,
define_dependent_job, get_rq_lock_by_user, preload_images
define_dependent_job, get_rq_lock_by_user, load_image
)
from cvat.apps.engine.rq_job_handler import RQId
from cvat.utils.http import make_requests_session, PROXIES_FOR_UNTRUSTED_URLS
Expand Down Expand Up @@ -1537,7 +1537,7 @@ def save_chunks(
MEDIA_TYPES['archive']['extractor'],
))
):
chunk_data = preload_images(chunk_data)
chunk_data = list(map(load_image, chunk_data))

# TODO: extract into a class

Expand Down
7 changes: 2 additions & 5 deletions cvat/apps/engine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import traceback
from contextlib import suppress, nullcontext
from typing import Any, Dict, Optional, Callable, Sequence, Union, Iterable
from typing import Any, Dict, Optional, Callable, Sequence, Union
import subprocess
import os
import urllib.parse
Expand Down Expand Up @@ -363,14 +363,11 @@ def sendfile(

return _sendfile(request, filename, attachment, attachment_filename, mimetype, encoding)

def preload_image(image: tuple[str, str, str])-> tuple[Image.Image, str, str]:
def load_image(image: tuple[str, str, str])-> tuple[Image.Image, str, str]:
pil_img = Image.open(image[0])
pil_img.load()
return pil_img, image[1], image[2]

def preload_images(images: Iterable[tuple[str, str, str]]) -> list[tuple[Image.Image, str, str]]:
return list(map(preload_image, images))

def build_backup_file_name(
*,
class_name: str,
Expand Down
Loading