cvat-ai · nmanovic · Jul 19, 2023 · Jul 18, 2023 · Jul 18, 2023 · Jul 18, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/opencv/cvat/pull/6454>)
 - \[SDK\] SDK should not change input data in models (<https://github.com/opencv/cvat/pull/6455>)
 - 3D job can not be opened in validation mode (<https://github.com/opencv/cvat/pull/6507>)
+- Memory leak caused by unclosed `av` containers (<https://github.com/opencv/cvat/pull/6501>)
 
 ### Security
 - TDB

@@ -429,32 +429,27 @@ def _has_frame(self, i):
 
         return False
 
-    def _decode(self, container):
-        frame_num = 0
-        for packet in container.demux():
-            if packet.stream.type == 'video':
+    def __iter__(self):
+        with self._get_av_container() as container:
+            stream = container.streams.video[0]
+            stream.thread_type = 'AUTO'
+            frame_num = 0
+            for packet in container.demux(stream):
                 for image in packet.decode():
                     frame_num += 1
                     if self._has_frame(frame_num - 1):
                         if packet.stream.metadata.get('rotate'):
-                            old_image = image
+                            pts = image.pts
                             image = av.VideoFrame().from_ndarray(
                                 rotate_image(
                                     image.to_ndarray(format='bgr24'),
-                                    360 - int(container.streams.video[0].metadata.get('rotate'))
+                                    360 - int(stream.metadata.get('rotate'))
                                 ),
                                 format ='bgr24'
                             )
-                            image.pts = old_image.pts
+                            image.pts = pts
                         yield (image, self._source_path[0], image.pts)
 
-    def __iter__(self):
-        container = self._get_av_container()
-        source_video_stream = container.streams.video[0]
-        source_video_stream.thread_type = 'AUTO'
-
-        return self._decode(container)
-
     def get_progress(self, pos):
         duration = self._get_duration()
         return pos / duration if duration else None
@@ -465,38 +460,38 @@ def _get_av_container(self):
         return av.open(self._source_path[0])
 
     def _get_duration(self):
-        container = self._get_av_container()
-        stream = container.streams.video[0]
-        duration = None
-        if stream.duration:
-            duration = stream.duration
-        else:
-            # may have a DURATION in format like "01:16:45.935000000"
-            duration_str = stream.metadata.get("DURATION", None)
-            tb_denominator = stream.time_base.denominator
-            if duration_str and tb_denominator:
-                _hour, _min, _sec = duration_str.split(':')
-                duration_sec = 60*60*float(_hour) + 60*float(_min) + float(_sec)
-                duration = duration_sec * tb_denominator
-        return duration
+        with self._get_av_container() as container:
+            stream = container.streams.video[0]
+            duration = None
+            if stream.duration:
+                duration = stream.duration
+            else:
+                # stream.duration is missing or zero: fall back to a DURATION metadata tag formatted as "HH:MM:SS.fraction", e.g. "01:16:45.935000000"
+                duration_str = stream.metadata.get("DURATION", None)
+                tb_denominator = stream.time_base.denominator
+                if duration_str and tb_denominator:
+                    _hour, _min, _sec = duration_str.split(':')
+                    duration_sec = 60*60*float(_hour) + 60*float(_min) + float(_sec)
+                    duration = duration_sec * tb_denominator
+            return duration
 
     def get_preview(self, frame):
-        container = self._get_av_container()
-        stream = container.streams.video[0]
-        tb_denominator = stream.time_base.denominator
-        needed_time = int((frame / stream.guessed_rate) * tb_denominator)
-        container.seek(offset=needed_time, stream=stream)
-        for packet in container.demux(stream):
-            for frame in packet.decode():
-                return self._get_preview(frame.to_image() if not stream.metadata.get('rotate') \
-                    else av.VideoFrame().from_ndarray(
-                        rotate_image(
-                            frame.to_ndarray(format='bgr24'),
-                            360 - int(container.streams.video[0].metadata.get('rotate'))
-                        ),
-                        format ='bgr24'
-                    ).to_image()
-                )
+        with self._get_av_container() as container:
+            stream = container.streams.video[0]
+            tb_denominator = stream.time_base.denominator
+            needed_time = int((frame / stream.guessed_rate) * tb_denominator)
+            container.seek(offset=needed_time, stream=stream)
+            for packet in container.demux(stream):
+                for frame in packet.decode():
+                    return self._get_preview(frame.to_image() if not stream.metadata.get('rotate') \
+                        else av.VideoFrame().from_ndarray(
+                            rotate_image(
+                                frame.to_ndarray(format='bgr24'),
+                                360 - int(container.streams.video[0].metadata.get('rotate'))
+                            ),
+                            format ='bgr24'
+                        ).to_image()
+                    )
 
     def get_image_size(self, i):
         image = (next(iter(self)))[0]
@@ -700,6 +695,8 @@ def save_as_chunk(
         return image_sizes
 
 class Mpeg4ChunkWriter(IChunkWriter):
+    FORMAT = 'mp4'
+
     def __init__(self, quality=67):
         # translate inversed range [1:100] to [0:51]
         quality = round(51 * (100 - quality) / 99)
@@ -722,21 +719,20 @@ def __init__(self, quality=67):
                 "preset": "ultrafast",
             }
 
-    def _create_av_container(self, path, w, h, rate, options, f='mp4'):
+    def _add_video_stream(self, container, w, h, rate, options):
         # x264 requires width and height must be divisible by 2 for yuv420p
         if h % 2:
             h += 1
         if w % 2:
             w += 1
 
-        container = av.open(path, 'w',format=f)
         video_stream = container.add_stream(self._codec_name, rate=rate)
         video_stream.pix_fmt = "yuv420p"
         video_stream.width = w
         video_stream.height = h
         video_stream.options = options
 
-        return container, video_stream
+        return video_stream
 
     def save_as_chunk(self, images, chunk_path):
         if not images:
@@ -745,16 +741,16 @@ def save_as_chunk(self, images, chunk_path):
         input_w = images[0][0].width
         input_h = images[0][0].height
 
-        output_container, output_v_stream = self._create_av_container(
-            path=chunk_path,
-            w=input_w,
-            h=input_h,
-            rate=self._output_fps,
-            options=self._codec_opts,
-        )
+        with av.open(chunk_path, 'w', format=self.FORMAT) as output_container:
+            output_v_stream = self._add_video_stream(
+                container=output_container,
+                w=input_w,
+                h=input_h,
+                rate=self._output_fps,
+                options=self._codec_opts,
+            )
 
-        self._encode_images(images, output_container, output_v_stream)
-        output_container.close()
+            self._encode_images(images, output_container, output_v_stream)
         return [(input_w, input_h)]
 
     @staticmethod
@@ -797,16 +793,16 @@ def save_as_chunk(self, images, chunk_path):
         output_h = input_h // downscale_factor
         output_w = input_w // downscale_factor
 
-        output_container, output_v_stream = self._create_av_container(
-            path=chunk_path,
-            w=output_w,
-            h=output_h,
-            rate=self._output_fps,
-            options=self._codec_opts,
-        )
+        with av.open(chunk_path, 'w', format=self.FORMAT) as output_container:
+            output_v_stream = self._add_video_stream(
+                container=output_container,
+                w=output_w,
+                h=output_h,
+                rate=self._output_fps,
+                options=self._codec_opts,
+            )
 
-        self._encode_images(images, output_container, output_v_stream)
-        output_container.close()
+            self._encode_images(images, output_container, output_v_stream)
         return [(input_w, input_h)]
 
 def _is_archive(path):