Set AAC codec for audio in mp4 files, add transcoding utility (#3956)

* scene_file_writer: convert frame_rate to fraction
* Set audio codec to AAC when format=mp4
* refactor: change import uv.utils.Fraction -> fractions.Fraction
* use config as single source of truth for container format
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: Benjamin Hackl <devel@benjamin-hackl.at>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
ManimCommunity · Oct 19, 2024 · 5788f81 · 5788f81
1 parent 0a96aac
commit 5788f81
Show file tree

Hide file tree

Showing 2 changed files with 62 additions and 36 deletions.
diff --git a/manim/scene/scene_file_writer.py b/manim/scene/scene_file_writer.py
@@ -6,6 +6,7 @@
 
 import json
 import shutil
+from fractions import Fraction
 from pathlib import Path
 from queue import Queue
 from tempfile import NamedTemporaryFile
@@ -40,6 +41,38 @@
     from manim.renderer.opengl_renderer import OpenGLRenderer
 
 
+def to_av_frame_rate(fps):
+    """Convert a numeric frame rate into an exact :class:`~fractions.Fraction`.
+
+    Parameters
+    ----------
+    fps
+        The frame rate. ``int`` values are used as-is; any other value is
+        first compared against its nearest integer and then against the
+        nearest rate of the form ``k * 1000 / 1001``.
+
+    Returns
+    -------
+    Fraction
+        ``fps / 1`` for integers, ``round(fps) / 1`` when ``fps`` is within
+        ``1e-4`` of an integer, and otherwise ``k * 1000 / 1001`` for the
+        integer ``k`` nearest to ``fps * 1001 / 1000``.
+
+    Raises
+    ------
+    ValueError
+        If ``fps`` is not near an integer and differs from the chosen
+        ``k * 1000 / 1001`` rate by ``0.02`` or more.
+    """
+    integer_tolerance = 1e-4
+    fractional_tolerance = 0.02
+
+    if isinstance(fps, int):
+        return Fraction(fps, 1)
+
+    nearest_integer = round(fps)
+    if abs(fps - nearest_integer) < integer_tolerance:
+        return Fraction(nearest_integer, 1)
+
+    # Remaining candidates are rates with denominator 1001 and a numerator
+    # that is a multiple of 1000 (e.g. 23.976 -> 24000/1001).
+    denominator = 1001
+    numerator = round(fps * denominator / 1000) * 1000
+    if abs(fps - numerator / denominator) >= fractional_tolerance:
+        raise ValueError("invalid frame rate")
+    return Fraction(numerator, denominator)
+
+
+def convert_audio(input_path: Path, output_path: Path, codec_name: str):
+    """Re-encode the first audio stream of ``input_path`` into ``output_path``.
+
+    Parameters
+    ----------
+    input_path
+        The audio source, opened for reading with ``av.open``.
+    output_path
+        The destination, opened for writing with ``av.open``. No explicit
+        container format is passed, so it is presumably derived from the
+        destination's name -- confirm against the PyAV documentation.
+    codec_name
+        Name of the codec used for the single output stream
+        (e.g. ``"pcm_s16le"``, ``"libvorbis"``, ``"aac"``).
+
+    Notes
+    -----
+    Only ``streams.audio[0]`` of the input is converted; any further audio
+    streams are ignored.
+    """
+    with av.open(input_path) as source, av.open(output_path, "w") as sink:
+        source_stream = source.streams.audio[0]
+        encoder = sink.add_stream(codec_name)
+
+        for decoded_frame in source.decode(source_stream):
+            sink.mux(encoder.encode(decoded_frame))
+
+        # Calling encode() without a frame drains whatever packets the
+        # encoder is still holding back.
+        sink.mux(encoder.encode())
+
+
 class SceneFileWriter:
     """
     SceneFileWriter is the object that actually writes the animations
@@ -333,19 +366,7 @@ def add_sound(
             # we need to pass delete=False to work on Windows
             # TODO: figure out a way to cache the wav file generated (benchmark needed)
             wav_file_path = NamedTemporaryFile(suffix=".wav", delete=False)
-            with (
-                av.open(file_path) as input_container,
-                av.open(wav_file_path, "w", format="wav") as output_container,
-            ):
-                for audio_stream in input_container.streams.audio:
-                    output_stream = output_container.add_stream("pcm_s16le")
-                    for frame in input_container.decode(audio_stream):
-                        for packet in output_stream.encode(frame):
-                            output_container.mux(packet)
-
-                    for packet in output_stream.encode():
-                        output_container.mux(packet)
-
+            convert_audio(file_path, wav_file_path, "pcm_s16le")
             new_segment = AudioSegment.from_file(wav_file_path.name)
             logger.info(f"Automatically converted {file_path} to .wav")
             wav_file_path.close()
@@ -506,9 +527,7 @@ def open_partial_movie_stream(self, file_path=None) -> None:
             file_path = self.partial_movie_files[self.renderer.num_plays]
         self.partial_movie_file_path = file_path
 
-        fps = config["frame_rate"]
-        if fps == int(fps):  # fps is integer
-            fps = int(fps)
+        fps = to_av_frame_rate(config.frame_rate)
 
         partial_movie_file_codec = "libx264"
         partial_movie_file_pix_fmt = "yuv420p"
@@ -517,7 +536,7 @@ def open_partial_movie_stream(self, file_path=None) -> None:
             "crf": "23",  # ffmpeg: -crf, constant rate factor (improved bitrate)
         }
 
-        if config.format == "webm":
+        if config.movie_file_extension == ".webm":
             partial_movie_file_codec = "libvpx-vp9"
             av_options["-auto-alt-ref"] = "1"
             if config.transparent:
@@ -530,7 +549,7 @@ def open_partial_movie_stream(self, file_path=None) -> None:
         with av.open(file_path, mode="w") as video_container:
             stream = video_container.add_stream(
                 partial_movie_file_codec,
-                rate=config.frame_rate,
+                rate=fps,
                 options=av_options,
             )
             stream.pix_fmt = partial_movie_file_pix_fmt
@@ -622,7 +641,7 @@ def combine_files(
             codec_name="gif" if create_gif else None,
             template=partial_movies_stream if not create_gif else None,
         )
-        if config.transparent and config.format == "webm":
+        if config.transparent and config.movie_file_extension == ".webm":
             output_stream.pix_fmt = "yuva420p"
         if create_gif:
             """
@@ -636,7 +655,7 @@ def combine_files(
                 output_stream.pix_fmt = "pal8"
             output_stream.width = config.pixel_width
             output_stream.height = config.pixel_height
-            output_stream.rate = config.frame_rate
+            output_stream.rate = to_av_frame_rate(config.frame_rate)
             graph = av.filter.Graph()
             input_buffer = graph.add_buffer(template=partial_movies_stream)
             split = graph.add("split")
@@ -663,7 +682,8 @@ def combine_files(
             while True:
                 try:
                     frame = graph.pull()
-                    frame.time_base = output_stream.codec_context.time_base
+                    if output_stream.codec_context.time_base is not None:
+                        frame.time_base = output_stream.codec_context.time_base
                     frame.pts = frames_written
                     frames_written += 1
                     output_container.mux(output_stream.encode(frame))
@@ -704,6 +724,7 @@ def combine_to_movie(self):
         movie_file_path = self.movie_file_path
         if is_gif_format():
             movie_file_path = self.gif_file_path
+
         if len(partial_movie_files) == 0:  # Prevent calling concat on empty list
             logger.info("No animations are contained in this scene.")
             return
@@ -732,21 +753,16 @@ def combine_to_movie(self):
             # but tries to call ffmpeg via its CLI -- which we want
             # to avoid. This is why we need to do the conversion
             # manually.
-            if config.format == "webm":
-                with (
-                    av.open(sound_file_path) as wav_audio,
-                    av.open(sound_file_path.with_suffix(".ogg"), "w") as opus_audio,
-                ):
-                    wav_audio_stream = wav_audio.streams.audio[0]
-                    opus_audio_stream = opus_audio.add_stream("libvorbis")
-                    for frame in wav_audio.decode(wav_audio_stream):
-                        for packet in opus_audio_stream.encode(frame):
-                            opus_audio.mux(packet)
-
-                    for packet in opus_audio_stream.encode():
-                        opus_audio.mux(packet)
-
-                sound_file_path = sound_file_path.with_suffix(".ogg")
+            if config.movie_file_extension == ".webm":
+                ogg_sound_file_path = sound_file_path.with_suffix(".ogg")
+                convert_audio(sound_file_path, ogg_sound_file_path, "libvorbis")
+                sound_file_path = ogg_sound_file_path
+            elif config.movie_file_extension == ".mp4":
+                # Similarly, pyav may reject wav audio in an .mp4 file;
+                # convert to AAC.
+                aac_sound_file_path = sound_file_path.with_suffix(".aac")
+                convert_audio(sound_file_path, aac_sound_file_path, "aac")
+                sound_file_path = aac_sound_file_path
 
             temp_file_path = movie_file_path.with_name(
                 f"{movie_file_path.stem}_temp{movie_file_path.suffix}"

diff --git a/tests/test_scene_rendering/test_file_writer.py b/tests/test_scene_rendering/test_file_writer.py
@@ -1,11 +1,13 @@
 import sys
+from fractions import Fraction
 from pathlib import Path
 
 import av
 import numpy as np
 import pytest
 
 from manim import DR, Circle, Create, Scene, Star, tempconfig
+from manim.scene.scene_file_writer import to_av_frame_rate
 from manim.utils.commands import capture, get_video_metadata
 
 
@@ -175,3 +177,11 @@ def test_unicode_partial_movie(tmpdir, simple_scenes_path):
 
     _, err, exit_code = capture(command)
     assert exit_code == 0, err
+
+
+def test_frame_rates():
+    """Check ``to_av_frame_rate`` on integer, integral-float and x/1001 rates."""
+    expected_by_input = [
+        (25, Fraction(25, 1)),
+        (24.0, Fraction(24, 1)),
+        (23.976, Fraction(24 * 1000, 1001)),
+        (23.98, Fraction(24 * 1000, 1001)),
+        (59.94, Fraction(60 * 1000, 1001)),
+    ]
+    for fps, expected in expected_by_input:
+        assert to_av_frame_rate(fps) == expected