Skip to content

Commit

Permalink
Set AAC codec for audio in mp4 files, add transcoding utility (#3956)
Browse files Browse the repository at this point in the history
* scene_file_writer: convert frame_rate to fraction

* Set audio codec to AAC when format=mp4

* refactor: change import uv.utils.Fraction -> fractions.Fraction

* use config as single source of truth for container format

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: Benjamin Hackl <devel@benjamin-hackl.at>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 19, 2024
1 parent 0a96aac commit 5788f81
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 36 deletions.
88 changes: 52 additions & 36 deletions manim/scene/scene_file_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import json
import shutil
from fractions import Fraction
from pathlib import Path
from queue import Queue
from tempfile import NamedTemporaryFile
Expand Down Expand Up @@ -40,6 +41,38 @@
from manim.renderer.opengl_renderer import OpenGLRenderer


def to_av_frame_rate(fps: float) -> Fraction:
    """Convert a numeric frame rate into an exact ``Fraction``.

    Integers, and values within ``1e-4`` of an integer, become ``n/1``.
    Every other value is snapped to the nearest rate of the form
    ``k * 1000 / 1001`` (for example ``23.976`` becomes ``24000/1001``),
    provided that rate lies within ``0.02`` of ``fps``.

    Parameters
    ----------
    fps
        The frame rate in frames per second.

    Returns
    -------
    Fraction
        The exact rational frame rate.

    Raises
    ------
    ValueError
        If ``fps`` is not a positive finite number, if it rounds to a
        zero frame rate, or if it is neither close to an integer nor
        close to a multiple of ``1000/1001``.
    """
    integer_tolerance = 1e-4
    ntsc_style_tolerance = 0.02

    # The chained comparison is False for NaN, zero, negative values and
    # infinity, so all of them are rejected with the same exception type
    # instead of leaking an OverflowError or a meaningless zero rate.
    if not 0 < fps < float("inf"):
        raise ValueError(f"invalid frame rate: {fps!r}")

    if isinstance(fps, int):
        num, denom = fps, 1
    elif abs(fps - round(fps)) < integer_tolerance:
        num, denom = round(fps), 1
    else:
        denom = 1001
        # Snap to the closest multiple of 1000 so the result is k*1000/1001.
        num = round(fps * denom / 1000) * 1000
        if abs(fps - num / denom) >= ntsc_style_tolerance:
            raise ValueError(f"invalid frame rate: {fps!r}")

    # A tiny positive value can still round down to zero frames per second.
    if num <= 0:
        raise ValueError(f"invalid frame rate: {fps!r}")

    return Fraction(num, denom)


def convert_audio(input_path: Path, output_path: Path, codec_name: str):
    """Re-encode the first audio stream of one file into another file.

    Parameters
    ----------
    input_path
        The file to read audio from. Only its first audio stream is used.
    output_path
        The file to write. No container format is passed explicitly when
        opening it.
    codec_name
        The name of the codec used for the single output stream,
        e.g. ``"pcm_s16le"``, ``"libvorbis"`` or ``"aac"``.
    """
    with av.open(input_path) as source, av.open(output_path, "w") as sink:
        source_stream = source.streams.audio[0]
        sink_stream = sink.add_stream(codec_name)

        # Encode every decoded frame and write the resulting packets.
        for decoded_frame in source.decode(source_stream):
            sink.mux(sink_stream.encode(decoded_frame))

        # Calling encode() without a frame yields the packets the encoder
        # is still holding; they have to be written as well.
        sink.mux(sink_stream.encode())


class SceneFileWriter:
"""
SceneFileWriter is the object that actually writes the animations
Expand Down Expand Up @@ -333,19 +366,7 @@ def add_sound(
# we need to pass delete=False to work on Windows
# TODO: figure out a way to cache the wav file generated (benchmark needed)
wav_file_path = NamedTemporaryFile(suffix=".wav", delete=False)
with (
av.open(file_path) as input_container,
av.open(wav_file_path, "w", format="wav") as output_container,
):
for audio_stream in input_container.streams.audio:
output_stream = output_container.add_stream("pcm_s16le")
for frame in input_container.decode(audio_stream):
for packet in output_stream.encode(frame):
output_container.mux(packet)

for packet in output_stream.encode():
output_container.mux(packet)

convert_audio(file_path, wav_file_path, "pcm_s16le")
new_segment = AudioSegment.from_file(wav_file_path.name)
logger.info(f"Automatically converted {file_path} to .wav")
wav_file_path.close()
Expand Down Expand Up @@ -506,9 +527,7 @@ def open_partial_movie_stream(self, file_path=None) -> None:
file_path = self.partial_movie_files[self.renderer.num_plays]
self.partial_movie_file_path = file_path

fps = config["frame_rate"]
if fps == int(fps): # fps is integer
fps = int(fps)
fps = to_av_frame_rate(config.frame_rate)

partial_movie_file_codec = "libx264"
partial_movie_file_pix_fmt = "yuv420p"
Expand All @@ -517,7 +536,7 @@ def open_partial_movie_stream(self, file_path=None) -> None:
"crf": "23", # ffmpeg: -crf, constant rate factor (improved bitrate)
}

if config.format == "webm":
if config.movie_file_extension == ".webm":
partial_movie_file_codec = "libvpx-vp9"
av_options["-auto-alt-ref"] = "1"
if config.transparent:
Expand All @@ -530,7 +549,7 @@ def open_partial_movie_stream(self, file_path=None) -> None:
with av.open(file_path, mode="w") as video_container:
stream = video_container.add_stream(
partial_movie_file_codec,
rate=config.frame_rate,
rate=fps,
options=av_options,
)
stream.pix_fmt = partial_movie_file_pix_fmt
Expand Down Expand Up @@ -622,7 +641,7 @@ def combine_files(
codec_name="gif" if create_gif else None,
template=partial_movies_stream if not create_gif else None,
)
if config.transparent and config.format == "webm":
if config.transparent and config.movie_file_extension == ".webm":
output_stream.pix_fmt = "yuva420p"
if create_gif:
"""
Expand All @@ -636,7 +655,7 @@ def combine_files(
output_stream.pix_fmt = "pal8"
output_stream.width = config.pixel_width
output_stream.height = config.pixel_height
output_stream.rate = config.frame_rate
output_stream.rate = to_av_frame_rate(config.frame_rate)
graph = av.filter.Graph()
input_buffer = graph.add_buffer(template=partial_movies_stream)
split = graph.add("split")
Expand All @@ -663,7 +682,8 @@ def combine_files(
while True:
try:
frame = graph.pull()
frame.time_base = output_stream.codec_context.time_base
if output_stream.codec_context.time_base is not None:
frame.time_base = output_stream.codec_context.time_base
frame.pts = frames_written
frames_written += 1
output_container.mux(output_stream.encode(frame))
Expand Down Expand Up @@ -704,6 +724,7 @@ def combine_to_movie(self):
movie_file_path = self.movie_file_path
if is_gif_format():
movie_file_path = self.gif_file_path

if len(partial_movie_files) == 0: # Prevent calling concat on empty list
logger.info("No animations are contained in this scene.")
return
Expand Down Expand Up @@ -732,21 +753,16 @@ def combine_to_movie(self):
# but tries to call ffmpeg via its CLI -- which we want
# to avoid. This is why we need to do the conversion
# manually.
if config.format == "webm":
with (
av.open(sound_file_path) as wav_audio,
av.open(sound_file_path.with_suffix(".ogg"), "w") as opus_audio,
):
wav_audio_stream = wav_audio.streams.audio[0]
opus_audio_stream = opus_audio.add_stream("libvorbis")
for frame in wav_audio.decode(wav_audio_stream):
for packet in opus_audio_stream.encode(frame):
opus_audio.mux(packet)

for packet in opus_audio_stream.encode():
opus_audio.mux(packet)

sound_file_path = sound_file_path.with_suffix(".ogg")
if config.movie_file_extension == ".webm":
ogg_sound_file_path = sound_file_path.with_suffix(".ogg")
convert_audio(sound_file_path, ogg_sound_file_path, "libvorbis")
sound_file_path = ogg_sound_file_path
elif config.movie_file_extension == ".mp4":
# Similarly, pyav may reject wav audio in an .mp4 file;
# convert to AAC.
aac_sound_file_path = sound_file_path.with_suffix(".aac")
convert_audio(sound_file_path, aac_sound_file_path, "aac")
sound_file_path = aac_sound_file_path

temp_file_path = movie_file_path.with_name(
f"{movie_file_path.stem}_temp{movie_file_path.suffix}"
Expand Down
10 changes: 10 additions & 0 deletions tests/test_scene_rendering/test_file_writer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import sys
from fractions import Fraction
from pathlib import Path

import av
import numpy as np
import pytest

from manim import DR, Circle, Create, Scene, Star, tempconfig
from manim.scene.scene_file_writer import to_av_frame_rate
from manim.utils.commands import capture, get_video_metadata


Expand Down Expand Up @@ -175,3 +177,11 @@ def test_unicode_partial_movie(tmpdir, simple_scenes_path):

_, err, exit_code = capture(command)
assert exit_code == 0, err


def test_frame_rates():
    """Check integer, near-integer and 1000/1001-style rates convert exactly."""
    expected_rates = (
        (25, Fraction(25, 1)),
        (24.0, Fraction(24, 1)),
        (23.976, Fraction(24 * 1000, 1001)),
        (23.98, Fraction(24 * 1000, 1001)),
        (59.94, Fraction(60 * 1000, 1001)),
    )
    for fps, expected in expected_rates:
        assert to_av_frame_rate(fps) == expected

0 comments on commit 5788f81

Please sign in to comment.