Set audio codec to AAC when format=mp4

achille-fouilleul · achille-fouilleul · commit 02440b058aba · 2024-10-14T10:22:12.000+02:00
diff --git a/manim/scene/scene_file_writer.py b/manim/scene/scene_file_writer.py
@@ -57,6 +57,21 @@ def to_av_frame_rate(fps):
     return av.utils.Fraction(num, denom)
 
 
+def convert_audio(input_path: Path, output_path: Path, codec_name: str):
+    with (  # decode input_path and re-encode it into output_path
+        av.open(input_path) as input_audio,  # source, opened for reading
+        av.open(output_path, "w") as output_audio,  # destination, opened for writing
+    ):
+        input_audio_stream = input_audio.streams.audio[0]  # first audio stream only
+        output_audio_stream = output_audio.add_stream(codec_name)  # e.g. "aac"
+        for frame in input_audio.decode(input_audio_stream):
+            for packet in output_audio_stream.encode(frame):
+                output_audio.mux(packet)
+        # No-argument encode() flushes packets still buffered in the encoder.
+        for packet in output_audio_stream.encode():
+            output_audio.mux(packet)
+
+
 class SceneFileWriter:
     """
     SceneFileWriter is the object that actually writes the animations
@@ -350,19 +365,7 @@ def add_sound(
             # we need to pass delete=False to work on Windows
             # TODO: figure out a way to cache the wav file generated (benchmark needed)
             wav_file_path = NamedTemporaryFile(suffix=".wav", delete=False)
-            with (
-                av.open(file_path) as input_container,
-                av.open(wav_file_path, "w", format="wav") as output_container,
-            ):
-                for audio_stream in input_container.streams.audio:
-                    output_stream = output_container.add_stream("pcm_s16le")
-                    for frame in input_container.decode(audio_stream):
-                        for packet in output_stream.encode(frame):
-                            output_container.mux(packet)
-
-                    for packet in output_stream.encode():
-                        output_container.mux(packet)
-
+            convert_audio(file_path, wav_file_path, "pcm_s16le")
             new_segment = AudioSegment.from_file(wav_file_path.name)
             logger.info(f"Automatically converted {file_path} to .wav")
             wav_file_path.close()
@@ -748,21 +751,17 @@ def combine_to_movie(self):
             # but tries to call ffmpeg via its CLI -- which we want
             # to avoid. This is why we need to do the conversion
             # manually.
-            if config.format == "webm":
-                with (
-                    av.open(sound_file_path) as wav_audio,
-                    av.open(sound_file_path.with_suffix(".ogg"), "w") as opus_audio,
-                ):
-                    wav_audio_stream = wav_audio.streams.audio[0]
-                    opus_audio_stream = opus_audio.add_stream("libvorbis")
-                    for frame in wav_audio.decode(wav_audio_stream):
-                        for packet in opus_audio_stream.encode(frame):
-                            opus_audio.mux(packet)
-
-                    for packet in opus_audio_stream.encode():
-                        opus_audio.mux(packet)
-
-                sound_file_path = sound_file_path.with_suffix(".ogg")
+            out_suffix = movie_file_path.suffix.lower()
+            if config.format == "webm" or out_suffix == ".webm":
+                ogg_sound_file_path = sound_file_path.with_suffix(".ogg")
+                convert_audio(sound_file_path, ogg_sound_file_path, "libvorbis")
+                sound_file_path = ogg_sound_file_path
+            elif config.format == "mp4" or out_suffix == ".mp4":
+                # Similarly, pyav may reject wav audio in an .mp4 file;
+                # convert to AAC.
+                aac_sound_file_path = sound_file_path.with_suffix(".aac")
+                convert_audio(sound_file_path, aac_sound_file_path, "aac")
+                sound_file_path = aac_sound_file_path
 
             temp_file_path = movie_file_path.with_name(
                 f"{movie_file_path.stem}_temp{movie_file_path.suffix}"