88import io
99import json
1010import numpy as np
11+ import math
1112import torch
1213from comfy_api .latest ._util import VideoContainer , VideoCodec , VideoComponents
1314
@@ -282,8 +283,6 @@ def save_to(
282283 if self .__components .audio :
283284 audio_sample_rate = int (self .__components .audio ['sample_rate' ])
284285 audio_stream = output .add_stream ('aac' , rate = audio_sample_rate )
285- audio_stream .sample_rate = audio_sample_rate
286- audio_stream .format = 'fltp'
287286
288287 # Encode video
289288 for i , frame in enumerate (self .__components .images ):
@@ -298,27 +297,12 @@ def save_to(
298297 output .mux (packet )
299298
300299 if audio_stream and self .__components .audio :
301- # Encode audio
302- samples_per_frame = int (audio_sample_rate / frame_rate )
303- num_frames = self .__components .audio ['waveform' ].shape [2 ] // samples_per_frame
304- for i in range (num_frames ):
305- start = i * samples_per_frame
306- end = start + samples_per_frame
307- # TODO(Feature) - Add support for stereo audio
308- chunk = (
309- self .__components .audio ["waveform" ][0 , 0 , start :end ]
310- .unsqueeze (0 )
311- .contiguous ()
312- .numpy ()
313- )
314- audio_frame = av .AudioFrame .from_ndarray (chunk , format = 'fltp' , layout = 'mono' )
315- audio_frame .sample_rate = audio_sample_rate
316- audio_frame .pts = i * samples_per_frame
317- for packet in audio_stream .encode (audio_frame ):
318- output .mux (packet )
319-
320- # Flush audio
321- for packet in audio_stream .encode (None ):
322- output .mux (packet )
323-
324-
300+ waveform = self .__components .audio ['waveform' ]
301+ waveform = waveform [:, :, :math .ceil ((audio_sample_rate / frame_rate ) * self .__components .images .shape [0 ])]
302+ frame = av .AudioFrame .from_ndarray (waveform .movedim (2 , 1 ).reshape (1 , - 1 ).float ().numpy (), format = 'flt' , layout = 'mono' if waveform .shape [1 ] == 1 else 'stereo' )
303+ frame .sample_rate = audio_sample_rate
304+ frame .pts = 0
305+ output .mux (audio_stream .encode (frame ))
306+
307+ # Flush encoder
308+ output .mux (audio_stream .encode (None ))
0 commit comments