Skip to content

Commit 15aa922

Browse files
Trim audio to video when saving video. (Comfy-Org#9617)
1 parent c7bb3e2 commit 15aa922

File tree

1 file changed

+10
-26
lines changed

1 file changed

+10
-26
lines changed

comfy_api/latest/_input_impl/video_types.py

Lines changed: 10 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import io
99
import json
1010
import numpy as np
11+
import math
1112
import torch
1213
from comfy_api.latest._util import VideoContainer, VideoCodec, VideoComponents
1314

@@ -282,8 +283,6 @@ def save_to(
282283
if self.__components.audio:
283284
audio_sample_rate = int(self.__components.audio['sample_rate'])
284285
audio_stream = output.add_stream('aac', rate=audio_sample_rate)
285-
audio_stream.sample_rate = audio_sample_rate
286-
audio_stream.format = 'fltp'
287286

288287
# Encode video
289288
for i, frame in enumerate(self.__components.images):
@@ -298,27 +297,12 @@ def save_to(
298297
output.mux(packet)
299298

300299
if audio_stream and self.__components.audio:
301-
# Encode audio
302-
samples_per_frame = int(audio_sample_rate / frame_rate)
303-
num_frames = self.__components.audio['waveform'].shape[2] // samples_per_frame
304-
for i in range(num_frames):
305-
start = i * samples_per_frame
306-
end = start + samples_per_frame
307-
# TODO(Feature) - Add support for stereo audio
308-
chunk = (
309-
self.__components.audio["waveform"][0, 0, start:end]
310-
.unsqueeze(0)
311-
.contiguous()
312-
.numpy()
313-
)
314-
audio_frame = av.AudioFrame.from_ndarray(chunk, format='fltp', layout='mono')
315-
audio_frame.sample_rate = audio_sample_rate
316-
audio_frame.pts = i * samples_per_frame
317-
for packet in audio_stream.encode(audio_frame):
318-
output.mux(packet)
319-
320-
# Flush audio
321-
for packet in audio_stream.encode(None):
322-
output.mux(packet)
323-
324-
300+
waveform = self.__components.audio['waveform']
301+
waveform = waveform[:, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])]
302+
frame = av.AudioFrame.from_ndarray(waveform.movedim(2, 1).reshape(1, -1).float().numpy(), format='flt', layout='mono' if waveform.shape[1] == 1 else 'stereo')
303+
frame.sample_rate = audio_sample_rate
304+
frame.pts = 0
305+
output.mux(audio_stream.encode(frame))
306+
307+
# Flush encoder
308+
output.mux(audio_stream.encode(None))

0 commit comments

Comments
 (0)