Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions comfy/sd.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ def encode_from_tokens_scheduled(self, tokens, unprojected=False, add_dict: dict
self.cond_stage_model.set_clip_options({"projected_pooled": False})

self.load_model()
self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
all_hooks.reset()
self.patcher.patch_hooks(None)
if show_pbar:
Expand Down Expand Up @@ -240,6 +241,7 @@ def encode_from_tokens(self, tokens, return_pooled=False, return_dict=False):
self.cond_stage_model.set_clip_options({"projected_pooled": False})

self.load_model()
self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
o = self.cond_stage_model.encode_token_weights(tokens)
cond, pooled = o[:2]
if return_dict:
Expand Down
9 changes: 8 additions & 1 deletion comfy/sd1_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def __init__(self, device="cpu", max_length=77,
self.layer_norm_hidden_state = layer_norm_hidden_state
self.return_projected_pooled = return_projected_pooled
self.return_attention_masks = return_attention_masks
self.execution_device = None

if layer == "hidden":
assert layer_idx is not None
Expand All @@ -163,6 +164,7 @@ def freeze(self):
def set_clip_options(self, options):
layer_idx = options.get("layer", self.layer_idx)
self.return_projected_pooled = options.get("projected_pooled", self.return_projected_pooled)
self.execution_device = options.get("execution_device", self.execution_device)
if isinstance(self.layer, list) or self.layer == "all":
pass
elif layer_idx is None or abs(layer_idx) > self.num_layers:
Expand All @@ -175,6 +177,7 @@ def reset_clip_options(self):
self.layer = self.options_default[0]
self.layer_idx = self.options_default[1]
self.return_projected_pooled = self.options_default[2]
self.execution_device = None

def process_tokens(self, tokens, device):
end_token = self.special_tokens.get("end", None)
Expand Down Expand Up @@ -258,7 +261,11 @@ def process_tokens(self, tokens, device):
return torch.cat(embeds_out), torch.tensor(attention_masks, device=device, dtype=torch.long), num_tokens, embeds_info

def forward(self, tokens):
device = self.transformer.get_input_embeddings().weight.device
if self.execution_device is None:
device = self.transformer.get_input_embeddings().weight.device
else:
device = self.execution_device

embeds, attention_mask, num_tokens, embeds_info = self.process_tokens(tokens, device)

attention_mask_model = None
Expand Down
9 changes: 5 additions & 4 deletions comfy_api/latest/_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,9 +319,10 @@ def save_audio(
for key, value in metadata.items():
output_container.metadata[key] = value

layout = "mono" if waveform.shape[0] == 1 else "stereo"
# Set up the output stream with appropriate properties
if format == "opus":
out_stream = output_container.add_stream("libopus", rate=sample_rate)
out_stream = output_container.add_stream("libopus", rate=sample_rate, layout=layout)
if quality == "64k":
out_stream.bit_rate = 64000
elif quality == "96k":
Expand All @@ -333,7 +334,7 @@ def save_audio(
elif quality == "320k":
out_stream.bit_rate = 320000
elif format == "mp3":
out_stream = output_container.add_stream("libmp3lame", rate=sample_rate)
out_stream = output_container.add_stream("libmp3lame", rate=sample_rate, layout=layout)
if quality == "V0":
# TODO: I would really love to support V3 and V5, but there doesn't seem to be a way to set the qscale level; the property below is a bool.
out_stream.codec_context.qscale = 1
Expand All @@ -342,12 +343,12 @@ def save_audio(
elif quality == "320k":
out_stream.bit_rate = 320000
else: # format == "flac":
out_stream = output_container.add_stream("flac", rate=sample_rate)
out_stream = output_container.add_stream("flac", rate=sample_rate, layout=layout)

frame = av.AudioFrame.from_ndarray(
waveform.movedim(0, 1).reshape(1, -1).float().numpy(),
format="flt",
layout="mono" if waveform.shape[0] == 1 else "stereo",
layout=layout,
)
frame.sample_rate = sample_rate
frame.pts = 0
Expand Down
Loading
Loading