Merged
6 changes: 4 additions & 2 deletions comfy/model_patcher.py
@@ -718,6 +718,7 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
                 continue

             cast_weight = self.force_cast_weights
+            m.comfy_force_cast_weights = self.force_cast_weights
             if lowvram_weight:
                 if hasattr(m, "comfy_cast_weights"):
                     m.weight_function = []
@@ -790,11 +791,12 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
                 for param in params:
                     self.pin_weight_to_device("{}.{}".format(n, param))

+        usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / (1024 * 1024)) if lowvram_model_memory < 1e32 else ""
         if lowvram_counter > 0:
-            logging.info("loaded partially; {:.2f} MB usable, {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
+            logging.info("loaded partially; {} {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(usable_stat, mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter))
             self.model.model_lowvram = True
         else:
-            logging.info("loaded completely; {:.2f} MB usable, {:.2f} MB loaded, full load: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
+            logging.info("loaded completely; {} {:.2f} MB loaded, full load: {}".format(usable_stat, mem_counter / (1024 * 1024), full_load))
             self.model.model_lowvram = False
         if full_load:
             self.model.to(device_to)
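Net effect: both log lines now share a single usable_stat fragment, and the "usable" figure is printed only when a real cap is in play. A minimal standalone sketch (not ComfyUI code; treating values >= 1e32 as the "no limit" sentinel is an assumption based on the guard above):

def load_summary(lowvram_model_memory, mem_counter, full_load=True):
    # Only report "usable" memory when a finite cap was requested.
    MB = 1024 * 1024
    usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / MB) if lowvram_model_memory < 1e32 else ""
    return "loaded completely; {} {:.2f} MB loaded, full load: {}".format(usable_stat, mem_counter / MB, full_load)

print(load_summary(1e32, 512 * 1024 * 1024))                # no cap: usable stat omitted
print(load_summary(4096 * 1024 * 1024, 512 * 1024 * 1024))  # cap set: "4096.00 MB usable," included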
30 changes: 15 additions & 15 deletions comfy/ops.py
@@ -654,29 +654,29 @@ def forward(self, input, *args, **kwargs):
         run_every_op()

         input_shape = input.shape
-        tensor_3d = input.ndim == 3

-        if self._full_precision_mm or self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
-            return self.forward_comfy_cast_weights(input, *args, **kwargs)
+        reshaped_3d = False

         if (getattr(self, 'layout_type', None) is not None and
-            not isinstance(input, QuantizedTensor)):
+            not isinstance(input, QuantizedTensor) and not self._full_precision_mm and
+            not getattr(self, 'comfy_force_cast_weights', False) and
+            len(self.weight_function) == 0 and len(self.bias_function) == 0):

-            # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others)
-            if tensor_3d:
-                input = input.reshape(-1, input_shape[2])
-
-            if input.ndim != 2:
-                # Fall back to comfy_cast_weights for non-2D tensors
-                return self.forward_comfy_cast_weights(input.reshape(input_shape), *args, **kwargs)
-
-            # dtype is now implicit in the layout class
-            input = QuantizedTensor.from_float(input, self.layout_type, scale=getattr(self, 'input_scale', None))
+            input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input

+            # Fall back to non-quantized for non-2D tensors
+            if input_reshaped.ndim == 2:
+                reshaped_3d = input.ndim == 3
+                # dtype is now implicit in the layout class
+                scale = getattr(self, 'input_scale', None)
+                if scale is not None:
+                    scale = comfy.model_management.cast_to_device(scale, input.device, None)
+                input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale)

-        output = self._forward(input, self.weight, self.bias)
+        output = self.forward_comfy_cast_weights(input)

         # Reshape output back to 3D if input was 3D
-        if tensor_3d:
+        if reshaped_3d:
             output = output.reshape((input_shape[0], input_shape[1], self.weight.shape[0]))

         return output
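The rewrite folds the old early-return into the quantization gate (quantize only when no weight/bias patch functions or forced casting are active) and turns the hard error path for non-2D inputs into a soft fallback. A self-contained sketch of the shape-handling pattern, using an identity stand-in for the quantizer since QuantizedTensor's internals are not shown here:

import torch

def quantized_linear(input, weight, bias, quantize):
    # Flatten (batch, seq, features) to (batch*seq, features): many quantized
    # matmul kernels (NVFP4 among them, per the removed comment) want 2D inputs.
    input_shape = input.shape
    reshaped_3d = False
    input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input
    if input_reshaped.ndim == 2:
        reshaped_3d = input.ndim == 3
        input = quantize(input_reshaped)
    # Non-2D inputs fall through unquantized instead of erroring out.
    output = torch.nn.functional.linear(input, weight, bias)
    if reshaped_3d:
        output = output.reshape((input_shape[0], input_shape[1], weight.shape[0]))
    return output

x = torch.randn(2, 16, 64)  # (batch, seq, features)
w = torch.randn(32, 64)
y = quantized_linear(x, w, None, quantize=lambda t: t)  # identity stand-in
assert y.shape == (2, 16, 32)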
1 change: 1 addition & 0 deletions comfy/quant_ops.py
@@ -19,6 +19,7 @@
     cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
     if cuda_version < (13,):
         ck.registry.disable("cuda")
+        logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")

     ck.registry.disable("triton")
     for k, v in ck.list_backends().items():
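For context, torch.version.cuda is a version string such as "12.8" (and None on CPU-only or ROCm builds), so the gate compares integer tuples. A hedged standalone sketch of the same check, with an explicit None guard added:

import torch

if torch.version.cuda is not None:
    cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
    if cuda_version < (13,):
        # e.g. (12, 8) < (13,) is True, so a cu128 build takes this branch
        print("PyTorch built for CUDA < 13.0; optimized CUDA ops stay disabled.")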
15 changes: 9 additions & 6 deletions comfy/sd.py
@@ -218,7 +218,7 @@ def encode_from_tokens_scheduled(self, tokens, unprojected=False, add_dict: dict
         if unprojected:
             self.cond_stage_model.set_clip_options({"projected_pooled": False})

-        self.load_model()
+        self.load_model(tokens)
         self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
         all_hooks.reset()
         self.patcher.patch_hooks(None)
@@ -266,7 +266,7 @@ def encode_from_tokens(self, tokens, return_pooled=False, return_dict=False):
         if return_pooled == "unprojected":
             self.cond_stage_model.set_clip_options({"projected_pooled": False})

-        self.load_model()
+        self.load_model(tokens)
         self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
         o = self.cond_stage_model.encode_token_weights(tokens)
         cond, pooled = o[:2]
@@ -299,8 +299,11 @@ def get_sd(self):
             sd_clip[k] = sd_tokenizer[k]
         return sd_clip

-    def load_model(self):
-        model_management.load_model_gpu(self.patcher)
+    def load_model(self, tokens={}):
+        memory_used = 0
+        if hasattr(self.cond_stage_model, "memory_estimation_function"):
+            memory_used = self.cond_stage_model.memory_estimation_function(tokens, device=self.patcher.load_device)
+        model_management.load_models_gpu([self.patcher], memory_required=memory_used)
         return self.patcher

     def get_key_patches(self):
@@ -476,8 +479,8 @@ def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None)
             self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE(version=version, config=vae_config)
             self.latent_channels = 128
             self.latent_dim = 3
-            self.memory_used_decode = lambda shape, dtype: (900 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
-            self.memory_used_encode = lambda shape, dtype: (70 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
+            self.memory_used_decode = lambda shape, dtype: (1200 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
+            self.memory_used_encode = lambda shape, dtype: (80 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
             self.upscale_ratio = (lambda a: max(0, a * 8 - 7), 32, 32)
             self.upscale_index_formula = (8, 32, 32)
             self.downscale_ratio = (lambda a: max(0, math.floor((a + 7) / 8)), 32, 32)
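load_model() now duck-types a memory_estimation_function hook on the text encoder so load_models_gpu can reserve token-dependent memory up front. A hedged sketch of the contract implied by the diff (a byte count is returned; models without the hook keep the old behavior):

def estimate_clip_load_memory(cond_stage_model, tokens, load_device):
    # Hook contract as shown above: memory_estimation_function(tokens, device=...)
    # returns bytes to reserve; absence of the hook means "no extra reservation".
    if hasattr(cond_stage_model, "memory_estimation_function"):
        return cond_stage_model.memory_estimation_function(tokens, device=load_device)
    return 0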
2 changes: 1 addition & 1 deletion comfy/supported_models.py
@@ -845,7 +845,7 @@ class LTXAV(LTXV):

     def __init__(self, unet_config):
         super().__init__(unet_config)
-        self.memory_usage_factor = 0.055 # TODO
+        self.memory_usage_factor = 0.061 # TODO

     def get_model(self, state_dict, prefix="", device=None):
         out = model_base.LTXAV(self, device=device)
11 changes: 11 additions & 0 deletions comfy/text_encoders/lt.py
@@ -98,10 +98,13 @@ def encode_token_weights(self, token_weight_pairs):

         out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
         out_device = out.device
+        if comfy.model_management.should_use_bf16(self.execution_device):
+            out = out.to(device=self.execution_device, dtype=torch.bfloat16)
         out = out.movedim(1, -1).to(self.execution_device)
         out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6)
         out = out.reshape((out.shape[0], out.shape[1], -1))
         out = self.text_embedding_projection(out)
+        out = out.float()
         out_vid = self.video_embeddings_connector(out)[0]
         out_audio = self.audio_embeddings_connector(out)[0]
         out = torch.concat((out_vid, out_audio), dim=-1)
@@ -118,6 +121,14 @@ def load_sd(self, sd):

         return self.load_state_dict(sdo, strict=False)

+    def memory_estimation_function(self, token_weight_pairs, device=None):
+        constant = 6.0
+        if comfy.model_management.should_use_bf16(device):
+            constant /= 2.0
+
+        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
+        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
+        return num_tokens * constant * 1024 * 1024

 def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
     class LTXAVTEModel_(LTXAVTEModel):
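A worked example of the estimate above (the constants come from the diff; the token_weight_pairs layout, one list of (token, weight) pairs per prompt, is an assumption for illustration):

token_weight_pairs = {"gemma3_12b": [[(0, 1.0)] * 512]}  # one prompt, 512 tokens

constant = 6.0  # MB per token in full precision; halved to 3.0 under bf16
num_tokens = sum(len(batch) for batch in token_weight_pairs["gemma3_12b"])
print(num_tokens * constant, "MB")  # 512 * 6.0 -> 3072.0 MB requested from the loader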
10 changes: 9 additions & 1 deletion comfy_extras/nodes_lt_audio.py
@@ -185,6 +185,10 @@ def define_schema(cls) -> io.Schema:
                 io.Combo.Input(
                     "ckpt_name",
                     options=folder_paths.get_filename_list("checkpoints"),
+                ),
+                io.Combo.Input(
+                    "device",
+                    options=["default", "cpu"],
                 )
             ],
             outputs=[io.Clip.Output()],
@@ -197,7 +201,11 @@ def execute(cls, text_encoder, ckpt_name, device="default"):
         clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", text_encoder)
         clip_path2 = folder_paths.get_full_path_or_raise("checkpoints", ckpt_name)

-        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type)
+        model_options = {}
+        if device == "cpu":
+            model_options["load_device"] = model_options["offload_device"] = torch.device("cpu")
+
+        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type, model_options=model_options)
         return io.NodeOutput(clip)


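The new combo routes straight into model_options; a brief sketch of the override, following the call shape in the diff (both devices are pinned so the encoder is neither loaded to nor offloaded from the GPU):

import torch

device = "cpu"  # value of the node's new "device" combo
model_options = {}
if device == "cpu":
    # Pin load and offload devices together so the weights never migrate.
    model_options["load_device"] = model_options["offload_device"] = torch.device("cpu")
# model_options is then forwarded to comfy.sd.load_clip(...)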
2 changes: 1 addition & 1 deletion comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.8.0"
+__version__ = "0.8.1"
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.8.0"
+version = "0.8.1"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
4 changes: 2 additions & 2 deletions requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.35.9
-comfyui-workflow-templates==0.7.67
+comfyui-workflow-templates==0.7.69
 comfyui-embedded-docs==0.3.1
 torch
 torchsde
@@ -21,7 +21,7 @@ psutil
 alembic
 SQLAlchemy
 av>=14.2.0
-comfy-kitchen>=0.2.3
+comfy-kitchen>=0.2.5

 #non essential dependencies:
 kornia>=0.7.1