code · pull · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025
diff --git a/CODEOWNERS b/CODEOWNERS
@@ -1,4 +1,2 @@
 # Admins
-* @comfyanonymous
-* @kosinkadink
-* @guill
+* @comfyanonymous @kosinkadink @guill
diff --git a/comfy/ldm/lumina/model.py b/comfy/ldm/lumina/model.py
@@ -22,6 +22,10 @@ def modulate(x, scale):
 #                               Core NextDiT Model                              #
 #############################################################################
 
+def clamp_fp16(x):  # Make float16 tensors finite; tensors of any other dtype are returned unchanged.
+    if x.dtype == torch.float16:
+        return torch.nan_to_num(x, nan=0.0, posinf=65504, neginf=-65504)  # NaN -> 0, +/-inf -> +/-65504 (largest finite float16 value)
+    return x
 
 class JointAttention(nn.Module):
     """Multi-head attention module."""
@@ -169,7 +173,7 @@ def __init__(
 
     # @torch.compile
     def _forward_silu_gating(self, x1, x3):
-        return F.silu(x1) * x3
+        return clamp_fp16(F.silu(x1) * x3)  # SiLU-gated product, passed through clamp_fp16 so float16 NaN/inf values are replaced
 
     def forward(self, x):
         return self.w2(self._forward_silu_gating(self.w1(x), self.w3(x)))
@@ -273,27 +277,27 @@ def forward(
             scale_msa, gate_msa, scale_mlp, gate_mlp = self.adaLN_modulation(adaln_input).chunk(4, dim=1)
 
             x = x + gate_msa.unsqueeze(1).tanh() * self.attention_norm2(
-                self.attention(
+                clamp_fp16(self.attention(
                     modulate(self.attention_norm1(x), scale_msa),
                     x_mask,
                     freqs_cis,
                     transformer_options=transformer_options,
-                )
+                ))
             )
             x = x + gate_mlp.unsqueeze(1).tanh() * self.ffn_norm2(
-                self.feed_forward(
+                clamp_fp16(self.feed_forward(
                     modulate(self.ffn_norm1(x), scale_mlp),
-                )
+                ))
             )
         else:
             assert adaln_input is None
             x = x + self.attention_norm2(
-                self.attention(
+                clamp_fp16(self.attention(
                     self.attention_norm1(x),
                     x_mask,
                     freqs_cis,
                     transformer_options=transformer_options,
-                )
+                ))
             )
             x = x + self.ffn_norm2(
                 self.feed_forward(

diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py
@@ -517,6 +517,7 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha
 
 @wrap_attn
 def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False, **kwargs):
+    exception_fallback = False
     if skip_reshape:
         b, _, _, dim_head = q.shape
         tensor_layout = "HND"
@@ -541,6 +542,8 @@ def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=
         out = sageattn(q, k, v, attn_mask=mask, is_causal=False, tensor_layout=tensor_layout)
     except Exception as e:
         logging.error("Error running sage attention: {}, using pytorch attention instead.".format(e))
+        exception_fallback = True
+    if exception_fallback:
         if tensor_layout == "NHD":
             q, k, v = map(
                 lambda t: t.transpose(1, 2),

diff --git a/comfy/ldm/modules/diffusionmodules/model.py b/comfy/ldm/modules/diffusionmodules/model.py
@@ -279,6 +279,7 @@ def pytorch_attention(q, k, v):
     orig_shape = q.shape
     B = orig_shape[0]
     C = orig_shape[1]
+    oom_fallback = False
     q, k, v = map(
         lambda t: t.view(B, 1, C, -1).transpose(2, 3).contiguous(),
         (q, k, v),
@@ -289,6 +290,8 @@ def pytorch_attention(q, k, v):
         out = out.transpose(2, 3).reshape(orig_shape)
     except model_management.OOM_EXCEPTION:
         logging.warning("scaled_dot_product_attention OOMed: switched to slice attention")
+        oom_fallback = True
+    if oom_fallback:
         out = slice_attention(q.view(B, -1, C), k.view(B, -1, C).transpose(1, 2), v.view(B, -1, C).transpose(1, 2)).reshape(orig_shape)
     return out
 

diff --git a/comfy/supported_models.py b/comfy/supported_models.py
@@ -1027,6 +1027,8 @@ class ZImage(Lumina2):
 
     memory_usage_factor = 1.7
 
+    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
+
     def clip_target(self, state_dict={}):
         pref = self.text_encoder_key_prefix[0]
         hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen3_4b.transformer.".format(pref))

diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py
@@ -337,7 +337,7 @@ def save_to(
         if codec != VideoCodec.AUTO and codec != VideoCodec.H264:
             raise ValueError("Only H264 codec is supported for now")
         extra_kwargs = {}
-        if format != VideoContainer.AUTO:
+        if isinstance(format, VideoContainer) and format != VideoContainer.AUTO:
             extra_kwargs["format"] = format.value
         with av.open(path, mode='w', options={'movflags': 'use_metadata_tags'}, **extra_kwargs) as output:
             # Add metadata before writing any streams

diff --git a/comfy_extras/nodes_video.py b/comfy_extras/nodes_video.py
@@ -88,7 +88,7 @@ def define_schema(cls):
         )
 
     @classmethod
-    def execute(cls, video: VideoInput, filename_prefix, format, codec) -> io.NodeOutput:
+    def execute(cls, video: VideoInput, filename_prefix, format: str, codec) -> io.NodeOutput:
         width, height = video.get_dimensions()
         full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(
             filename_prefix,
@@ -108,7 +108,7 @@ def execute(cls, video: VideoInput, filename_prefix, format, codec) -> io.NodeOu
         file = f"{filename}_{counter:05}_.{VideoContainer.get_extension(format)}"
         video.save_to(
             os.path.join(full_output_folder, file),
-            format=format,
+            format=VideoContainer(format),
             codec=codec,
             metadata=saved_metadata
         )