code · pull · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025
diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml
@@ -5,6 +5,7 @@ on:
   push:
     branches:
       - master
+      - release/**
     paths-ignore:
       - 'app/**'
       - 'input/**'

diff --git a/.github/workflows/test-execution.yml b/.github/workflows/test-execution.yml
@@ -2,9 +2,9 @@ name: Execution Tests
 
 on:
   push:
-    branches: [ main, master ]
+    branches: [ main, master, release/** ]
   pull_request:
-    branches: [ main, master ]
+    branches: [ main, master, release/** ]
 
 jobs:
   test:

diff --git a/.github/workflows/test-launch.yml b/.github/workflows/test-launch.yml
@@ -2,9 +2,9 @@ name: Test server launches without errors
 
 on:
   push:
-    branches: [ main, master ]
+    branches: [ main, master, release/** ]
   pull_request:
-    branches: [ main, master ]
+    branches: [ main, master, release/** ]
 
 jobs:
   test:

diff --git a/.github/workflows/test-unit.yml b/.github/workflows/test-unit.yml
@@ -2,9 +2,9 @@ name: Unit Tests
 
 on:
   push:
-    branches: [ main, master ]
+    branches: [ main, master, release/** ]
   pull_request:
-    branches: [ main, master ]
+    branches: [ main, master, release/** ]
 
 jobs:
   test:

diff --git a/.github/workflows/update-version.yml b/.github/workflows/update-version.yml
@@ -6,6 +6,7 @@ on:
       - "pyproject.toml"
     branches:
       - master
+      - release/**
 
 jobs:
   update-version:

diff --git a/comfy/ldm/qwen_image/model.py b/comfy/ldm/qwen_image/model.py
@@ -322,6 +322,7 @@ def __init__(
         pooled_projection_dim: int = 768,
         guidance_embeds: bool = False,
         axes_dims_rope: Tuple[int, int, int] = (16, 56, 56),
+        default_ref_method="index",
         image_model=None,
         final_layer=True,
         dtype=None,
@@ -334,6 +335,7 @@ def __init__(
         self.in_channels = in_channels
         self.out_channels = out_channels or in_channels
         self.inner_dim = num_attention_heads * attention_head_dim
+        self.default_ref_method = default_ref_method
 
         self.pe_embedder = EmbedND(dim=attention_head_dim, theta=10000, axes_dim=list(axes_dims_rope))
 
@@ -416,7 +418,7 @@ def _forward(
             h = 0
             w = 0
             index = 0
-            ref_method = kwargs.get("ref_latents_method", "index")
+            ref_method = kwargs.get("ref_latents_method", self.default_ref_method)
             index_ref_method = (ref_method == "index") or (ref_method == "index_timestep_zero")
             timestep_zero = ref_method == "index_timestep_zero"
             for ref in ref_latents:

diff --git a/comfy_extras/nodes_model_patch.py b/comfy_extras/nodes_model_patch.py
@@ -313,22 +313,46 @@ def __init__(self, model_patch, vae, image, strength, inpaint_image=None, mask=N
         self.inpaint_image = inpaint_image
         self.mask = mask
         self.strength = strength
-        self.encoded_image = self.encode_latent_cond(image)
-        self.encoded_image_size = (image.shape[1], image.shape[2])
-        self.temp_data = None
+        self.is_inpaint = self.model_patch.model.additional_in_dim > 0
 
-    def encode_latent_cond(self, control_image, inpaint_image=None):
-        latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(control_image))
-        if self.model_patch.model.additional_in_dim > 0:
-            if self.mask is None:
-                mask_ = torch.zeros_like(latent_image)[:, :1]
+        skip_encoding = False
+        if self.image is not None and self.inpaint_image is not None:
+            if self.image.shape != self.inpaint_image.shape:
+                skip_encoding = True
+
+        if skip_encoding:
+            self.encoded_image = None
+        else:
+            self.encoded_image = self.encode_latent_cond(self.image, self.inpaint_image)
+            if self.image is None:
+                self.encoded_image_size = (self.inpaint_image.shape[1], self.inpaint_image.shape[2])
             else:
-                mask_ = comfy.utils.common_upscale(self.mask.mean(dim=1, keepdim=True), latent_image.shape[-1], latent_image.shape[-2], "bilinear", "none")
+                self.encoded_image_size = (self.image.shape[1], self.image.shape[2])
+        self.temp_data = None
+
+    def encode_latent_cond(self, control_image=None, inpaint_image=None):
+        latent_image = None
+        if control_image is not None:
+            latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(control_image))
+
+        if self.is_inpaint:
             if inpaint_image is None:
                 inpaint_image = torch.ones_like(control_image) * 0.5
 
+            if self.mask is not None:
+                mask_inpaint = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image.shape[-2], inpaint_image.shape[-3], "bilinear", "center")
+                inpaint_image = ((inpaint_image - 0.5) * mask_inpaint.movedim(1, -1).round()) + 0.5
+
             inpaint_image_latent = comfy.latent_formats.Flux().process_in(self.vae.encode(inpaint_image))
 
+            if self.mask is None:
+                mask_ = torch.zeros_like(inpaint_image_latent)[:, :1]
+            else:
+                mask_ = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image_latent.shape[-1], inpaint_image_latent.shape[-2], "nearest", "center")
+
+            if latent_image is None:
+                latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(torch.ones_like(inpaint_image) * 0.5))
+
             return torch.cat([latent_image, mask_, inpaint_image_latent], dim=1)
         else:
             return latent_image
@@ -344,13 +368,18 @@ def __call__(self, kwargs):
         block_type = kwargs.get("block_type", "")
         spacial_compression = self.vae.spacial_compression_encode()
         if self.encoded_image is None or self.encoded_image_size != (x.shape[-2] * spacial_compression, x.shape[-1] * spacial_compression):
-            image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center")
+            image_scaled = None
+            if self.image is not None:
+                image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1)
+                self.encoded_image_size = (image_scaled.shape[-3], image_scaled.shape[-2])
+
             inpaint_scaled = None
             if self.inpaint_image is not None:
                 inpaint_scaled = comfy.utils.common_upscale(self.inpaint_image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1)
+                self.encoded_image_size = (inpaint_scaled.shape[-3], inpaint_scaled.shape[-2])
+
             loaded_models = comfy.model_management.loaded_models(only_currently_used=True)
-            self.encoded_image = self.encode_latent_cond(image_scaled.movedim(1, -1), inpaint_scaled)
-            self.encoded_image_size = (image_scaled.shape[-2], image_scaled.shape[-1])
+            self.encoded_image = self.encode_latent_cond(image_scaled, inpaint_scaled)
             comfy.model_management.load_models_gpu(loaded_models)
 
         cnet_blocks = self.model_patch.model.n_control_layers
@@ -391,7 +420,8 @@ def __call__(self, kwargs):
 
     def to(self, device_or_dtype):
         if isinstance(device_or_dtype, torch.device):
-            self.encoded_image = self.encoded_image.to(device_or_dtype)
+            if self.encoded_image is not None:
+                self.encoded_image = self.encoded_image.to(device_or_dtype)
             self.temp_data = None
         return self
 
@@ -414,9 +444,12 @@ def INPUT_TYPES(s):
 
     CATEGORY = "advanced/loaders/qwen"
 
-    def diffsynth_controlnet(self, model, model_patch, vae, image, strength, mask=None):
+    def diffsynth_controlnet(self, model, model_patch, vae, image=None, strength=1.0, inpaint_image=None, mask=None):
         model_patched = model.clone()
-        image = image[:, :, :, :3]
+        if image is not None:
+            image = image[:, :, :, :3]
+        if inpaint_image is not None:
+            inpaint_image = inpaint_image[:, :, :, :3]
         if mask is not None:
             if mask.ndim == 3:
                 mask = mask.unsqueeze(1)
@@ -425,13 +458,24 @@ def diffsynth_controlnet(self, model, model_patch, vae, image, strength, mask=No
             mask = 1.0 - mask
 
         if isinstance(model_patch.model, comfy.ldm.lumina.controlnet.ZImage_Control):
-            patch = ZImageControlPatch(model_patch, vae, image, strength, mask=mask)
+            patch = ZImageControlPatch(model_patch, vae, image, strength, inpaint_image=inpaint_image, mask=mask)
             model_patched.set_model_noise_refiner_patch(patch)
             model_patched.set_model_double_block_patch(patch)
         else:
             model_patched.set_model_double_block_patch(DiffSynthCnetPatch(model_patch, vae, image, strength, mask))
         return (model_patched,)
 
+class ZImageFunControlnet(QwenImageDiffsynthControlnet):
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "model": ("MODEL",),
+                              "model_patch": ("MODEL_PATCH",),
+                              "vae": ("VAE",),
+                              "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}),
+                              },
+                "optional": {"image": ("IMAGE",), "inpaint_image": ("IMAGE",), "mask": ("MASK",)}}
+
+    CATEGORY = "advanced/loaders/zimage"
 
 class UsoStyleProjectorPatch:
     def __init__(self, model_patch, encoded_image):
@@ -479,5 +523,6 @@ def apply_patch(self, model, model_patch, clip_vision_output):
 NODE_CLASS_MAPPINGS = {
     "ModelPatchLoader": ModelPatchLoader,
     "QwenImageDiffsynthControlnet": QwenImageDiffsynthControlnet,
+    "ZImageFunControlnet": ZImageFunControlnet,
     "USOStyleReference": USOStyleReference,
 }
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.34.8
+comfyui-frontend-package==1.34.9
 comfyui-workflow-templates==0.7.59
 comfyui-embedded-docs==0.3.1
 torch