Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/test-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
push:
branches:
- master
- release/**
paths-ignore:
- 'app/**'
- 'input/**'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-execution.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: Execution Tests

on:
push:
branches: [ main, master ]
branches: [ main, master, release/** ]
pull_request:
branches: [ main, master ]
branches: [ main, master, release/** ]

jobs:
test:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-launch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: Test server launches without errors

on:
push:
branches: [ main, master ]
branches: [ main, master, release/** ]
pull_request:
branches: [ main, master ]
branches: [ main, master, release/** ]

jobs:
test:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-unit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: Unit Tests

on:
push:
branches: [ main, master ]
branches: [ main, master, release/** ]
pull_request:
branches: [ main, master ]
branches: [ main, master, release/** ]

jobs:
test:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/update-version.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
- "pyproject.toml"
branches:
- master
- release/**

jobs:
update-version:
Expand Down
4 changes: 3 additions & 1 deletion comfy/ldm/qwen_image/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ def __init__(
pooled_projection_dim: int = 768,
guidance_embeds: bool = False,
axes_dims_rope: Tuple[int, int, int] = (16, 56, 56),
default_ref_method="index",
image_model=None,
final_layer=True,
dtype=None,
Expand All @@ -334,6 +335,7 @@ def __init__(
self.in_channels = in_channels
self.out_channels = out_channels or in_channels
self.inner_dim = num_attention_heads * attention_head_dim
self.default_ref_method = default_ref_method

self.pe_embedder = EmbedND(dim=attention_head_dim, theta=10000, axes_dim=list(axes_dims_rope))

Expand Down Expand Up @@ -416,7 +418,7 @@ def _forward(
h = 0
w = 0
index = 0
ref_method = kwargs.get("ref_latents_method", "index")
ref_method = kwargs.get("ref_latents_method", self.default_ref_method)
index_ref_method = (ref_method == "index") or (ref_method == "index_timestep_zero")
timestep_zero = ref_method == "index_timestep_zero"
for ref in ref_latents:
Expand Down
77 changes: 61 additions & 16 deletions comfy_extras/nodes_model_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,22 +313,46 @@ def __init__(self, model_patch, vae, image, strength, inpaint_image=None, mask=N
self.inpaint_image = inpaint_image
self.mask = mask
self.strength = strength
self.encoded_image = self.encode_latent_cond(image)
self.encoded_image_size = (image.shape[1], image.shape[2])
self.temp_data = None
self.is_inpaint = self.model_patch.model.additional_in_dim > 0

def encode_latent_cond(self, control_image, inpaint_image=None):
latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(control_image))
if self.model_patch.model.additional_in_dim > 0:
if self.mask is None:
mask_ = torch.zeros_like(latent_image)[:, :1]
skip_encoding = False
if self.image is not None and self.inpaint_image is not None:
if self.image.shape != self.inpaint_image.shape:
skip_encoding = True

if skip_encoding:
self.encoded_image = None
else:
self.encoded_image = self.encode_latent_cond(self.image, self.inpaint_image)
if self.image is None:
self.encoded_image_size = (self.inpaint_image.shape[1], self.inpaint_image.shape[2])
else:
mask_ = comfy.utils.common_upscale(self.mask.mean(dim=1, keepdim=True), latent_image.shape[-1], latent_image.shape[-2], "bilinear", "none")
self.encoded_image_size = (self.image.shape[1], self.image.shape[2])
self.temp_data = None

def encode_latent_cond(self, control_image=None, inpaint_image=None):
    """Encode the conditioning image(s) into the latent used by the control patch.

    Args:
        control_image: Control image to VAE-encode, or None when no control
            image is supplied.
        inpaint_image: Image to inpaint, or None. In inpaint mode a missing
            inpaint image is replaced by a constant 0.5 image shaped like
            ``control_image``.

    Returns:
        When ``self.is_inpaint`` is false: the encoded control latent, or
        None if ``control_image`` is None.
        When ``self.is_inpaint`` is true: the concatenation along dim 1 of
        ``[control latent, mask, inpaint latent]``.

    Raises:
        ValueError: In inpaint mode when both ``control_image`` and
            ``inpaint_image`` are None, because there is then no image from
            which to derive the placeholder shapes.
    """

    def to_latent(image):
        # VAE-encode, then apply the Flux latent format's input processing.
        return comfy.latent_formats.Flux().process_in(self.vae.encode(image))

    latent_image = None
    if control_image is not None:
        latent_image = to_latent(control_image)

    if not self.is_inpaint:
        return latent_image

    if inpaint_image is None:
        if control_image is None:
            # Previously this fell through to torch.ones_like(None) and
            # failed with an opaque TypeError.
            raise ValueError(
                "encode_latent_cond needs a control_image or an inpaint_image in inpaint mode"
            )
        inpaint_image = torch.ones_like(control_image) * 0.5

    # Collapse the mask to a single channel once; it is resized twice below
    # (to the image resolution and to the latent resolution).
    mask_single = None
    if self.mask is not None:
        mask_single = self.mask.view(
            self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]
        ).mean(dim=1, keepdim=True)

        # NOTE(review): the shape[-2] / shape[-3] arguments assume
        # inpaint_image is channels-last (batch, height, width, channels) -- confirm.
        mask_inpaint = comfy.utils.common_upscale(
            mask_single, inpaint_image.shape[-2], inpaint_image.shape[-3], "bilinear", "center"
        )
        # Pixels where the rounded mask is 0 are replaced by the constant 0.5.
        inpaint_image = ((inpaint_image - 0.5) * mask_inpaint.movedim(1, -1).round()) + 0.5

    inpaint_image_latent = to_latent(inpaint_image)

    if mask_single is None:
        # No mask supplied: use an all-zero single-channel mask.
        mask_ = torch.zeros_like(inpaint_image_latent)[:, :1]
    else:
        mask_ = comfy.utils.common_upscale(
            mask_single,
            inpaint_image_latent.shape[-1],
            inpaint_image_latent.shape[-2],
            "nearest",
            "center",
        )

    if latent_image is None:
        # No control image: encode a constant 0.5 image in its place.
        latent_image = to_latent(torch.ones_like(inpaint_image) * 0.5)

    return torch.cat([latent_image, mask_, inpaint_image_latent], dim=1)
Expand All @@ -344,13 +368,18 @@ def __call__(self, kwargs):
block_type = kwargs.get("block_type", "")
spacial_compression = self.vae.spacial_compression_encode()
if self.encoded_image is None or self.encoded_image_size != (x.shape[-2] * spacial_compression, x.shape[-1] * spacial_compression):
image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center")
image_scaled = None
if self.image is not None:
image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1)
self.encoded_image_size = (image_scaled.shape[-3], image_scaled.shape[-2])

inpaint_scaled = None
if self.inpaint_image is not None:
inpaint_scaled = comfy.utils.common_upscale(self.inpaint_image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1)
self.encoded_image_size = (inpaint_scaled.shape[-3], inpaint_scaled.shape[-2])

loaded_models = comfy.model_management.loaded_models(only_currently_used=True)
self.encoded_image = self.encode_latent_cond(image_scaled.movedim(1, -1), inpaint_scaled)
self.encoded_image_size = (image_scaled.shape[-2], image_scaled.shape[-1])
self.encoded_image = self.encode_latent_cond(image_scaled, inpaint_scaled)
comfy.model_management.load_models_gpu(loaded_models)

cnet_blocks = self.model_patch.model.n_control_layers
Expand Down Expand Up @@ -391,7 +420,8 @@ def __call__(self, kwargs):

def to(self, device_or_dtype):
    """Move the cached encoded image to a device and return ``self``.

    Only ``torch.device`` arguments have an effect; any other argument
    (for example a dtype) leaves the patch untouched.

    Args:
        device_or_dtype: Target ``torch.device``, or any other value.

    Returns:
        This patch object, so calls can be chained.
    """
    if isinstance(device_or_dtype, torch.device):
        # encoded_image may be None when encoding was skipped in the
        # constructor, so it must be guarded before calling .to().
        if self.encoded_image is not None:
            self.encoded_image = self.encoded_image.to(device_or_dtype)
        # NOTE(review): reset placed inside the device branch; the original
        # indentation is not visible here -- confirm against the real file.
        self.temp_data = None
    return self

Expand All @@ -414,9 +444,12 @@ def INPUT_TYPES(s):

CATEGORY = "advanced/loaders/qwen"

def diffsynth_controlnet(self, model, model_patch, vae, image, strength, mask=None):
def diffsynth_controlnet(self, model, model_patch, vae, image=None, strength=1.0, inpaint_image=None, mask=None):
model_patched = model.clone()
image = image[:, :, :, :3]
if image is not None:
image = image[:, :, :, :3]
if inpaint_image is not None:
inpaint_image = inpaint_image[:, :, :, :3]
if mask is not None:
if mask.ndim == 3:
mask = mask.unsqueeze(1)
Expand All @@ -425,13 +458,24 @@ def diffsynth_controlnet(self, model, model_patch, vae, image, strength, mask=No
mask = 1.0 - mask

if isinstance(model_patch.model, comfy.ldm.lumina.controlnet.ZImage_Control):
patch = ZImageControlPatch(model_patch, vae, image, strength, mask=mask)
patch = ZImageControlPatch(model_patch, vae, image, strength, inpaint_image=inpaint_image, mask=mask)
model_patched.set_model_noise_refiner_patch(patch)
model_patched.set_model_double_block_patch(patch)
else:
model_patched.set_model_double_block_patch(DiffSynthCnetPatch(model_patch, vae, image, strength, mask))
return (model_patched,)

class ZImageFunControlnet(QwenImageDiffsynthControlnet):
    """Node variant of QwenImageDiffsynthControlnet with its own inputs and category.

    Only the input declaration and the category are overridden here; all
    other behaviour comes from the base class.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Return the input declaration for this node.

        ``model``, ``model_patch``, ``vae`` and ``strength`` are required;
        ``image``, ``inpaint_image`` and ``mask`` are optional.
        """
        strength_spec = ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01})
        required_inputs = {
            "model": ("MODEL",),
            "model_patch": ("MODEL_PATCH",),
            "vae": ("VAE",),
            "strength": strength_spec,
        }
        optional_inputs = {
            "image": ("IMAGE",),
            "inpaint_image": ("IMAGE",),
            "mask": ("MASK",),
        }
        return {"required": required_inputs, "optional": optional_inputs}

    CATEGORY = "advanced/loaders/zimage"

class UsoStyleProjectorPatch:
def __init__(self, model_patch, encoded_image):
Expand Down Expand Up @@ -479,5 +523,6 @@ def apply_patch(self, model, model_patch, clip_vision_output):
# Maps each node's string identifier to the class that implements it.
# NOTE(review): presumably read by the node loader to register these nodes -- confirm.
NODE_CLASS_MAPPINGS = {
"ModelPatchLoader": ModelPatchLoader,
"QwenImageDiffsynthControlnet": QwenImageDiffsynthControlnet,
"ZImageFunControlnet": ZImageFunControlnet,
"USOStyleReference": USOStyleReference,
}
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
comfyui-frontend-package==1.34.8
comfyui-frontend-package==1.34.9
comfyui-workflow-templates==0.7.59
comfyui-embedded-docs==0.3.1
torch
Expand Down
Loading