9 changes: 6 additions & 3 deletions comfy/ldm/lightricks/vae/audio_vae.py
@@ -189,9 +189,12 @@ def encode(self, audio: dict) -> torch.Tensor:
         waveform = self.device_manager.move_to_load_device(waveform)
         expected_channels = self.autoencoder.encoder.in_channels
         if waveform.shape[1] != expected_channels:
-            raise ValueError(
-                f"Input audio must have {expected_channels} channels, got {waveform.shape[1]}"
-            )
+            if waveform.shape[1] == 1:
+                waveform = waveform.expand(-1, expected_channels, *waveform.shape[2:])
+            else:
+                raise ValueError(
+                    f"Input audio must have {expected_channels} channels, got {waveform.shape[1]}"
+                )
 
         mel_spec = self.preprocessor.waveform_to_mel(
             waveform, waveform_sample_rate, device=self.device_manager.load_device
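The new branch upmixes mono input instead of rejecting it: `expand` broadcasts the singleton channel dimension without copying. A minimal sketch, assuming the `[batch, channels, samples]` waveform layout used above:

```python
import torch

mono = torch.randn(1, 1, 48000)  # hypothetical: 1 mono waveform, 48000 samples
expected_channels = 2

# expand() returns a broadcast view: both output channels alias the same
# underlying mono samples, so no extra memory is allocated.
stereo = mono.expand(-1, expected_channels, *mono.shape[2:])
print(stereo.shape)  # torch.Size([1, 2, 48000])
```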
313 changes: 257 additions & 56 deletions comfy/ldm/lumina/model.py

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions comfy/model_base.py
@@ -1150,6 +1150,7 @@ def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
 class Lumina2(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.lumina.model.NextDiT)
+        self.memory_usage_factor_conds = ("ref_latents",)
 
     def extra_conds(self, **kwargs):
         out = super().extra_conds(**kwargs)
@@ -1169,6 +1170,35 @@ def extra_conds(self, **kwargs):
         if clip_text_pooled is not None:
             out['clip_text_pooled'] = comfy.conds.CONDRegular(clip_text_pooled)
 
+        clip_vision_outputs = kwargs.get("clip_vision_outputs", list(map(lambda a: a.get("clip_vision_output"), kwargs.get("unclip_conditioning", [{}])))) # Z Image omni
+        if clip_vision_outputs is not None and len(clip_vision_outputs) > 0:
+            sigfeats = []
+            for clip_vision_output in clip_vision_outputs:
+                if clip_vision_output is not None:
+                    image_size = clip_vision_output.image_sizes[0]
+                    shape = clip_vision_output.last_hidden_state.shape
+                    sigfeats.append(clip_vision_output.last_hidden_state.reshape(shape[0], image_size[1] // 16, image_size[2] // 16, shape[-1]))
+            if len(sigfeats) > 0:
+                out['siglip_feats'] = comfy.conds.CONDList(sigfeats)
+
+        ref_latents = kwargs.get("reference_latents", None)
+        if ref_latents is not None:
+            latents = []
+            for lat in ref_latents:
+                latents.append(self.process_latent_in(lat))
+            out['ref_latents'] = comfy.conds.CONDList(latents)
+
+        ref_contexts = kwargs.get("reference_latents_text_embeds", None)
+        if ref_contexts is not None:
+            out['ref_contexts'] = comfy.conds.CONDList(ref_contexts)
+
         return out
 
+    def extra_conds_shapes(self, **kwargs):
+        out = {}
+        ref_latents = kwargs.get("reference_latents", None)
+        if ref_latents is not None:
+            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()[2:]), ref_latents))])
+        return out
+
 class WAN21(BaseModel):
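The `siglip_feats` branch reshapes a flat sequence of vision patch tokens back into a 2D grid before conditioning. A sketch of the arithmetic, assuming 16-pixel patches and that `image_sizes[0]` holds height and width at indices 1 and 2 (all values made up):

```python
import torch

# Hypothetical vision-tower output: 1 image, 32*32 patch tokens, 1152-dim
# features, i.e. a 512x512 input with 16-pixel patches (512 // 16 = 32).
last_hidden_state = torch.randn(1, 32 * 32, 1152)
image_size = (1, 512, 512)  # indices 1 and 2 are height and width, as used above

shape = last_hidden_state.shape
grid = last_hidden_state.reshape(shape[0], image_size[1] // 16, image_size[2] // 16, shape[-1])
print(grid.shape)  # torch.Size([1, 32, 32, 1152]) -> [batch, H/16, W/16, feat]
```

The companion `extra_conds_shapes` override reports a flattened `[1, 16, sum of spatial elements]` shape over all reference latents, which is what the new `memory_usage_factor_conds = ("ref_latents",)` feeds into memory estimation.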
3 changes: 3 additions & 0 deletions comfy/model_detection.py
@@ -446,6 +446,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
             dit_config["time_scale"] = 1000.0
         if '{}cap_pad_token'.format(key_prefix) in state_dict_keys:
             dit_config["pad_tokens_multiple"] = 32
+        sig_weight = state_dict.get('{}siglip_embedder.0.weight'.format(key_prefix), None)
+        if sig_weight is not None:
+            dit_config["siglip_feat_dim"] = sig_weight.shape[0]
 
         return dit_config
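The detection pattern above keys an optional config field off the presence and shape of a checkpoint weight. A self-contained sketch of the same logic, with a made-up key prefix and weight shape:

```python
import torch

# Made-up checkpoint: the key name matches the pattern probed above;
# the shape is purely illustrative.
state_dict = {"model.siglip_embedder.0.weight": torch.empty(1152, 729)}
key_prefix = "model."

dit_config = {}
sig_weight = state_dict.get('{}siglip_embedder.0.weight'.format(key_prefix), None)
if sig_weight is not None:
    dit_config["siglip_feat_dim"] = sig_weight.shape[0]
print(dit_config)  # {'siglip_feat_dim': 1152}
```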
1 change: 1 addition & 0 deletions comfy/text_encoders/ovis.py
@@ -61,6 +61,7 @@ def __init__(self, device="cpu", dtype=None, model_options={}):
             if dtype_llama is not None:
                 dtype = dtype_llama
             if llama_quantization_metadata is not None:
+                model_options = model_options.copy()
                 model_options["quantization_metadata"] = llama_quantization_metadata
             super().__init__(device=device, dtype=dtype, model_options=model_options)
     return OvisTEModel_
1 change: 1 addition & 0 deletions comfy/text_encoders/z_image.py
@@ -40,6 +40,7 @@ def __init__(self, device="cpu", dtype=None, model_options={}):
             if dtype_llama is not None:
                 dtype = dtype_llama
             if llama_quantization_metadata is not None:
+                model_options = model_options.copy()
                 model_options["quantization_metadata"] = llama_quantization_metadata
             super().__init__(device=device, dtype=dtype, model_options=model_options)
     return ZImageTEModel_
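Both one-line fixes (ovis.py and z_image.py) address the same Python pitfall: `model_options={}` is a mutable default that is created once and shared by every call, so writing `quantization_metadata` into it without copying would leak into later instantiations. A minimal repro of the hazard and the fix:

```python
def broken(model_options={}):
    model_options["quantization_metadata"] = "dummy"  # mutates the shared default
    return model_options

def fixed(model_options={}):
    model_options = model_options.copy()  # defensive copy, as in the patch
    model_options["quantization_metadata"] = "dummy"
    return model_options

broken()
print(broken.__defaults__)  # ({'quantization_metadata': 'dummy'},) -- polluted
fixed()
print(fixed.__defaults__)   # ({},) -- default dict left untouched
```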
2 changes: 2 additions & 0 deletions comfy/utils.py
@@ -639,6 +639,8 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
         "proj_out.bias": "linear2.bias",
         "attn.norm_q.weight": "norm.query_norm.scale",
         "attn.norm_k.weight": "norm.key_norm.scale",
+        "attn.to_qkv_mlp_proj.weight": "linear1.weight", # Flux 2
+        "attn.to_out.weight": "linear2.weight", # Flux 2
     }
 
     for k in block_map:
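The two new entries extend the per-block rename table used when converting Diffusers-style Flux checkpoints; the `to_qkv_mlp_proj` name suggests Flux 2 fuses the QKV and MLP input projections into a single `linear1`. A hedged sketch of how such a suffix table is typically applied (the helper below is illustrative, not ComfyUI's actual loop):

```python
block_map = {
    "attn.to_qkv_mlp_proj.weight": "linear1.weight",  # Flux 2
    "attn.to_out.weight": "linear2.weight",           # Flux 2
}

def rename_key(key: str, block_prefix: str) -> str:
    # Rewrite a Diffusers-style suffix under the same block prefix,
    # leaving unknown suffixes unchanged.
    suffix = key[len(block_prefix):]
    return block_prefix + block_map.get(suffix, suffix)

print(rename_key("single_transformer_blocks.0.attn.to_out.weight",
                 "single_transformer_blocks.0."))
# single_transformer_blocks.0.linear2.weight
```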
53 changes: 44 additions & 9 deletions comfy_api/latest/_io.py
@@ -1000,20 +1000,38 @@ def _expand_schema_for_dynamic(out_dict: dict[str, Any], live_inputs: dict[str,
         names = [f"{prefix}{i}" for i in range(max)]
         # need to create a new input based on the contents of input
         template_input = None
-        for _, dict_input in input.items():
-            # for now, get just the first value from dict_input
+        template_required = True
+        for _input_type, dict_input in input.items():
+            # for now, get just the first value from dict_input; if not required, min can be ignored
             if len(dict_input) == 0:
                 continue
             template_input = list(dict_input.values())[0]
+            template_required = _input_type == "required"
             break
         if template_input is None:
             raise Exception("template_input could not be determined from required or optional; this should never happen.")
         new_dict = {}
+        new_dict_added_to = False
         # first, add possible inputs into out_dict
         for i, name in enumerate(names):
             expected_id = finalize_prefix(curr_prefix, name)
             # required
-            if i < min:
+            if i < min and template_required:
                 out_dict["required"][expected_id] = template_input
-                type_dict = new_dict.setdefault("required", {})
             # optional
             else:
                 out_dict["optional"][expected_id] = template_input
-                type_dict = new_dict.setdefault("optional", {})
             if expected_id in live_inputs:
+                # required
+                if i < min:
+                    type_dict = new_dict.setdefault("required", {})
+                # optional
+                else:
+                    type_dict = new_dict.setdefault("optional", {})
                 # NOTE: prefix gets added in parse_class_inputs
                 type_dict[name] = template_input
+                new_dict_added_to = True
+        # account for the edge case that all inputs are optional and no values are received
+        if not new_dict_added_to:
+            finalized_prefix = finalize_prefix(curr_prefix)
+            out_dict["dynamic_paths"][finalized_prefix] = finalized_prefix
+            out_dict["dynamic_paths_default_value"][finalized_prefix] = DynamicPathsDefaultValue.EMPTY_DICT
         parse_class_inputs(out_dict, live_inputs, new_dict, curr_prefix)

@comfytype(io_type="COMFY_DYNAMICCOMBO_V3")
@@ -1151,6 +1169,8 @@ class V3Data(TypedDict):
     'Dictionary where the keys are the hidden input ids and the values are the values of the hidden inputs.'
     dynamic_paths: dict[str, Any]
     'Dictionary where the keys are the input ids and the values dictate how to turn the inputs into a nested dictionary.'
+    dynamic_paths_default_value: dict[str, Any]
+    'Dictionary where the keys are the input ids and the values are a string from DynamicPathsDefaultValue for the inputs if value is None.'
     create_dynamic_tuple: bool
     'When True, the value of the dynamic input will be in the format (value, path_key).'
@@ -1504,6 +1524,7 @@ def get_finalized_class_inputs(d: dict[str, Any], live_inputs: dict[str, Any], i
         "required": {},
         "optional": {},
         "dynamic_paths": {},
+        "dynamic_paths_default_value": {},
     }
     d = d.copy()
     # ignore hidden for parsing
@@ -1513,8 +1534,12 @@
         out_dict["hidden"] = hidden
     v3_data = {}
     dynamic_paths = out_dict.pop("dynamic_paths", None)
-    if dynamic_paths is not None:
+    if dynamic_paths is not None and len(dynamic_paths) > 0:
         v3_data["dynamic_paths"] = dynamic_paths
+    # this list is used for autogrow, in the case all inputs are optional and no values are passed
+    dynamic_paths_default_value = out_dict.pop("dynamic_paths_default_value", None)
+    if dynamic_paths_default_value is not None and len(dynamic_paths_default_value) > 0:
+        v3_data["dynamic_paths_default_value"] = dynamic_paths_default_value
     return out_dict, hidden, v3_data
 
 def parse_class_inputs(out_dict: dict[str, Any], live_inputs: dict[str, Any], curr_dict: dict[str, Any], curr_prefix: list[str] | None=None) -> None:
@@ -1551,11 +1576,16 @@ def add_to_dict_v1(i: Input, d: dict):
 def add_to_dict_v3(io: Input | Output, d: dict):
     d[io.id] = (io.get_io_type(), io.as_dict())
 
+class DynamicPathsDefaultValue:
+    EMPTY_DICT = "empty_dict"
+
 def build_nested_inputs(values: dict[str, Any], v3_data: V3Data):
     paths = v3_data.get("dynamic_paths", None)
+    default_value_dict = v3_data.get("dynamic_paths_default_value", {})
     if paths is None:
         return values
     values = values.copy()
 
     result = {}
 
     create_tuple = v3_data.get("create_dynamic_tuple", False)
@@ -1569,6 +1599,11 @@ def build_nested_inputs(values: dict[str, Any], v3_data: V3Data):
 
         if is_last:
             value = values.pop(key, None)
+            if value is None:
+                # see if a default value was provided for this key
+                default_option = default_value_dict.get(key, None)
+                if default_option == DynamicPathsDefaultValue.EMPTY_DICT:
+                    value = {}
             if create_tuple:
                 value = (value, key)
             current[p] = value
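Putting the added pieces together: when an autogrow input is entirely optional and no values arrive, `_expand_schema_for_dynamic` registers the prefix with an `EMPTY_DICT` default, and `build_nested_inputs` resolves the missing value to `{}` rather than `None`. A hedged sketch of that path (the single-segment path and the `images` key are illustrative; assumes both names are importable from this module):

```python
from comfy_api.latest._io import DynamicPathsDefaultValue, build_nested_inputs

v3_data = {
    "dynamic_paths": {"images": "images"},
    "dynamic_paths_default_value": {"images": DynamicPathsDefaultValue.EMPTY_DICT},
}
values = {}  # all inputs optional, nothing connected

# Expected: the unset dynamic input comes back as an empty dict, not None.
print(build_nested_inputs(values, v3_data))  # {'images': {}}
```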
61 changes: 61 additions & 0 deletions comfy_api_nodes/apis/bria.py
@@ -0,0 +1,61 @@
+from typing import TypedDict
+
+from pydantic import BaseModel, Field
+
+
+class InputModerationSettings(TypedDict):
+    prompt_content_moderation: bool
+    visual_input_moderation: bool
+    visual_output_moderation: bool
+
+
+class BriaEditImageRequest(BaseModel):
+    instruction: str | None = Field(...)
+    structured_instruction: str | None = Field(
+        ...,
+        description="Use this instead of instruction for precise, programmatic control.",
+    )
+    images: list[str] = Field(
+        ...,
+        description="Required. Publicly available URL or Base64-encoded. Must contain exactly one item.",
+    )
+    mask: str | None = Field(
+        None,
+        description="Mask image (black and white). Black areas will be preserved, white areas will be edited. "
+        "If omitted, the edit applies to the entire image. "
+        "The input image and the input mask must be of the same size.",
+    )
+    negative_prompt: str | None = Field(None)
+    guidance_scale: float = Field(...)
+    model_version: str = Field(...)
+    steps_num: int = Field(...)
+    seed: int = Field(...)
+    ip_signal: bool = Field(
+        False,
+        description="If true, returns a warning for potential IP content in the instruction.",
+    )
+    prompt_content_moderation: bool = Field(
+        False, description="If true, returns 422 on instruction moderation failure."
+    )
+    visual_input_content_moderation: bool = Field(
+        False, description="If true, returns 422 on images or mask moderation failure."
+    )
+    visual_output_content_moderation: bool = Field(
+        False, description="If true, returns 422 on visual output moderation failure."
+    )
+
+
+class BriaStatusResponse(BaseModel):
+    request_id: str = Field(...)
+    status_url: str = Field(...)
+    warning: str | None = Field(None)
+
+
+class BriaResult(BaseModel):
+    structured_prompt: str = Field(...)
+    image_url: str = Field(...)
+
+
+class BriaResponse(BaseModel):
+    status: str = Field(...)
+    result: BriaResult | None = Field(None)
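A usage sketch for the request model above: pydantic treats `Field(...)` as required even when the annotation allows `None`, so `instruction` and `structured_instruction` must both be passed explicitly. All values are illustrative:

```python
req = BriaEditImageRequest(
    instruction="make the sky purple",
    structured_instruction=None,
    images=["https://example.com/input.png"],  # exactly one item, per the docs above
    guidance_scale=5.0,
    model_version="2.0",  # made-up version string
    steps_num=30,
    seed=42,
)
print(req.model_dump(exclude_none=True))  # pydantic v2 serialization
```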