|
121 | 121 | "instruct-pix2pix": "model.diffusion_model.input_blocks.0.0.weight", |
122 | 122 | "lumina2": ["model.diffusion_model.cap_embedder.0.weight", "cap_embedder.0.weight"], |
123 | 123 | "z-image-turbo": "cap_embedder.0.weight", |
| 124 | + "z-image-turbo-controlnet": "control_all_x_embedder.2-1.weight", |
| 125 | + "z-image-turbo-controlnet-2.x": "control_layers.14.adaLN_modulation.0.weight", |
124 | 126 | "sana": [ |
125 | 127 | "blocks.0.cross_attn.q_linear.weight", |
126 | 128 | "blocks.0.cross_attn.q_linear.bias", |
|
220 | 222 | "cosmos-2.0-v2w-2B": {"pretrained_model_name_or_path": "nvidia/Cosmos-Predict2-2B-Video2World"}, |
221 | 223 | "cosmos-2.0-v2w-14B": {"pretrained_model_name_or_path": "nvidia/Cosmos-Predict2-14B-Video2World"}, |
222 | 224 | "z-image-turbo": {"pretrained_model_name_or_path": "Tongyi-MAI/Z-Image-Turbo"}, |
| 225 | + "z-image-turbo-controlnet": {"pretrained_model_name_or_path": "hlky/Z-Image-Turbo-Fun-Controlnet-Union"}, |
| 226 | + "z-image-turbo-controlnet-2.x": {"pretrained_model_name_or_path": "hlky/Z-Image-Turbo-Fun-Controlnet-Union-2.1"}, |
223 | 227 | } |
224 | 228 |
|
225 | 229 | # Use to configure model sample size when original config is provided |
@@ -779,6 +783,12 @@ def infer_diffusers_model_type(checkpoint): |
779 | 783 | else: |
780 | 784 | raise ValueError(f"Unexpected x_embedder shape: {x_embedder_shape} when loading Cosmos 2.0 model.") |
781 | 785 |
|
| 786 | + elif CHECKPOINT_KEY_NAMES["z-image-turbo-controlnet-2.x"] in checkpoint: |
| 787 | + model_type = "z-image-turbo-controlnet-2.x" |
| 788 | + |
| 789 | + elif CHECKPOINT_KEY_NAMES["z-image-turbo-controlnet"] in checkpoint: |
| 790 | + model_type = "z-image-turbo-controlnet" |
| 791 | + |
782 | 792 | else: |
783 | 793 | model_type = "v1" |
784 | 794 |
|
@@ -3885,3 +3895,17 @@ def update_state_dict(state_dict: dict[str, object], old_key: str, new_key: str) |
3885 | 3895 | handler_fn_inplace(key, converted_state_dict) |
3886 | 3896 |
|
3887 | 3897 | return converted_state_dict |
| 3898 | + |
| 3899 | + |
def convert_z_image_controlnet_checkpoint_to_diffusers(checkpoint, config, **kwargs):
    """Adapt a Z-Image ControlNet state dict according to the model config.

    The behavior is selected by ``config["add_control_noise_refiner"]``:

    * ``None`` or ``"control_noise_refiner"``: the checkpoint is returned
      as-is (the very same object, untouched).
    * ``"control_layers"``: every entry whose key does not start with
      ``"control_noise_refiner."`` is moved into a new dict, which is
      returned.

    Args:
        checkpoint: Mapping of parameter names to values. In the
            ``"control_layers"`` case it is mutated in place: the moved
            entries are popped, so only ``control_noise_refiner.*`` entries
            remain in it afterwards.
        config: Mapping that must contain the key
            ``"add_control_noise_refiner"``.
        **kwargs: Accepted but not used by this function.

    Returns:
        The original ``checkpoint`` object, or a new dict without the
        ``control_noise_refiner.*`` entries.

    Raises:
        KeyError: If ``config`` has no ``"add_control_noise_refiner"`` key.
        ValueError: If the configured value is none of the handled ones.
    """
    refiner_mode = config["add_control_noise_refiner"]

    # Both of these settings need no key changes: hand the input straight back.
    if refiner_mode is None or refiner_mode == "control_noise_refiner":
        return checkpoint

    if refiner_mode != "control_layers":
        raise ValueError("Unknown Z-Image Turbo ControlNet type.")

    converted_state_dict = {}
    # Iterate over a snapshot of the keys because entries are popped from
    # `checkpoint` while walking it.
    for key in list(checkpoint.keys()):
        if key.startswith("control_noise_refiner."):
            continue
        converted_state_dict[key] = checkpoint.pop(key)
    return converted_state_dict
0 commit comments