@@ -103,6 +103,63 @@ def encode(self, positive, negative, vae, width, height, length, batch_size, sta
103103 out_latent ["samples" ] = latent
104104 return (positive , negative , out_latent )
105105
class Wan22FunControlToVideo:
    """Node that attaches control/reference conditioning and creates an empty video latent.

    ``encode`` returns the positive and negative conditioning with
    ``concat_latent_image``, ``concat_mask`` and ``concat_mask_index`` set
    (plus ``reference_latents`` when a reference image is supplied), together
    with a zero-filled latent wrapped as ``{"samples": latent}``.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Return the declaration of the node's required and optional inputs."""

        def spatial(default):
            # Width and height share the same bounds and step; only the default differs.
            return ("INT", {"default": default, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16})

        required = {
            "positive": ("CONDITIONING", ),
            "negative": ("CONDITIONING", ),
            "vae": ("VAE", ),
            "width": spatial(832),
            "height": spatial(480),
            "length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
            "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
        }
        # NOTE: encode() also accepts ``start_image``, but it is not declared
        # as an input here.
        optional = {
            "ref_image": ("IMAGE", ),
            "control_video": ("IMAGE", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")
    FUNCTION = "encode"

    CATEGORY = "conditioning/video_models"

    def encode(self, positive, negative, vae, width, height, length, batch_size, ref_image=None, start_image=None, control_video=None):
        """Build the concat latent and mask, and attach them to both conditionings.

        Args:
            positive: Conditioning that receives the concat/reference values.
            negative: Conditioning that receives the same values as ``positive``.
            vae: Object whose ``encode`` method turns image frames into latents.
            width: Target width in pixels; the latent uses ``width // 8``.
            height: Target height in pixels; the latent uses ``height // 8``.
            length: Frame count; the latent uses ``((length - 1) // 4) + 1`` frames.
            batch_size: Size of the first dimension of the created latents.
            ref_image: Optional image; only its first entry is resized and encoded,
                then appended under ``reference_latents``.
            start_image: Optional frames (at most ``length`` are used) encoded into
                channels ``16:`` of the concat latent; the mask is zeroed for them.
            control_video: Optional frames (at most ``length`` are used) encoded
                into channels ``:16`` of the concat latent.

        Returns:
            Tuple ``(positive, negative, {"samples": latent})`` where ``latent``
            is the zero-filled latent tensor.
        """
        latent_frames = ((length - 1) // 4) + 1
        latent_h = height // 8
        latent_w = width // 8
        latent_shape = [batch_size, 16, latent_frames, latent_h, latent_w]

        def blank():
            # Zero tensor of the latent shape on the intermediate device.
            return torch.zeros(latent_shape, device=comfy.model_management.intermediate_device())

        def fit(frames):
            # Resize frames to (width, height); presumably frames are channels-last,
            # hence the movedim round trip around common_upscale -- TODO confirm.
            channels_first = frames.movedim(-1, 1)
            resized = comfy.utils.common_upscale(channels_first, width, height, "bilinear", "center")
            return resized.movedim(1, -1)

        latent = blank()
        concat_latent = blank()
        concat_latent = comfy.latent_formats.Wan21().process_out(concat_latent)
        # Two copies stacked along dim 1: [:16] takes the control video, [16:] the start image.
        concat_latent = concat_latent.repeat(1, 2, 1, 1, 1)
        mask = torch.ones((1, 1, latent_frames * 4, latent_h, latent_w))

        if start_image is not None:
            start_frames = fit(start_image[:length])
            start_latent = vae.encode(start_frames[:, :, :, :3])
            concat_latent[:, 16:, :start_latent.shape[2]] = start_latent[:, :, :concat_latent.shape[2]]
            mask[:, :, :start_frames.shape[0] + 3] = 0.0

        ref_latent = None
        if ref_image is not None:
            ref_frame = fit(ref_image[:1])
            ref_latent = vae.encode(ref_frame[:, :, :, :3])

        if control_video is not None:
            control_frames = fit(control_video[:length])
            control_latent = vae.encode(control_frames[:, :, :, :3])
            concat_latent[:, :16, :control_latent.shape[2]] = control_latent[:, :, :concat_latent.shape[2]]

        # Fold every group of 4 mask frames into its own axis, then swap it in front
        # of the frame axis.
        _, _, mask_frames, mask_h, mask_w = mask.shape
        mask = mask.view(1, mask_frames // 4, 4, mask_h, mask_w).transpose(1, 2)

        # A fresh dict is built for each call, positive first and negative second.
        positive, negative = [
            node_helpers.conditioning_set_values(
                cond,
                {"concat_latent_image": concat_latent, "concat_mask": mask, "concat_mask_index": 16},
            )
            for cond in (positive, negative)
        ]

        if ref_latent is not None:
            positive, negative = [
                node_helpers.conditioning_set_values(cond, {"reference_latents": [ref_latent]}, append=True)
                for cond in (positive, negative)
            ]

        return (positive, negative, {"samples": latent})
162+
106163class WanFirstLastFrameToVideo :
107164 @classmethod
108165 def INPUT_TYPES (s ):
@@ -733,6 +790,7 @@ def encode(self, vae, width, height, length, batch_size, start_image=None):
733790 "WanTrackToVideo" : WanTrackToVideo ,
734791 "WanImageToVideo" : WanImageToVideo ,
735792 "WanFunControlToVideo" : WanFunControlToVideo ,
793+ "Wan22FunControlToVideo" : Wan22FunControlToVideo ,
736794 "WanFunInpaintToVideo" : WanFunInpaintToVideo ,
737795 "WanFirstLastFrameToVideo" : WanFirstLastFrameToVideo ,
738796 "WanVaceToVideo" : WanVaceToVideo ,
0 commit comments