Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions ATI/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,14 @@ class WanVideoATITracks:
@classmethod
def INPUT_TYPES(s):
    """Describe the inputs accepted by the ATI tracks node.

    Returns:
        dict: a single ``"required"`` section mapping each input name to a
        ``(type, options)`` tuple. ``options`` carries the widget default,
        numeric bounds/step where applicable, and a ``"tooltip"`` string.

    Each input name is listed exactly once: a repeated key in a dict literal
    silently discards every value but the last one.
    """
    return {
        "required": {
            "model": ("WANVIDEOMODEL", {"tooltip": "Wan video diffusion model to patch with ATI motion guidance — connect from WanVideoModelLoader"}),
            "tracks": ("STRING", {"tooltip": "JSON-encoded list of 2D point tracks (each track a list of {x,y} per frame) used as sparse motion guidance, e.g. CoTracker output"}),
            "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 8, "tooltip": "Width in pixels used to normalize track coordinates; should match the latent canvas width"}),
            # NOTE(review): height max (29048) differs from width max (2048) — looks like a typo; confirm before changing.
            "height": ("INT", {"default": 480, "min": 64, "max": 29048, "step": 8, "tooltip": "Height in pixels used to normalize track coordinates; should match the latent canvas height"}),
            "temperature": ("FLOAT", {"default": 220.0, "min": 0.0, "max": 1000.0, "step": 0.1, "tooltip": "Sharpness of the spatial gaussian that maps a track point onto nearby latent tokens; higher = tighter / more localized influence"}),
            "topk": ("INT", {"default": 2, "min": 1, "max": 10, "step": 1, "tooltip": "How many nearest latent tokens each track point writes into; higher spreads the motion cue across more tokens"}),
            "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Fraction of total sampling steps at which ATI motion guidance starts applying (0.0 = from step 0, 1.0 = never)"}),
            "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Fraction of total sampling steps at which ATI motion guidance stops applying (1.0 = through the final step)"}),
        },
    }

Expand Down Expand Up @@ -179,11 +179,11 @@ class WanVideoATITracksVisualize:
@classmethod
def INPUT_TYPES(s):
    """Describe the inputs accepted by the ATI track visualization node.

    Returns:
        dict: a single ``"required"`` section mapping each input name to a
        ``(type, options)`` tuple; ``options`` holds the default, bounds and
        step for the integer widgets plus a ``"tooltip"`` string.

    Each input name is listed exactly once: a repeated key in a dict literal
    silently discards every value but the last one.
    """
    return {
        "required": {
            "images": ("IMAGE", {"tooltip": "Video frames to overlay the track trails onto for visualization"}),
            "tracks": ("STRING", {"tooltip": "JSON-encoded list of 2D point tracks (same format as WanVideoATITracks) to overlay onto the video"}),
            "min_radius": ("INT", {"default": 1, "min": 0, "max": 100, "step": 1, "tooltip": "Pixel radius drawn for the oldest retained point in a track's trail"}),
            "max_radius": ("INT", {"default": 6, "min": 0, "max": 100, "step": 1, "tooltip": "Pixel radius drawn for the newest point in a track's trail; trail tapers from max_radius down to min_radius"}),
            "max_retain": ("INT", {"default": 50, "min": 0, "max": 100, "step": 1, "tooltip": "Maximum number of past frames to keep in each track's trail before older points fall off"}),
        },
    }

Expand Down Expand Up @@ -281,12 +281,12 @@ class WanVideoATI_comfy:
@classmethod
def INPUT_TYPES(s):
    """Describe the inputs accepted by the native-MODEL ATI node.

    Returns:
        dict: a single ``"required"`` section mapping each input name to a
        ``(type, options)`` tuple; ``options`` holds widget defaults and
        bounds where applicable plus a ``"tooltip"`` string.

    Each input name is listed exactly once: a repeated key in a dict literal
    silently discards every value but the last one.
    """
    return {
        "required": {
            "model": ("MODEL", {"tooltip": "Native ComfyUI MODEL to patch with ATI motion guidance (concat_cond override) — connect from a model loader"}),
            "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 8, "tooltip": "Width in pixels used to normalize track coordinates; should match the latent canvas width"}),
            # NOTE(review): height max (29048) differs from width max (2048) — looks like a typo; confirm before changing.
            "height": ("INT", {"default": 480, "min": 64, "max": 29048, "step": 8, "tooltip": "Height in pixels used to normalize track coordinates; should match the latent canvas height"}),
            "tracks": ("STRING", {"tooltip": "JSON-encoded list of 2D point tracks (each track a list of {x,y} per frame) used as sparse motion guidance, e.g. CoTracker output"}),
            "temperature": ("FLOAT", {"default": 220.0, "min": 0.0, "max": 1000.0, "step": 0.1, "tooltip": "Sharpness of the spatial gaussian that maps a track point onto nearby latent tokens; higher = tighter / more localized influence"}),
            "topk": ("INT", {"default": 2, "min": 1, "max": 10, "step": 1, "tooltip": "How many nearest latent tokens each track point writes into; higher spreads the motion cue across more tokens"}),
        },
    }

Expand Down
8 changes: 4 additions & 4 deletions FlashVSR/flashvsr_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ class WanVideoAddFlashVSRInput:
@classmethod
def INPUT_TYPES(s):
    """Describe the inputs accepted by the FlashVSR input node.

    Returns:
        dict: a single ``"required"`` section mapping each input name to a
        ``(type, options)`` tuple; every ``options`` dict carries a
        ``"tooltip"`` and ``strength`` additionally carries its default,
        bounds and step.

    Each input name is listed exactly once: a repeated key in a dict literal
    silently discards every value but the last one.
    """
    return {
        "required": {
            "embeds": ("WANVIDIMAGE_EMBEDS", {"tooltip": "Base image embeds to extend with FlashVSR low-res conditioning — connect from a WanVideo*Embeds producer"}),
            "images": ("IMAGE", {"tooltip": "Per-frame low-quality / low-resolution source video to super-resolve; passed through FlashVSR as the LQ conditioning signal"}),
            "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 2.0, "step": 0.01, "tooltip": "Multiplier on the FlashVSR low-res conditioning latent; 1.0 = full super-resolution guidance, 0 disables it"}),
        }
    }

Expand All @@ -37,7 +37,7 @@ def INPUT_TYPES(s):
},
"optional": {
"precision": (["fp16", "fp32", "bf16"],
{"default": "bf16"}
{"default": "bf16", "tooltip": "Compute dtype the FlashVSR TCDecoder loads at; bf16 default matches the released weights"}
),
}
}
Expand Down
22 changes: 11 additions & 11 deletions HuMo/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def INPUT_TYPES(s):
return {
"required": {
"model": (folder_paths.get_filename_list("audio_encoders"), {"tooltip": "These models are loaded from the 'ComfyUI/models/audio_encoders' folder",}),
"base_precision": (["fp32", "bf16", "fp16"], {"default": "fp16"}),
"base_precision": (["fp32", "bf16", "fp16"], {"default": "fp16", "tooltip": "Computation/storage dtype for the Whisper encoder weights; fp16 is the safe default, fp32 is most accurate but uses more VRAM"}),
"load_device": (["main_device", "offload_device"], {"default": "main_device", "tooltip": "Initial device to load the model to, NOT recommended with the larger models unless you have 48GB+ VRAM"}),
},
}
Expand Down Expand Up @@ -120,19 +120,19 @@ class HuMoEmbeds:
@classmethod
def INPUT_TYPES(s):
    """Describe the inputs accepted by the HuMo embeds node.

    Returns:
        dict: a ``"required"`` and an ``"optional"`` section, each mapping an
        input name to a ``(type, options)`` tuple. ``options`` holds widget
        defaults, bounds and step where applicable plus a ``"tooltip"``.

    Each input name is listed exactly once per section: a repeated key in a
    dict literal silently discards every value but the last one.
    """
    return {
        "required": {
            "num_frames": ("INT", {"default": 81, "min": -1, "max": 10000, "step": 1, "tooltip": "Total frame count to generate; -1 derives the length from the audio duration"}),
            "width": ("INT", {"default": 832, "min": 64, "max": 4096, "step": 16, "tooltip": "Output width in pixels; should be a multiple of 16 and match a resolution the base model was trained on"}),
            "height": ("INT", {"default": 480, "min": 64, "max": 4096, "step": 16, "tooltip": "Output height in pixels; should be a multiple of 16 and match a resolution the base model was trained on"}),
            "audio_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "tooltip": "Strength of the audio conditioning applied to the cross-attention; higher = more pronounced lip motion, 1.0 is the trained default"}),
            "audio_cfg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "tooltip": "When not 1.0, an extra model pass without audio conditioning is done: slower inference but more motion is allowed"}),
            "audio_start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The percent of the video to start applying audio conditioning"}),
            "audio_end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The percent of the video to stop applying audio conditioning"}),
        },
        "optional": {
            "whisper_model": ("WHISPERMODEL", {"tooltip": "Loaded Whisper encoder used to extract audio features for HuMo — connect from Whisper Model Loader. Required if audio is wired."}),
            "vae": ("WANVAE", {"tooltip": "Loaded Wan VAE used to encode reference_images into latent space — connect from WanVideoVAELoader. Required if reference_images is wired."}),
            "reference_images": ("IMAGE", {"tooltip": "Optional reference images for the HuMo model; resized to width × height and VAE-encoded into the latent stream as identity anchors"}),
            "audio": ("AUDIO", {"tooltip": "Optional speaker audio waveform; resampled to 16 kHz and run through the Whisper encoder to extract per-frame audio features. If omitted, audio conditioning is zeroed."}),
            "tiled_vae": ("BOOLEAN", {"default": False, "tooltip": "Use tiled VAE encoding for reduced memory use"}),
        },
    }
Expand Down Expand Up @@ -257,8 +257,8 @@ class WanVideoCombineEmbeds:
@classmethod
def INPUT_TYPES(s):
    """Describe the inputs accepted by the embeds-combining node.

    Returns:
        dict: a single ``"required"`` section with two
        ``WANVIDIMAGE_EMBEDS`` inputs, each a ``(type, options)`` tuple
        whose ``options`` carries a ``"tooltip"`` string.

    Each input name is listed exactly once: a repeated key in a dict literal
    silently discards every value but the last one.
    """
    return {
        "required": {
            "embeds_1": ("WANVIDIMAGE_EMBEDS", {"tooltip": "First Wan image-embeds bundle; merged key-by-key with embeds_2 (embeds_2 keys win on conflict). Experimental — connect from any WANVIDIMAGE_EMBEDS producer."}),
            "embeds_2": ("WANVIDIMAGE_EMBEDS", {"tooltip": "Second Wan image-embeds bundle; merged on top of embeds_1 so its keys override. Use to combine e.g. HuMo audio embeds with a separate Wan I2V image_embeds."}),
        }
    }

Expand Down
8 changes: 4 additions & 4 deletions LongVie2/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ class WanVideoAddDualControlEmbeds:
@classmethod
def INPUT_TYPES(s):
return {"required": {
"embeds": ("WANVIDIMAGE_EMBEDS",),
"vae": ("WANVAE", {"tooltip": "VAE model"}),
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01, "tooltip": "Strength of the reference embedding"}),
"embeds": ("WANVIDIMAGE_EMBEDS", {"tooltip": "Base image embeds to extend with LongVie dual (dense + sparse) control latents — connect from an image-embeds producer (e.g. WanVideoImageToVideoEncode)"}),
"vae": ("WANVAE", {"tooltip": "Wan VAE used to encode dense/sparse/prev control videos into latents — connect from WanVideoVAELoader"}),
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01, "tooltip": "Multiplier on the dual (dense + sparse) control embedding injected alongside the base image embeds; 1.0 is baseline, 0 disables"}),
"start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Start percentage of the embedding application"}),
"end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "End percentage of the embedding application"}),
"first_frame_noise_level": ("FLOAT", {"default": 0.925926, "min": 0.0, "max": 1.0, "step": 0.000001, "tooltip": "Noise level for the first frame when using previous frames"}),
},
"optional": {
"dense": ("IMAGE", {"tooltip": "Dense control signal (depth) video input"}),
"dense": ("IMAGE", {"tooltip": "Per-frame dense control video (typically inverted depth maps); colors are inverted internally to match comfy depth convention"}),
"sparse": ("IMAGE", {"tooltip": "Sparse control signal (tracks) video input"}),
"prev_images": ("IMAGE", {"tooltip": "Previous frames for temporal consistency, default is 8 frames"}),
}
Expand Down
22 changes: 11 additions & 11 deletions MTV/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class DownloadAndLoadNLFModel:
def INPUT_TYPES(s):
return {
"required": {
"url": (model_list, {"default": "https://github.com/isarandi/nlf/releases/download/v0.3.2/nlf_l_multi_0.3.2.torchscript"}),
"url": (model_list, {"default": "https://github.com/isarandi/nlf/releases/download/v0.3.2/nlf_l_multi_0.3.2.torchscript", "tooltip": "Source URL for the NLF (Neural Localizer Fields) SMPL pose model; auto-downloaded into ComfyUI/models/nlf on first use"}),
},
"optional": {
"warmup": ("BOOLEAN", {"default": True, "tooltip": "Whether to warmup the model after loading"}),
Expand Down Expand Up @@ -176,8 +176,8 @@ class MTVCrafterEncodePoses:
def INPUT_TYPES(s):
    """Describe the inputs accepted by the pose-encoding node.

    NOTE(review): the decorator for this method is not visible here;
    ``s`` is presumably the class (``@classmethod``) — confirm.

    Returns:
        dict: a single ``"required"`` section mapping each input name to a
        ``(type, options)`` tuple whose ``options`` carries a ``"tooltip"``.

    Each input name is listed exactly once: a repeated key in a dict literal
    silently discards every value but the last one.
    """
    return {
        "required": {
            "vqvae": ("VQVAE", {"tooltip": "MTVCrafter motion VQ-VAE — connect from LoadVQVAE (encodes SMPL pose sequences into motion tokens)"}),
            "poses": ("NLFPRED", {"tooltip": "NLF SMPL pose predictions to tokenize — connect from NLFPredict"}),
        },
    }

Expand Down Expand Up @@ -218,8 +218,8 @@ class NLFPredict:
@classmethod
def INPUT_TYPES(s):
return {"required": {
"model": ("NLFMODEL",),
"images": ("IMAGE", {"tooltip": "Input images for the model"}),
"model": ("NLFMODEL", {"tooltip": "NLF SMPL pose detector — connect from LoadNLFModel or DownloadAndLoadNLFModel"}),
"images": ("IMAGE", {"tooltip": "Per-frame images to run NLF SMPL pose detection on; returns 3D joint predictions and per-frame bounding boxes"}),
},
"optional": {
"per_batch": ("INT", {"default": -1, "min": -1, "max": 10000, "step": 1, "tooltip": "How many images to process at once. -1 means all at once."}),
Expand Down Expand Up @@ -294,14 +294,14 @@ class DrawNLFPoses:
@classmethod
def INPUT_TYPES(s):
    """Describe the inputs accepted by the NLF pose-drawing node.

    Returns:
        dict: a ``"required"`` and an ``"optional"`` section, each mapping an
        input name to a ``(type, options)`` tuple. For ``style`` the first
        tuple element is the list of allowed choices rather than a type name.

    Each input name is listed exactly once per section: a repeated key in a
    dict literal silently discards every value but the last one.
    """
    return {
        "required": {
            "poses": ("NLFPRED", {"tooltip": "NLF SMPL pose predictions to render — connect from NLFPredict"}),
            "width": ("INT", {"default": 512, "tooltip": "Output pose-image width in pixels; should match the canvas size of the target video"}),
            "height": ("INT", {"default": 512, "tooltip": "Output pose-image height in pixels; should match the canvas size of the target video"}),
        },
        "optional": {
            "stick_width": ("FLOAT", {"default": 4.0, "min": 0.0, "max": 1000.0, "step": 0.01, "tooltip": "Pixel width of the limb lines connecting keypoints"}),
            "point_radius": ("INT", {"default": 5, "min": 1, "max": 10, "step": 1, "tooltip": "Pixel radius of the keypoint dots drawn on each joint"}),
            "style": (["original", "scail"], {"default": "original", "tooltip": "Pose-drawing style — 'original' is the default MTVCrafter look, 'scail' matches the SCAIL controlnet's expected input"}),
        },
    }

Expand Down
Loading