kijai · laurigates · May 19, 2026
diff --git a/ATI/nodes.py b/ATI/nodes.py
@@ -139,14 +139,14 @@ class WanVideoATITracks:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
-            "model": ("WANVIDEOMODEL", ),
-            "tracks": ("STRING",),
-            "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 8, "tooltip": "Width of the image to encode"}),
-            "height": ("INT", {"default": 480, "min": 64, "max": 29048, "step": 8, "tooltip": "Height of the image to encode"}),
-            "temperature": ("FLOAT", {"default": 220.0, "min": 0.0, "max": 1000.0, "step": 0.1}),
-            "topk": ("INT", {"default": 2, "min": 1, "max": 10, "step": 1}),
-            "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Start percent of the steps to apply ATI"}),
-            "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "End percent of the steps to apply ATI"}),
+            "model": ("WANVIDEOMODEL", {"tooltip": "Wan video diffusion model to patch with ATI motion guidance — connect from WanVideoModelLoader"}),
+            "tracks": ("STRING", {"tooltip": "JSON-encoded list of 2D point tracks (each track a list of {x,y} per frame) used as sparse motion guidance, e.g. CoTracker output"}),
+            "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 8, "tooltip": "Width in pixels used to normalize track coordinates; should match the latent canvas width"}),
+            "height": ("INT", {"default": 480, "min": 64, "max": 29048, "step": 8, "tooltip": "Height in pixels used to normalize track coordinates; should match the latent canvas height"}),
+            "temperature": ("FLOAT", {"default": 220.0, "min": 0.0, "max": 1000.0, "step": 0.1, "tooltip": "Sharpness of the spatial gaussian that maps a track point onto nearby latent tokens; higher = tighter / more localized influence"}),
+            "topk": ("INT", {"default": 2, "min": 1, "max": 10, "step": 1, "tooltip": "How many nearest latent tokens each track point writes into; higher spreads the motion cue across more tokens"}),
+            "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Fraction of total sampling steps at which ATI motion guidance starts applying (0.0 = from step 0, 1.0 = never)"}),
+            "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Fraction of total sampling steps at which ATI motion guidance stops applying (1.0 = through the final step)"}),
         },
         }
 
@@ -179,11 +179,11 @@ class WanVideoATITracksVisualize:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
-            "images": ("IMAGE",),
-            "tracks": ("STRING",),
-            "min_radius": ("INT", {"default": 1, "min": 0, "max": 100, "step": 1, "tooltip": "radius for the very first point (oldest)"}),
-            "max_radius": ("INT", {"default": 6, "min": 0, "max": 100, "step": 1, "tooltip": "radius for the current point (newest)"}),
-            "max_retain": ("INT", {"default": 50, "min": 0, "max": 100, "step": 1, "tooltip": "Maximum number of points to retain"}),
+            "images": ("IMAGE", {"tooltip": "Video frames to overlay the track trails onto for visualization"}),
+            "tracks": ("STRING", {"tooltip": "JSON-encoded list of 2D point tracks (same format as WanVideoATITracks) to overlay onto the video"}),
+            "min_radius": ("INT", {"default": 1, "min": 0, "max": 100, "step": 1, "tooltip": "Pixel radius drawn for the oldest retained point in a track's trail"}),
+            "max_radius": ("INT", {"default": 6, "min": 0, "max": 100, "step": 1, "tooltip": "Pixel radius drawn for the newest point in a track's trail; trail tapers from max_radius down to min_radius"}),
+            "max_retain": ("INT", {"default": 50, "min": 0, "max": 100, "step": 1, "tooltip": "Maximum number of past frames to keep in each track's trail before older points fall off"}),
         },
         }
 
@@ -281,12 +281,12 @@ class WanVideoATI_comfy:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
-            "model": ("MODEL", ),
-            "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 8, "tooltip": "Width of the image to encode"}),
-            "height": ("INT", {"default": 480, "min": 64, "max": 29048, "step": 8, "tooltip": "Height of the image to encode"}),
-            "tracks": ("STRING",),
-            "temperature": ("FLOAT", {"default": 220.0, "min": 0.0, "max": 1000.0, "step": 0.1}),
-            "topk": ("INT", {"default": 2, "min": 1, "max": 10, "step": 1}),
+            "model": ("MODEL", {"tooltip": "Native ComfyUI MODEL to patch with ATI motion guidance (concat_cond override) — connect from a model loader"}),
+            "width": ("INT", {"default": 832, "min": 64, "max": 2048, "step": 8, "tooltip": "Width in pixels used to normalize track coordinates; should match the latent canvas width"}),
+            "height": ("INT", {"default": 480, "min": 64, "max": 29048, "step": 8, "tooltip": "Height in pixels used to normalize track coordinates; should match the latent canvas height"}),
+            "tracks": ("STRING", {"tooltip": "JSON-encoded list of 2D point tracks (each track a list of {x,y} per frame) used as sparse motion guidance, e.g. CoTracker output"}),
+            "temperature": ("FLOAT", {"default": 220.0, "min": 0.0, "max": 1000.0, "step": 0.1, "tooltip": "Sharpness of the spatial gaussian that maps a track point onto nearby latent tokens; higher = tighter / more localized influence"}),
+            "topk": ("INT", {"default": 2, "min": 1, "max": 10, "step": 1, "tooltip": "How many nearest latent tokens each track point writes into; higher spreads the motion cue across more tokens"}),
             },
         }
 

diff --git a/FlashVSR/flashvsr_nodes.py b/FlashVSR/flashvsr_nodes.py
@@ -10,9 +10,9 @@ class WanVideoAddFlashVSRInput:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
-                    "embeds": ("WANVIDIMAGE_EMBEDS",),
-                    "images": ("IMAGE", {"tooltip": "Low-res video frames to enhance"}),
-                    "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 2.0, "step": 0.01, "tooltip": "Strength to apply the FlashVSR latent"}),
+                    "embeds": ("WANVIDIMAGE_EMBEDS", {"tooltip": "Base image embeds to extend with FlashVSR low-res conditioning — connect from a WanVideo*Embeds producer"}),
+                    "images": ("IMAGE", {"tooltip": "Per-frame low-quality / low-resolution source video to super-resolve; passed through FlashVSR as the LQ conditioning signal"}),
+                    "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 2.0, "step": 0.01, "tooltip": "Multiplier on the FlashVSR low-res conditioning latent; 1.0 = full super-resolution guidance, 0 disables it"}),
                 }
         }
 
@@ -37,7 +37,7 @@ def INPUT_TYPES(s):
             },
             "optional": {
                 "precision": (["fp16", "fp32", "bf16"],
-                    {"default": "bf16"}
+                    {"default": "bf16", "tooltip": "Compute dtype the FlashVSR TCDecoder is loaded in; the bf16 default matches the released weights"}
                 ),
             }
         }

diff --git a/HuMo/nodes.py b/HuMo/nodes.py
@@ -56,7 +56,7 @@ def INPUT_TYPES(s):
         return {
             "required": {
                 "model": (folder_paths.get_filename_list("audio_encoders"), {"tooltip": "These models are loaded from the 'ComfyUI/models/audio_encoders' folder",}),
-                "base_precision": (["fp32", "bf16", "fp16"], {"default": "fp16"}),
+                "base_precision": (["fp32", "bf16", "fp16"], {"default": "fp16", "tooltip": "Computation/storage dtype for the Whisper encoder weights; fp16 is the safe default, fp32 is most accurate but uses more VRAM"}),
                 "load_device": (["main_device", "offload_device"], {"default": "main_device", "tooltip": "Initial device to load the model to, NOT recommended with the larger models unless you have 48GB+ VRAM"}),
             },
         }
@@ -120,19 +120,19 @@ class HuMoEmbeds:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
-            "num_frames": ("INT", {"default": 81, "min": -1, "max": 10000, "step": 1, "tooltip": "The total frame count to generate."}),
-            "width": ("INT", {"default": 832, "min": 64, "max": 4096, "step": 16}),
-            "height": ("INT", {"default": 480, "min": 64, "max": 4096, "step": 16}),
-            "audio_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "tooltip": "Strength of the audio conditioning"}),
+            "num_frames": ("INT", {"default": 81, "min": -1, "max": 10000, "step": 1, "tooltip": "Total frame count to generate; -1 derives the length from the audio duration"}),
+            "width": ("INT", {"default": 832, "min": 64, "max": 4096, "step": 16, "tooltip": "Output width in pixels; should be a multiple of 16 and match a resolution the base model was trained on"}),
+            "height": ("INT", {"default": 480, "min": 64, "max": 4096, "step": 16, "tooltip": "Output height in pixels; should be a multiple of 16 and match a resolution the base model was trained on"}),
+            "audio_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "tooltip": "Strength of the audio conditioning applied to the cross-attention; higher = more pronounced lip motion, 1.0 is the trained default"}),
             "audio_cfg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "tooltip": "When not 1.0, an extra model pass without audio conditioning is done: slower inference but more motion is allowed"}),
             "audio_start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The percent of the video to start applying audio conditioning"}),
             "audio_end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The percent of the video to stop applying audio conditioning"})
         },
             "optional" : {
-                "whisper_model": ("WHISPERMODEL",),
-                "vae": ("WANVAE", ),
-                "reference_images": ("IMAGE", {"tooltip": "reference images for the humo model"}),
-                "audio": ("AUDIO",),
+                "whisper_model": ("WHISPERMODEL", {"tooltip": "Loaded Whisper encoder used to extract audio features for HuMo — connect from Whisper Model Loader. Required if audio is wired."}),
+                "vae": ("WANVAE", {"tooltip": "Loaded Wan VAE used to encode reference_images into latent space — connect from WanVideoVAELoader. Required if reference_images is wired."}),
+                "reference_images": ("IMAGE", {"tooltip": "Optional reference images for the HuMo model; resized to width × height and VAE-encoded into the latent stream as identity anchors"}),
+                "audio": ("AUDIO", {"tooltip": "Optional speaker audio waveform; resampled to 16 kHz and run through the Whisper encoder to extract per-frame audio features. If omitted, audio conditioning is zeroed."}),
                 "tiled_vae": ("BOOLEAN", {"default": False, "tooltip": "Use tiled VAE encoding for reduced memory use"}),
             }
         }
@@ -257,8 +257,8 @@ class WanVideoCombineEmbeds:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
-                    "embeds_1": ("WANVIDIMAGE_EMBEDS",),
-                    "embeds_2": ("WANVIDIMAGE_EMBEDS",),
+                    "embeds_1": ("WANVIDIMAGE_EMBEDS", {"tooltip": "First Wan image-embeds bundle; merged key-by-key with embeds_2 (embeds_2 keys win on conflict). Experimental — connect from any WANVIDIMAGE_EMBEDS producer."}),
+                    "embeds_2": ("WANVIDIMAGE_EMBEDS", {"tooltip": "Second Wan image-embeds bundle; merged on top of embeds_1 so its keys override. Use to combine e.g. HuMo audio embeds with a separate Wan I2V image_embeds."}),
                 }
         }
 

diff --git a/LongVie2/nodes.py b/LongVie2/nodes.py
@@ -9,15 +9,15 @@ class WanVideoAddDualControlEmbeds:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
-                    "embeds": ("WANVIDIMAGE_EMBEDS",),
-                    "vae": ("WANVAE", {"tooltip": "VAE model"}),
-                    "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01, "tooltip": "Strength of the reference embedding"}),
+                    "embeds": ("WANVIDIMAGE_EMBEDS", {"tooltip": "Base image embeds to extend with LongVie dual (dense + sparse) control latents — connect from an image-embeds producer (e.g. WanVideoImageToVideoEncode)"}),
+                    "vae": ("WANVAE", {"tooltip": "Wan VAE used to encode dense/sparse/prev control videos into latents — connect from WanVideoVAELoader"}),
+                    "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01, "tooltip": "Multiplier on the dual (dense + sparse) control embedding injected alongside the base image embeds; 1.0 is baseline, 0 disables"}),
                     "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Start percentage of the embedding application"}),
                     "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "End percentage of the embedding application"}),
                     "first_frame_noise_level": ("FLOAT", {"default": 0.925926, "min": 0.0, "max": 1.0, "step": 0.000001, "tooltip": "Noise level for the first frame when using previous frames"}),
                 },
                 "optional": {
-                    "dense": ("IMAGE", {"tooltip": "Dense control signal (depth) video input"}),
+                    "dense": ("IMAGE", {"tooltip": "Per-frame dense control video (typically inverted depth maps); colors are inverted internally to match comfy depth convention"}),
                     "sparse": ("IMAGE", {"tooltip": "Sparse control signal (tracks) video input"}),
                     "prev_images": ("IMAGE", {"tooltip": "Previous frames for temporal consistency, default is 8 frames"}),
                 }

diff --git a/MTV/nodes.py b/MTV/nodes.py
@@ -47,7 +47,7 @@ class DownloadAndLoadNLFModel:
     def INPUT_TYPES(s):
         return {
             "required": {
-                "url": (model_list, {"default": "https://github.com/isarandi/nlf/releases/download/v0.3.2/nlf_l_multi_0.3.2.torchscript"}),
+                "url": (model_list, {"default": "https://github.com/isarandi/nlf/releases/download/v0.3.2/nlf_l_multi_0.3.2.torchscript", "tooltip": "Source URL for the NLF (Neural Localizer Fields) SMPL pose model; auto-downloaded into ComfyUI/models/nlf on first use"}),
              },
              "optional": {
                  "warmup": ("BOOLEAN", {"default": True, "tooltip": "Whether to warmup the model after loading"}),
@@ -176,8 +176,8 @@ class MTVCrafterEncodePoses:
     def INPUT_TYPES(s):
         return {
             "required": {
-                "vqvae": ("VQVAE", {"tooltip": "VQVAE model"}),
-                "poses": ("NLFPRED", {"tooltip": "Input poses for the model"}),
+                "vqvae": ("VQVAE", {"tooltip": "MTVCrafter motion VQ-VAE — connect from LoadVQVAE (encodes SMPL pose sequences into motion tokens)"}),
+                "poses": ("NLFPRED", {"tooltip": "NLF SMPL pose predictions to tokenize — connect from NLFPredict"}),
             },
         }
 
@@ -218,8 +218,8 @@ class NLFPredict:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
-            "model": ("NLFMODEL",),
-            "images": ("IMAGE", {"tooltip": "Input images for the model"}),
+            "model": ("NLFMODEL", {"tooltip": "NLF SMPL pose detector — connect from LoadNLFModel or DownloadAndLoadNLFModel"}),
+            "images": ("IMAGE", {"tooltip": "Per-frame images to run NLF SMPL pose detection on; returns 3D joint predictions and per-frame bounding boxes"}),
             },
             "optional": {
                 "per_batch": ("INT", {"default": -1, "min": -1, "max": 10000, "step": 1, "tooltip": "How many images to process at once. -1 means all at once."}),
@@ -294,14 +294,14 @@ class DrawNLFPoses:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
-            "poses": ("NLFPRED", {"tooltip": "Input poses for the model"}),
-            "width": ("INT", {"default": 512}),
-            "height": ("INT", {"default": 512}),
+            "poses": ("NLFPRED", {"tooltip": "NLF SMPL pose predictions to render — connect from NLFPredict"}),
+            "width": ("INT", {"default": 512, "tooltip": "Output pose-image width in pixels; should match the canvas size of the target video"}),
+            "height": ("INT", {"default": 512, "tooltip": "Output pose-image height in pixels; should match the canvas size of the target video"}),
             },
             "optional": {
-                "stick_width": ("FLOAT", {"default": 4.0, "min": 0.0, "max": 1000.0, "step": 0.01, "tooltip": "Stick width multiplier"}),
-                "point_radius": ("INT", {"default": 5, "min": 1, "max": 10, "step": 1, "tooltip": "Point radius for drawing the pose"}),
-                "style": (["original", "scail"], {"default": "original", "tooltip": "style of the pose drawing"}),
+                "stick_width": ("FLOAT", {"default": 4.0, "min": 0.0, "max": 1000.0, "step": 0.01, "tooltip": "Pixel width of the limb lines connecting keypoints"}),
+                "point_radius": ("INT", {"default": 5, "min": 1, "max": 10, "step": 1, "tooltip": "Pixel radius of the keypoint dots drawn on each joint"}),
+                "style": (["original", "scail"], {"default": "original", "tooltip": "Pose-drawing style — 'original' is the default MTVCrafter look, 'scail' matches the SCAIL controlnet's expected input"}),
             }
     }