{ "last_node_id": 9, "last_link_id": 11, "nodes": [ { "id": 1, "type": "LoadImage", "pos": [50, 100], "size": [315, 314], "widgets_values": ["input_frame.png", "image"], "title": "API Input Image", "flags": {}, "order": 0, "mode": 0, "properties": { "Node name for S&R": "LoadImage" }, "outputs": [ { "name": "IMAGE", "type": "IMAGE", "links": [1], "slot_index": 0 }, { "name": "MASK", "type": "MASK", "links": null } ] }, { "id": 2, "type": "DownloadAndLoadCogVideoModel", "pos": [50, 500], "size": [350, 100], "widgets_values": ["THUDM/CogVideoX-5b-I2V", "bf16", "disabled", true], "title": "CogVideoX-5b-I2V Loader", "flags": {}, "order": 1, "mode": 0, "properties": { "Node name for S&R": "DownloadAndLoadCogVideoModel" }, "outputs": [ { "name": "model", "type": "COGVIDEOMODEL", "links": [2], "slot_index": 0 }, { "name": "vae", "type": "VAE", "links": [3, 10], "slot_index": 1 } ] }, { "id": 7, "type": "CLIPLoader", "pos": [50, 650], "size": [350, 100], "widgets_values": ["t5xxl_fp16.safetensors", "sd3"], "title": "T5 CLIP Loader", "flags": {}, "order": 2, "mode": 0, "properties": { "Node name for S&R": "CLIPLoader" }, "outputs": [ { "name": "CLIP", "type": "CLIP", "links": [4, 5], "slot_index": 0 } ] }, { "id": 3, "type": "CogVideoTextEncode", "pos": [450, 100], "size": [400, 200], "widgets_values": ["Camera movement description, action, scene details", 1, false], "title": "API Video Prompt (Positive)", "flags": {}, "order": 3, "mode": 0, "properties": { "Node name for S&R": "CogVideoTextEncode" }, "inputs": [ { "name": "clip", "type": "CLIP", "link": 4 } ], "outputs": [ { "name": "conditioning", "type": "CONDITIONING", "links": [6], "slot_index": 0 }, { "name": "clip", "type": "CLIP", "links": null } ] }, { "id": 8, "type": "CogVideoTextEncode", "pos": [450, 350], "size": [400, 200], "widgets_values": ["low quality, blurry, distorted, watermark", 1, true], "title": "API Video Prompt (Negative)", "flags": {}, "order": 4, "mode": 0, "properties": { "Node name for S&R": 
"CogVideoTextEncode" }, "inputs": [ { "name": "clip", "type": "CLIP", "link": 5 } ], "outputs": [ { "name": "conditioning", "type": "CONDITIONING", "links": [7], "slot_index": 0 }, { "name": "clip", "type": "CLIP", "links": null } ] }, { "id": 9, "type": "CogVideoImageEncode", "pos": [450, 600], "size": [315, 100], "widgets_values": [], "title": "Encode Input Image", "flags": {}, "order": 5, "mode": 0, "properties": { "Node name for S&R": "CogVideoImageEncode" }, "inputs": [ { "name": "vae", "type": "VAE", "link": 3 }, { "name": "start_image", "type": "IMAGE", "link": 1 } ], "outputs": [ { "name": "samples", "type": "LATENT", "links": [8], "slot_index": 0 } ] }, { "id": 4, "type": "CogVideoSampler", "pos": [900, 100], "size": [315, 474], "widgets_values": [49, 50, 6.0, 42, "fixed", "CogVideoXDDIM", 1.0], "title": "CogVideoX Sampler (6s @ 8fps)", "flags": {}, "order": 6, "mode": 0, "properties": { "Node name for S&R": "CogVideoSampler" }, "inputs": [ { "name": "model", "type": "COGVIDEOMODEL", "link": 2 }, { "name": "positive", "type": "CONDITIONING", "link": 6 }, { "name": "negative", "type": "CONDITIONING", "link": 7 }, { "name": "image_cond_latents", "type": "LATENT", "link": 8 } ], "outputs": [ { "name": "samples", "type": "LATENT", "links": [9], "slot_index": 0 } ] }, { "id": 5, "type": "CogVideoDecode", "pos": [1250, 100], "size": [315, 200], "widgets_values": [true, 240, 360, 0.25, 0.25], "title": "VAE Decode Video", "flags": {}, "order": 7, "mode": 0, "properties": { "Node name for S&R": "CogVideoDecode" }, "inputs": [ { "name": "vae", "type": "VAE", "link": 10 }, { "name": "samples", "type": "LATENT", "link": 9 } ], "outputs": [ { "name": "images", "type": "IMAGE", "links": [11], "slot_index": 0 } ] }, { "id": 6, "type": "VHS_VideoCombine", "pos": [1600, 100], "size": [315, 200], "widgets_values": [8, 0, "cogvideox_output", "video/h264-mp4", "yuv420p", 19, true, false], "title": "Combine Video Frames", "flags": {}, "order": 8, "mode": 0, "properties": { 
"Node name for S&R": "VHS_VideoCombine" }, "inputs": [ { "name": "images", "type": "IMAGE", "link": 11 } ], "outputs": [ { "name": "Filenames", "type": "VHS_FILENAMES", "links": null } ] } ], "links": [ [1, 1, 0, 9, 1, "IMAGE"], [2, 2, 0, 4, 0, "COGVIDEOMODEL"], [3, 2, 1, 9, 0, "VAE"], [4, 7, 0, 3, 0, "CLIP"], [5, 7, 0, 8, 0, "CLIP"], [6, 3, 0, 4, 1, "CONDITIONING"], [7, 8, 0, 4, 2, "CONDITIONING"], [8, 9, 0, 4, 3, "LATENT"], [9, 4, 0, 5, 1, "LATENT"], [10, 2, 1, 5, 0, "VAE"], [11, 5, 0, 6, 0, "IMAGE"] ], "groups": [], "config": {}, "extra": { "workflow_info": { "name": "CogVideoX Image-to-Video Production", "version": "1.2.0", "description": "AI-driven image-to-video using CogVideoX-5b-I2V. Generate 6-second videos (49 frames @ 8fps) from input images with camera movement and action.", "category": "image-to-video", "tags": ["cogvideox", "i2v", "video-generation", "production"], "requirements": { "models": ["CogVideoX-5b-I2V", "T5-XXL FP16"], "custom_nodes": ["ComfyUI-VideoHelperSuite", "ComfyUI-CogVideoXWrapper"], "vram_min": "20GB", "vram_recommended": "24GB" }, "parameters": { "input_image": { "node_id": 1, "widget_index": 0, "type": "image", "required": true, "description": "Starting frame for video generation" }, "positive_prompt": { "node_id": 3, "widget_index": 0, "type": "string", "required": true, "default": "Camera movement description, action, scene details", "description": "Describe desired camera movement, actions, and scene" }, "negative_prompt": { "node_id": 8, "widget_index": 0, "type": "string", "required": false, "default": "low quality, blurry, distorted, watermark", "description": "Undesired elements to avoid" }, "num_frames": { "node_id": 4, "widget_index": 0, "type": "integer", "required": false, "default": 49, "min": 1, "max": 1024, "description": "Number of frames to generate (49 = ~6s @ 8fps)" }, "steps": { "node_id": 4, "widget_index": 1, "type": "integer", "required": false, "default": 50, "min": 20, "max": 100, "description": "Sampling 
steps (50 recommended for quality)" }, "cfg": { "node_id": 4, "widget_index": 2, "type": "float", "required": false, "default": 6.0, "min": 1.0, "max": 30.0, "description": "Classifier-free guidance scale" }, "seed": { "node_id": 4, "widget_index": 3, "type": "integer", "required": false, "default": 42, "description": "Random seed for reproducibility" }, "fps": { "node_id": 6, "widget_index": 0, "type": "integer", "required": false, "default": 8, "description": "Output video framerate" } }, "outputs": { "video": { "node_id": 6, "type": "video", "format": "MP4 (H.264)", "resolution": "Based on input image", "duration": "~6 seconds @ 8fps (49 frames)" } }, "performance": { "avg_generation_time": "120-180 seconds", "vram_usage": "~20-22GB", "gpu_utilization": "95-100%" }, "use_cases": [ "Animate static images with camera motion", "Create video loops from single frames", "Add dynamic movement to product shots", "Generate cinematic camera movements" ], "notes": [ "CogVideoX-5b-I2V is specifically trained for image-to-video generation", "Model will download automatically on first use (~10GB)", "Enable VAE tiling to reduce VRAM usage", "Higher steps (50-100) improve quality but increase generation time", "T5-XXL text encoder (t5xxl_fp16.safetensors) required - loaded by the CLIPLoader node with type sd3; automatically linked from SD3.5" ] } }, "version": 0.4 }