feat: complete CogVideoX I2V workflow with proper node connections

- Add all necessary nodes: DualCLIPLoader, CogVideoImageEncode, CogVideoXVAELoader
- Add negative prompt support (node 8)
- Connect all nodes through the links array (11 connections)
- Workflow now fully functional for image-to-video generation

Node flow:
1. LoadImage -> CogVideoImageEncode (image)
2. DownloadAndLoadCogVideoModel -> CogVideoSampler (model)
3. DownloadAndLoadCogVideoModel -> CogVideoImageEncode (vae)
4. DualCLIPLoader -> CogVideoTextEncode (clip; positive & negative)
5. CogVideoTextEncode (positive/negative) -> CogVideoSampler (positive, negative)
6. CogVideoImageEncode -> CogVideoSampler (image conditioning via image_cond_latents)
7. CogVideoSampler -> CogVideoDecode (samples)
8. CogVideoXVAELoader -> CogVideoDecode (vae)
9. CogVideoDecode -> VHS_VideoCombine (images)

Version: 1.1.0
This commit is contained in:
2025-11-23 09:07:36 +01:00
parent a9c26861a4
commit 6fab6386d7

View File

@@ -1,206 +1,440 @@
{ {
"last_node_id": 10, "last_node_id": 10,
"last_link_id": 12, "last_link_id": 14,
"nodes": [ "nodes": [
{ {
"id": 1, "id": 1,
"type": "LoadImage", "type": "LoadImage",
"pos": [ "pos": [50, 100],
50, "size": [315, 314],
100 "widgets_values": ["input_frame.png", "image"],
],
"size": [
315,
314
],
"widgets_values": [
"input_frame.png",
"image"
],
"title": "API Input Image", "title": "API Input Image",
"flags": {}, "flags": {},
"order": 0, "order": 0,
"mode": 0, "mode": 0,
"properties": { "properties": {
"Node name for S&R": "LoadImage" "Node name for S&R": "LoadImage"
} },
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [1],
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
]
}, },
{ {
"id": 2, "id": 2,
"type": "DownloadAndLoadCogVideoModel", "type": "DownloadAndLoadCogVideoModel",
"pos": [ "pos": [50, 500],
50, "size": [350, 100],
500 "widgets_values": ["THUDM/CogVideoX-5b-I2V"],
], "title": "CogVideoX-5b-I2V Loader",
"widgets_values": [
"THUDM/CogVideoX-5b-I2V"
],
"title": "CogVideoX-5b Loader",
"flags": {}, "flags": {},
"order": 1, "order": 1,
"mode": 0, "mode": 0,
"properties": { "properties": {
"Node name for S&R": "DownloadAndLoadCogVideoModel" "Node name for S&R": "DownloadAndLoadCogVideoModel"
}, },
"size": { "outputs": [
"0": 350, {
"1": 100 "name": "model",
} "type": "COGVIDEOMODEL",
"links": [2],
"slot_index": 0
},
{
"name": "vae",
"type": "VAE",
"links": [3],
"slot_index": 1
}
]
}, },
{ {
"id": 3, "id": 7,
"type": "CogVideoTextEncode", "type": "DualCLIPLoader",
"pos": [ "pos": [50, 650],
450, "size": [350, 100],
100 "widgets_values": ["t5xxl_fp16.safetensors", "clip_l.safetensors", "flux"],
], "title": "CLIP Loader",
"widgets_values": [
"Camera movement description, action, scene details"
],
"title": "API Video Prompt",
"flags": {}, "flags": {},
"order": 2, "order": 2,
"mode": 0, "mode": 0,
"properties": { "properties": {
"Node name for S&R": "CogVideoTextEncode" "Node name for S&R": "DualCLIPLoader"
}, },
"size": { "outputs": [
"0": 400, {
"1": 200 "name": "CLIP",
} "type": "CLIP",
"links": [4, 5],
"slot_index": 0
}
]
}, },
{ {
"id": 4, "id": 3,
"type": "CogVideoSampler", "type": "CogVideoTextEncode",
"pos": [ "pos": [450, 100],
800, "size": [400, 200],
100 "widgets_values": ["Camera movement description, action, scene details"],
], "title": "API Video Prompt (Positive)",
"widgets_values": [
42,
"CogVideoXDDIM",
49,
50,
6.0
],
"title": "CogVideoX Sampler (6s @ 8fps)",
"flags": {}, "flags": {},
"order": 3, "order": 3,
"mode": 0, "mode": 0,
"properties": { "properties": {
"Node name for S&R": "CogVideoSampler" "Node name for S&R": "CogVideoTextEncode"
}, },
"size": { "inputs": [
"0": 315, {
"1": 474 "name": "clip",
} "type": "CLIP",
"link": 4
}
],
"outputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"links": [6],
"slot_index": 0
},
{
"name": "clip",
"type": "CLIP",
"links": null
}
]
}, },
{ {
"id": 5, "id": 8,
"type": "CogVideoDecode", "type": "CogVideoTextEncode",
"pos": [ "pos": [450, 350],
1150, "size": [400, 200],
100 "widgets_values": ["low quality, blurry, distorted"],
], "title": "API Video Prompt (Negative)",
"title": "VAE Decode Video",
"flags": {}, "flags": {},
"order": 4, "order": 4,
"mode": 0, "mode": 0,
"properties": { "properties": {
"Node name for S&R": "CogVideoDecode" "Node name for S&R": "CogVideoTextEncode"
}, },
"widgets_values": [ "inputs": [
true, {
240, "name": "clip",
360, "type": "CLIP",
0.25, "link": 5
0.25 }
], ],
"size": { "outputs": [
"0": 315, {
"1": 100 "name": "conditioning",
} "type": "CONDITIONING",
"links": [7],
"slot_index": 0
},
{
"name": "clip",
"type": "CLIP",
"links": null
}
]
}, },
{ {
"id": 6, "id": 9,
"type": "VHS_VideoCombine", "type": "CogVideoImageEncode",
"pos": [ "pos": [450, 600],
1450, "size": [315, 100],
100 "widgets_values": [],
], "title": "Encode Input Image",
"widgets_values": [
8,
0,
"cogvideox_output",
"video/h264-mp4"
],
"title": "Combine Video Frames",
"flags": {}, "flags": {},
"order": 5, "order": 5,
"mode": 0, "mode": 0,
"properties": {
"Node name for S&R": "CogVideoImageEncode"
},
"inputs": [
{
"name": "vae",
"type": "VAE",
"link": 3
},
{
"name": "image",
"type": "IMAGE",
"link": 1
}
],
"outputs": [
{
"name": "samples",
"type": "LATENT",
"links": [8],
"slot_index": 0
}
]
},
{
"id": 4,
"type": "CogVideoSampler",
"pos": [900, 100],
"size": [315, 474],
"widgets_values": [42, "CogVideoXDDIM", 49, 50, 6.0],
"title": "CogVideoX Sampler (6s @ 8fps)",
"flags": {},
"order": 6,
"mode": 0,
"properties": {
"Node name for S&R": "CogVideoSampler"
},
"inputs": [
{
"name": "model",
"type": "COGVIDEOMODEL",
"link": 2
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 6
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 7
},
{
"name": "image_cond_latents",
"type": "LATENT",
"link": 8
}
],
"outputs": [
{
"name": "samples",
"type": "LATENT",
"links": [9],
"slot_index": 0
}
]
},
{
"id": 5,
"type": "CogVideoDecode",
"pos": [1250, 100],
"size": [315, 200],
"widgets_values": [true, 240, 360, 0.25, 0.25],
"title": "VAE Decode Video",
"flags": {},
"order": 7,
"mode": 0,
"properties": {
"Node name for S&R": "CogVideoDecode"
},
"inputs": [
{
"name": "vae",
"type": "VAE",
"link": 10
},
{
"name": "samples",
"type": "LATENT",
"link": 9
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [11],
"slot_index": 0
}
]
},
{
"id": 10,
"type": "CogVideoXVAELoader",
"pos": [900, 600],
"size": [315, 100],
"widgets_values": ["THUDM/CogVideoX-5b-I2V"],
"title": "VAE Loader",
"flags": {},
"order": 8,
"mode": 0,
"properties": {
"Node name for S&R": "CogVideoXVAELoader"
},
"outputs": [
{
"name": "vae",
"type": "VAE",
"links": [10],
"slot_index": 0
}
]
},
{
"id": 6,
"type": "VHS_VideoCombine",
"pos": [1600, 100],
"size": [315, 200],
"widgets_values": [8, 0, "cogvideox_output", "video/h264-mp4"],
"title": "Combine Video Frames",
"flags": {},
"order": 9,
"mode": 0,
"properties": { "properties": {
"Node name for S&R": "VHS_VideoCombine" "Node name for S&R": "VHS_VideoCombine"
}, },
"size": { "inputs": [
"0": 315, {
"1": 100 "name": "images",
} "type": "IMAGE",
"link": 11
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null
}
]
} }
], ],
"links": [], "links": [
[1, 1, 0, 9, 1, "IMAGE"],
[2, 2, 0, 4, 0, "COGVIDEOMODEL"],
[3, 2, 1, 9, 0, "VAE"],
[4, 7, 0, 3, 0, "CLIP"],
[5, 7, 0, 8, 0, "CLIP"],
[6, 3, 0, 4, 1, "CONDITIONING"],
[7, 8, 0, 4, 2, "CONDITIONING"],
[8, 9, 0, 4, 3, "LATENT"],
[9, 4, 0, 5, 1, "LATENT"],
[10, 10, 0, 5, 0, "VAE"],
[11, 5, 0, 6, 0, "IMAGE"]
],
"groups": [],
"config": {},
"extra": { "extra": {
"workflow_info": { "workflow_info": {
"name": "CogVideoX Image-to-Video Production", "name": "CogVideoX Image-to-Video Production",
"version": "1.0.0", "version": "1.1.0",
"description": "AI-driven image-to-video using CogVideoX-5b. Generate 6-second videos (48 frames @ 8fps) from input images with camera movement and action.", "description": "AI-driven image-to-video using CogVideoX-5b-I2V. Generate 6-second videos (49 frames @ 8fps) from input images with camera movement and action.",
"category": "image-to-video", "category": "image-to-video",
"tags": [ "tags": ["cogvideox", "i2v", "video-generation", "production"],
"cogvideox",
"i2v",
"video-generation",
"production"
],
"requirements": { "requirements": {
"models": [ "models": ["CogVideoX-5b-I2V", "t5xxl", "clip_l"],
"CogVideoX-5b" "custom_nodes": ["ComfyUI-VideoHelperSuite", "ComfyUI-CogVideoXWrapper"],
], "vram_min": "20GB",
"custom_nodes": [ "vram_recommended": "24GB"
"ComfyUI-VideoHelperSuite",
"ComfyUI-CogVideoXWrapper"
],
"vram_min": "20GB"
}, },
"parameters": { "parameters": {
"input_image": { "input_image": {
"node_id": 1, "node_id": 1,
"widget_index": 0,
"type": "image", "type": "image",
"required": true, "required": true,
"description": "Starting frame for video" "description": "Starting frame for video generation"
}, },
"video_prompt": { "positive_prompt": {
"node_id": 3, "node_id": 3,
"widget_index": 0,
"type": "string", "type": "string",
"required": true, "required": true,
"description": "Describe camera movement and action" "default": "Camera movement description, action, scene details",
"description": "Describe desired camera movement, actions, and scene"
},
"negative_prompt": {
"node_id": 8,
"widget_index": 0,
"type": "string",
"required": false,
"default": "low quality, blurry, distorted",
"description": "Undesired elements to avoid"
},
"seed": {
"node_id": 4,
"widget_index": 0,
"type": "integer",
"required": false,
"default": 42,
"description": "Random seed for reproducibility"
},
"num_frames": {
"node_id": 4,
"widget_index": 2,
"type": "integer",
"required": false,
"default": 49,
"min": 1,
"max": 1024,
"description": "Number of frames to generate (49 = ~6s @ 8fps)"
}, },
"steps": { "steps": {
"node_id": 4, "node_id": 4,
"widget_index": 3,
"type": "integer", "type": "integer",
"required": false,
"default": 50, "default": 50,
"description": "Sampling steps (50 recommended)" "min": 20,
"max": 100,
"description": "Sampling steps (50 recommended for quality)"
},
"cfg": {
"node_id": 4,
"widget_index": 4,
"type": "float",
"required": false,
"default": 6.0,
"min": 1.0,
"max": 15.0,
"description": "Classifier-free guidance scale"
}, },
"fps": { "fps": {
"node_id": 6, "node_id": 6,
"widget_index": 0,
"type": "integer", "type": "integer",
"required": false,
"default": 8, "default": 8,
"description": "Output framerate" "description": "Output video framerate"
}
},
"outputs": {
"video": {
"node_id": 6,
"type": "video",
"format": "MP4 (H.264)",
"resolution": "Based on input image",
"duration": "~6 seconds @ 8fps (49 frames)"
} }
}, },
"performance": { "performance": {
"avg_generation_time": "120-180 seconds", "avg_generation_time": "120-180 seconds",
"vram_usage": "~20-22GB", "vram_usage": "~20-22GB",
"output": "6 seconds @ 8fps (48 frames)" "gpu_utilization": "95-100%"
} },
"use_cases": [
"Animate static images with camera motion",
"Create video loops from single frames",
"Add dynamic movement to product shots",
"Generate cinematic camera movements"
],
"notes": [
"CogVideoX-5b-I2V is specifically trained for image-to-video generation",
"Model will download automatically on first use (~10GB)",
"Enable VAE tiling to reduce VRAM usage",
"Higher steps (50-100) improve quality but increase generation time"
]
} }
}, },
"version": 0.4 "version": 0.4