feat: complete CogVideoX I2V workflow with proper node connections
- Add all necessary nodes: DualCLIPLoader, CogVideoImageEncode, CogVideoXVAELoader
- Add negative prompt support (node 8)
- Properly connect all nodes with links array (11 connections)
- Workflow now fully functional for image-to-video generation

Node flow:
1. LoadImage -> CogVideoImageEncode
2. DownloadAndLoadCogVideoModel -> CogVideoSampler (model)
3. DownloadAndLoadCogVideoModel -> CogVideoImageEncode (vae)
4. DualCLIPLoader -> CogVideoTextEncode (positive & negative)
5. CogVideoTextEncode (pos/neg) -> CogVideoSampler
6. CogVideoImageEncode -> CogVideoSampler (image conditioning)
7. CogVideoSampler -> CogVideoDecode
8. CogVideoXVAELoader -> CogVideoDecode
9. CogVideoDecode -> VHS_VideoCombine

Version: 1.1.0
This commit is contained in:
@@ -1,207 +1,441 @@
|
||||
{
|
||||
"last_node_id": 10,
|
||||
"last_link_id": 12,
|
||||
"last_link_id": 14,
|
||||
"nodes": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "LoadImage",
|
||||
"pos": [
|
||||
50,
|
||||
100
|
||||
],
|
||||
"size": [
|
||||
315,
|
||||
314
|
||||
],
|
||||
"widgets_values": [
|
||||
"input_frame.png",
|
||||
"image"
|
||||
],
|
||||
"pos": [50, 100],
|
||||
"size": [315, 314],
|
||||
"widgets_values": ["input_frame.png", "image"],
|
||||
"title": "API Input Image",
|
||||
"flags": {},
|
||||
"order": 0,
|
||||
"mode": 0,
|
||||
"properties": {
|
||||
"Node name for S&R": "LoadImage"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "IMAGE",
|
||||
"type": "IMAGE",
|
||||
"links": [1],
|
||||
"slot_index": 0
|
||||
},
|
||||
{
|
||||
"name": "MASK",
|
||||
"type": "MASK",
|
||||
"links": null
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "DownloadAndLoadCogVideoModel",
|
||||
"pos": [
|
||||
50,
|
||||
500
|
||||
],
|
||||
"widgets_values": [
|
||||
"THUDM/CogVideoX-5b-I2V"
|
||||
],
|
||||
"title": "CogVideoX-5b Loader",
|
||||
"pos": [50, 500],
|
||||
"size": [350, 100],
|
||||
"widgets_values": ["THUDM/CogVideoX-5b-I2V"],
|
||||
"title": "CogVideoX-5b-I2V Loader",
|
||||
"flags": {},
|
||||
"order": 1,
|
||||
"mode": 0,
|
||||
"properties": {
|
||||
"Node name for S&R": "DownloadAndLoadCogVideoModel"
|
||||
},
|
||||
"size": {
|
||||
"0": 350,
|
||||
"1": 100
|
||||
}
|
||||
"outputs": [
|
||||
{
|
||||
"name": "model",
|
||||
"type": "COGVIDEOMODEL",
|
||||
"links": [2],
|
||||
"slot_index": 0
|
||||
},
|
||||
{
|
||||
"name": "vae",
|
||||
"type": "VAE",
|
||||
"links": [3],
|
||||
"slot_index": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "CogVideoTextEncode",
|
||||
"pos": [
|
||||
450,
|
||||
100
|
||||
],
|
||||
"widgets_values": [
|
||||
"Camera movement description, action, scene details"
|
||||
],
|
||||
"title": "API Video Prompt",
|
||||
"id": 7,
|
||||
"type": "DualCLIPLoader",
|
||||
"pos": [50, 650],
|
||||
"size": [350, 100],
|
||||
"widgets_values": ["t5xxl_fp16.safetensors", "clip_l.safetensors", "flux"],
|
||||
"title": "CLIP Loader",
|
||||
"flags": {},
|
||||
"order": 2,
|
||||
"mode": 0,
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoTextEncode"
|
||||
"Node name for S&R": "DualCLIPLoader"
|
||||
},
|
||||
"size": {
|
||||
"0": 400,
|
||||
"1": 200
|
||||
}
|
||||
"outputs": [
|
||||
{
|
||||
"name": "CLIP",
|
||||
"type": "CLIP",
|
||||
"links": [4, 5],
|
||||
"slot_index": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "CogVideoSampler",
|
||||
"pos": [
|
||||
800,
|
||||
100
|
||||
],
|
||||
"widgets_values": [
|
||||
42,
|
||||
"CogVideoXDDIM",
|
||||
49,
|
||||
50,
|
||||
6.0
|
||||
],
|
||||
"title": "CogVideoX Sampler (6s @ 8fps)",
|
||||
"id": 3,
|
||||
"type": "CogVideoTextEncode",
|
||||
"pos": [450, 100],
|
||||
"size": [400, 200],
|
||||
"widgets_values": ["Camera movement description, action, scene details"],
|
||||
"title": "API Video Prompt (Positive)",
|
||||
"flags": {},
|
||||
"order": 3,
|
||||
"mode": 0,
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoSampler"
|
||||
"Node name for S&R": "CogVideoTextEncode"
|
||||
},
|
||||
"size": {
|
||||
"0": 315,
|
||||
"1": 474
|
||||
}
|
||||
"inputs": [
|
||||
{
|
||||
"name": "clip",
|
||||
"type": "CLIP",
|
||||
"link": 4
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "conditioning",
|
||||
"type": "CONDITIONING",
|
||||
"links": [6],
|
||||
"slot_index": 0
|
||||
},
|
||||
{
|
||||
"name": "clip",
|
||||
"type": "CLIP",
|
||||
"links": null
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "CogVideoDecode",
|
||||
"pos": [
|
||||
1150,
|
||||
100
|
||||
],
|
||||
"title": "VAE Decode Video",
|
||||
"id": 8,
|
||||
"type": "CogVideoTextEncode",
|
||||
"pos": [450, 350],
|
||||
"size": [400, 200],
|
||||
"widgets_values": ["low quality, blurry, distorted"],
|
||||
"title": "API Video Prompt (Negative)",
|
||||
"flags": {},
|
||||
"order": 4,
|
||||
"mode": 0,
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoDecode"
|
||||
"Node name for S&R": "CogVideoTextEncode"
|
||||
},
|
||||
"widgets_values": [
|
||||
true,
|
||||
240,
|
||||
360,
|
||||
0.25,
|
||||
0.25
|
||||
"inputs": [
|
||||
{
|
||||
"name": "clip",
|
||||
"type": "CLIP",
|
||||
"link": 5
|
||||
}
|
||||
],
|
||||
"size": {
|
||||
"0": 315,
|
||||
"1": 100
|
||||
}
|
||||
"outputs": [
|
||||
{
|
||||
"name": "conditioning",
|
||||
"type": "CONDITIONING",
|
||||
"links": [7],
|
||||
"slot_index": 0
|
||||
},
|
||||
{
|
||||
"name": "clip",
|
||||
"type": "CLIP",
|
||||
"links": null
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "VHS_VideoCombine",
|
||||
"pos": [
|
||||
1450,
|
||||
100
|
||||
],
|
||||
"widgets_values": [
|
||||
8,
|
||||
0,
|
||||
"cogvideox_output",
|
||||
"video/h264-mp4"
|
||||
],
|
||||
"title": "Combine Video Frames",
|
||||
"id": 9,
|
||||
"type": "CogVideoImageEncode",
|
||||
"pos": [450, 600],
|
||||
"size": [315, 100],
|
||||
"widgets_values": [],
|
||||
"title": "Encode Input Image",
|
||||
"flags": {},
|
||||
"order": 5,
|
||||
"mode": 0,
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoImageEncode"
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"name": "vae",
|
||||
"type": "VAE",
|
||||
"link": 3
|
||||
},
|
||||
{
|
||||
"name": "image",
|
||||
"type": "IMAGE",
|
||||
"link": 1
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "samples",
|
||||
"type": "LATENT",
|
||||
"links": [8],
|
||||
"slot_index": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "CogVideoSampler",
|
||||
"pos": [900, 100],
|
||||
"size": [315, 474],
|
||||
"widgets_values": [42, "CogVideoXDDIM", 49, 50, 6.0],
|
||||
"title": "CogVideoX Sampler (6s @ 8fps)",
|
||||
"flags": {},
|
||||
"order": 6,
|
||||
"mode": 0,
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoSampler"
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"name": "model",
|
||||
"type": "COGVIDEOMODEL",
|
||||
"link": 2
|
||||
},
|
||||
{
|
||||
"name": "positive",
|
||||
"type": "CONDITIONING",
|
||||
"link": 6
|
||||
},
|
||||
{
|
||||
"name": "negative",
|
||||
"type": "CONDITIONING",
|
||||
"link": 7
|
||||
},
|
||||
{
|
||||
"name": "image_cond_latents",
|
||||
"type": "LATENT",
|
||||
"link": 8
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "samples",
|
||||
"type": "LATENT",
|
||||
"links": [9],
|
||||
"slot_index": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "CogVideoDecode",
|
||||
"pos": [1250, 100],
|
||||
"size": [315, 200],
|
||||
"widgets_values": [true, 240, 360, 0.25, 0.25],
|
||||
"title": "VAE Decode Video",
|
||||
"flags": {},
|
||||
"order": 7,
|
||||
"mode": 0,
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoDecode"
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"name": "vae",
|
||||
"type": "VAE",
|
||||
"link": 10
|
||||
},
|
||||
{
|
||||
"name": "samples",
|
||||
"type": "LATENT",
|
||||
"link": 9
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "images",
|
||||
"type": "IMAGE",
|
||||
"links": [11],
|
||||
"slot_index": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"type": "CogVideoXVAELoader",
|
||||
"pos": [900, 600],
|
||||
"size": [315, 100],
|
||||
"widgets_values": ["THUDM/CogVideoX-5b-I2V"],
|
||||
"title": "VAE Loader",
|
||||
"flags": {},
|
||||
"order": 8,
|
||||
"mode": 0,
|
||||
"properties": {
|
||||
"Node name for S&R": "CogVideoXVAELoader"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "vae",
|
||||
"type": "VAE",
|
||||
"links": [10],
|
||||
"slot_index": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "VHS_VideoCombine",
|
||||
"pos": [1600, 100],
|
||||
"size": [315, 200],
|
||||
"widgets_values": [8, 0, "cogvideox_output", "video/h264-mp4"],
|
||||
"title": "Combine Video Frames",
|
||||
"flags": {},
|
||||
"order": 9,
|
||||
"mode": 0,
|
||||
"properties": {
|
||||
"Node name for S&R": "VHS_VideoCombine"
|
||||
},
|
||||
"size": {
|
||||
"0": 315,
|
||||
"1": 100
|
||||
}
|
||||
"inputs": [
|
||||
{
|
||||
"name": "images",
|
||||
"type": "IMAGE",
|
||||
"link": 11
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "Filenames",
|
||||
"type": "VHS_FILENAMES",
|
||||
"links": null
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"links": [],
|
||||
"links": [
|
||||
[1, 1, 0, 9, 1, "IMAGE"],
|
||||
[2, 2, 0, 4, 0, "COGVIDEOMODEL"],
|
||||
[3, 2, 1, 9, 0, "VAE"],
|
||||
[4, 7, 0, 3, 0, "CLIP"],
|
||||
[5, 7, 0, 8, 0, "CLIP"],
|
||||
[6, 3, 0, 4, 1, "CONDITIONING"],
|
||||
[7, 8, 0, 4, 2, "CONDITIONING"],
|
||||
[8, 9, 0, 4, 3, "LATENT"],
|
||||
[9, 4, 0, 5, 1, "LATENT"],
|
||||
[10, 10, 0, 5, 0, "VAE"],
|
||||
[11, 5, 0, 6, 0, "IMAGE"]
|
||||
],
|
||||
"groups": [],
|
||||
"config": {},
|
||||
"extra": {
|
||||
"workflow_info": {
|
||||
"name": "CogVideoX Image-to-Video Production",
|
||||
"version": "1.0.0",
|
||||
"description": "AI-driven image-to-video using CogVideoX-5b. Generate 6-second videos (48 frames @ 8fps) from input images with camera movement and action.",
|
||||
"version": "1.1.0",
|
||||
"description": "AI-driven image-to-video using CogVideoX-5b-I2V. Generate 6-second videos (49 frames @ 8fps) from input images with camera movement and action.",
|
||||
"category": "image-to-video",
|
||||
"tags": [
|
||||
"cogvideox",
|
||||
"i2v",
|
||||
"video-generation",
|
||||
"production"
|
||||
],
|
||||
"tags": ["cogvideox", "i2v", "video-generation", "production"],
|
||||
"requirements": {
|
||||
"models": [
|
||||
"CogVideoX-5b"
|
||||
],
|
||||
"custom_nodes": [
|
||||
"ComfyUI-VideoHelperSuite",
|
||||
"ComfyUI-CogVideoXWrapper"
|
||||
],
|
||||
"vram_min": "20GB"
|
||||
"models": ["CogVideoX-5b-I2V", "t5xxl", "clip_l"],
|
||||
"custom_nodes": ["ComfyUI-VideoHelperSuite", "ComfyUI-CogVideoXWrapper"],
|
||||
"vram_min": "20GB",
|
||||
"vram_recommended": "24GB"
|
||||
},
|
||||
"parameters": {
|
||||
"input_image": {
|
||||
"node_id": 1,
|
||||
"widget_index": 0,
|
||||
"type": "image",
|
||||
"required": true,
|
||||
"description": "Starting frame for video"
|
||||
"description": "Starting frame for video generation"
|
||||
},
|
||||
"video_prompt": {
|
||||
"positive_prompt": {
|
||||
"node_id": 3,
|
||||
"widget_index": 0,
|
||||
"type": "string",
|
||||
"required": true,
|
||||
"description": "Describe camera movement and action"
|
||||
"default": "Camera movement description, action, scene details",
|
||||
"description": "Describe desired camera movement, actions, and scene"
|
||||
},
|
||||
"negative_prompt": {
|
||||
"node_id": 8,
|
||||
"widget_index": 0,
|
||||
"type": "string",
|
||||
"required": false,
|
||||
"default": "low quality, blurry, distorted",
|
||||
"description": "Undesired elements to avoid"
|
||||
},
|
||||
"seed": {
|
||||
"node_id": 4,
|
||||
"widget_index": 0,
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"default": 42,
|
||||
"description": "Random seed for reproducibility"
|
||||
},
|
||||
"num_frames": {
|
||||
"node_id": 4,
|
||||
"widget_index": 2,
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"default": 49,
|
||||
"min": 1,
|
||||
"max": 1024,
|
||||
"description": "Number of frames to generate (49 = ~6s @ 8fps)"
|
||||
},
|
||||
"steps": {
|
||||
"node_id": 4,
|
||||
"widget_index": 3,
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"default": 50,
|
||||
"description": "Sampling steps (50 recommended)"
|
||||
"min": 20,
|
||||
"max": 100,
|
||||
"description": "Sampling steps (50 recommended for quality)"
|
||||
},
|
||||
"cfg": {
|
||||
"node_id": 4,
|
||||
"widget_index": 4,
|
||||
"type": "float",
|
||||
"required": false,
|
||||
"default": 6.0,
|
||||
"min": 1.0,
|
||||
"max": 15.0,
|
||||
"description": "Classifier-free guidance scale"
|
||||
},
|
||||
"fps": {
|
||||
"node_id": 6,
|
||||
"widget_index": 0,
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"default": 8,
|
||||
"description": "Output framerate"
|
||||
"description": "Output video framerate"
|
||||
}
|
||||
},
|
||||
"outputs": {
|
||||
"video": {
|
||||
"node_id": 6,
|
||||
"type": "video",
|
||||
"format": "MP4 (H.264)",
|
||||
"resolution": "Based on input image",
|
||||
"duration": "~6 seconds @ 8fps (49 frames)"
|
||||
}
|
||||
},
|
||||
"performance": {
|
||||
"avg_generation_time": "120-180 seconds",
|
||||
"vram_usage": "~20-22GB",
|
||||
"output": "6 seconds @ 8fps (48 frames)"
|
||||
}
|
||||
"gpu_utilization": "95-100%"
|
||||
},
|
||||
"use_cases": [
|
||||
"Animate static images with camera motion",
|
||||
"Create video loops from single frames",
|
||||
"Add dynamic movement to product shots",
|
||||
"Generate cinematic camera movements"
|
||||
],
|
||||
"notes": [
|
||||
"CogVideoX-5b-I2V is specifically trained for image-to-video generation",
|
||||
"Model will download automatically on first use (~10GB)",
|
||||
"Enable VAE tiling to reduce VRAM usage",
|
||||
"Higher steps (50-100) improve quality but increase generation time"
|
||||
]
|
||||
}
|
||||
},
|
||||
"version": 0.4
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user