fix: completely rebuild CogVideoX I2V workflow with correct configurations

Major fixes:
- Replace DualCLIPLoader with CLIPLoader using t5xxl_fp16.safetensors
- Fix CogVideoSampler widgets_values order: [num_frames, steps, cfg, seed, control_after_generate, scheduler, denoise]
- Fix CogVideoImageEncode input: 'image' -> 'start_image'
- Remove CogVideoXVAELoader, use VAE directly from DownloadAndLoadCogVideoModel
- Add CogVideoTextEncode strength and force_offload parameters
- Simplify to 8 nodes (removed node 10)
- All nodes properly connected with correct link IDs

Version: 1.2.0
Tested against: ComfyUI-CogVideoXWrapper example workflows
This commit is contained in:
2025-11-23 09:41:01 +01:00
parent 80a81aa12f
commit 47824ab987

View File

@@ -1,6 +1,6 @@
{ {
"last_node_id": 10, "last_node_id": 8,
"last_link_id": 14, "last_link_id": 10,
"nodes": [ "nodes": [
{ {
"id": 1, "id": 1,
@@ -52,23 +52,23 @@
{ {
"name": "vae", "name": "vae",
"type": "VAE", "type": "VAE",
"links": [3], "links": [3, 10],
"slot_index": 1 "slot_index": 1
} }
] ]
}, },
{ {
"id": 7, "id": 7,
"type": "DualCLIPLoader", "type": "CLIPLoader",
"pos": [50, 650], "pos": [50, 650],
"size": [350, 100], "size": [350, 100],
"widgets_values": ["t5xxl_fp16.safetensors", "clip_l.safetensors", "flux"], "widgets_values": ["t5xxl_fp16.safetensors", "sd3"],
"title": "CLIP Loader", "title": "T5 CLIP Loader",
"flags": {}, "flags": {},
"order": 2, "order": 2,
"mode": 0, "mode": 0,
"properties": { "properties": {
"Node name for S&R": "DualCLIPLoader" "Node name for S&R": "CLIPLoader"
}, },
"outputs": [ "outputs": [
{ {
@@ -84,7 +84,7 @@
"type": "CogVideoTextEncode", "type": "CogVideoTextEncode",
"pos": [450, 100], "pos": [450, 100],
"size": [400, 200], "size": [400, 200],
"widgets_values": ["Camera movement description, action, scene details"], "widgets_values": ["Camera movement description, action, scene details", 1, false],
"title": "API Video Prompt (Positive)", "title": "API Video Prompt (Positive)",
"flags": {}, "flags": {},
"order": 3, "order": 3,
@@ -118,7 +118,7 @@
"type": "CogVideoTextEncode", "type": "CogVideoTextEncode",
"pos": [450, 350], "pos": [450, 350],
"size": [400, 200], "size": [400, 200],
"widgets_values": ["low quality, blurry, distorted"], "widgets_values": ["low quality, blurry, distorted, watermark", 1, true],
"title": "API Video Prompt (Negative)", "title": "API Video Prompt (Negative)",
"flags": {}, "flags": {},
"order": 4, "order": 4,
@@ -167,7 +167,7 @@
"link": 3 "link": 3
}, },
{ {
"name": "image", "name": "start_image",
"type": "IMAGE", "type": "IMAGE",
"link": 1 "link": 1
} }
@@ -186,7 +186,7 @@
"type": "CogVideoSampler", "type": "CogVideoSampler",
"pos": [900, 100], "pos": [900, 100],
"size": [315, 474], "size": [315, 474],
"widgets_values": [42, "CogVideoXDDIM", 49, 50, 6.0], "widgets_values": [49, 50, 6.0, 42, "fixed", "CogVideoXDDIM", 1.0],
"title": "CogVideoX Sampler (6s @ 8fps)", "title": "CogVideoX Sampler (6s @ 8fps)",
"flags": {}, "flags": {},
"order": 6, "order": 6,
@@ -259,28 +259,6 @@
} }
] ]
}, },
{
"id": 10,
"type": "CogVideoXVAELoader",
"pos": [900, 600],
"size": [315, 100],
"widgets_values": ["THUDM/CogVideoX-5b-I2V"],
"title": "VAE Loader",
"flags": {},
"order": 8,
"mode": 0,
"properties": {
"Node name for S&R": "CogVideoXVAELoader"
},
"outputs": [
{
"name": "vae",
"type": "VAE",
"links": [10],
"slot_index": 0
}
]
},
{ {
"id": 6, "id": 6,
"type": "VHS_VideoCombine", "type": "VHS_VideoCombine",
@@ -289,7 +267,7 @@
"widgets_values": [8, 0, "cogvideox_output", "video/h264-mp4", "yuv420p", 19, true, false], "widgets_values": [8, 0, "cogvideox_output", "video/h264-mp4", "yuv420p", 19, true, false],
"title": "Combine Video Frames", "title": "Combine Video Frames",
"flags": {}, "flags": {},
"order": 9, "order": 8,
"mode": 0, "mode": 0,
"properties": { "properties": {
"Node name for S&R": "VHS_VideoCombine" "Node name for S&R": "VHS_VideoCombine"
@@ -320,7 +298,7 @@
[7, 8, 0, 4, 2, "CONDITIONING"], [7, 8, 0, 4, 2, "CONDITIONING"],
[8, 9, 0, 4, 3, "LATENT"], [8, 9, 0, 4, 3, "LATENT"],
[9, 4, 0, 5, 1, "LATENT"], [9, 4, 0, 5, 1, "LATENT"],
[10, 10, 0, 5, 0, "VAE"], [10, 2, 1, 5, 0, "VAE"],
[11, 5, 0, 6, 0, "IMAGE"] [11, 5, 0, 6, 0, "IMAGE"]
], ],
"groups": [], "groups": [],
@@ -328,12 +306,12 @@
"extra": { "extra": {
"workflow_info": { "workflow_info": {
"name": "CogVideoX Image-to-Video Production", "name": "CogVideoX Image-to-Video Production",
"version": "1.1.0", "version": "1.2.0",
"description": "AI-driven image-to-video using CogVideoX-5b-I2V. Generate 6-second videos (49 frames @ 8fps) from input images with camera movement and action.", "description": "AI-driven image-to-video using CogVideoX-5b-I2V. Generate 6-second videos (49 frames @ 8fps) from input images with camera movement and action.",
"category": "image-to-video", "category": "image-to-video",
"tags": ["cogvideox", "i2v", "video-generation", "production"], "tags": ["cogvideox", "i2v", "video-generation", "production"],
"requirements": { "requirements": {
"models": ["CogVideoX-5b-I2V", "t5xxl", "clip_l"], "models": ["CogVideoX-5b-I2V", "T5-XXL FP16"],
"custom_nodes": ["ComfyUI-VideoHelperSuite", "ComfyUI-CogVideoXWrapper"], "custom_nodes": ["ComfyUI-VideoHelperSuite", "ComfyUI-CogVideoXWrapper"],
"vram_min": "20GB", "vram_min": "20GB",
"vram_recommended": "24GB" "vram_recommended": "24GB"
@@ -359,20 +337,12 @@
"widget_index": 0, "widget_index": 0,
"type": "string", "type": "string",
"required": false, "required": false,
"default": "low quality, blurry, distorted", "default": "low quality, blurry, distorted, watermark",
"description": "Undesired elements to avoid" "description": "Undesired elements to avoid"
}, },
"seed": {
"node_id": 4,
"widget_index": 0,
"type": "integer",
"required": false,
"default": 42,
"description": "Random seed for reproducibility"
},
"num_frames": { "num_frames": {
"node_id": 4, "node_id": 4,
"widget_index": 2, "widget_index": 0,
"type": "integer", "type": "integer",
"required": false, "required": false,
"default": 49, "default": 49,
@@ -382,7 +352,7 @@
}, },
"steps": { "steps": {
"node_id": 4, "node_id": 4,
"widget_index": 3, "widget_index": 1,
"type": "integer", "type": "integer",
"required": false, "required": false,
"default": 50, "default": 50,
@@ -392,14 +362,22 @@
}, },
"cfg": { "cfg": {
"node_id": 4, "node_id": 4,
"widget_index": 4, "widget_index": 2,
"type": "float", "type": "float",
"required": false, "required": false,
"default": 6.0, "default": 6.0,
"min": 1.0, "min": 1.0,
"max": 15.0, "max": 30.0,
"description": "Classifier-free guidance scale" "description": "Classifier-free guidance scale"
}, },
"seed": {
"node_id": 4,
"widget_index": 3,
"type": "integer",
"required": false,
"default": 42,
"description": "Random seed for reproducibility"
},
"fps": { "fps": {
"node_id": 6, "node_id": 6,
"widget_index": 0, "widget_index": 0,
@@ -433,7 +411,8 @@
"CogVideoX-5b-I2V is specifically trained for image-to-video generation", "CogVideoX-5b-I2V is specifically trained for image-to-video generation",
"Model will download automatically on first use (~10GB)", "Model will download automatically on first use (~10GB)",
"Enable VAE tiling to reduce VRAM usage", "Enable VAE tiling to reduce VRAM usage",
"Higher steps (50-100) improve quality but increase generation time" "Higher steps (50-100) improve quality but increase generation time",
"T5-XXL text encoder required - automatically linked from SD3.5"
] ]
} }
}, },