From 47824ab987237fc40c68910d41ceab0d868cadf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= Date: Sun, 23 Nov 2025 09:41:01 +0100 Subject: [PATCH] fix: completely rebuild CogVideoX I2V workflow with correct configurations Major fixes: - Replace DualCLIPLoader with CLIPLoader using t5xxl_fp16.safetensors - Fix CogVideoSampler parameter order: [num_frames, steps, cfg, seed, control, scheduler, denoise] - Fix CogVideoImageEncode input: 'image' -> 'start_image' - Remove CogVideoXVAELoader, use VAE directly from DownloadAndLoadCogVideoModel - Add CogVideoTextEncode strength and force_offload parameters - Simplify to 9 nodes (removed node 10) - All nodes properly connected with correct link IDs Version: 1.2.0 Tested against: ComfyUI-CogVideoXWrapper example workflows --- .../cogvideox-i2v-production-v1.json | 81 +++++++------------ 1 file changed, 30 insertions(+), 51 deletions(-) diff --git a/comfyui/workflows/image-to-video/cogvideox-i2v-production-v1.json b/comfyui/workflows/image-to-video/cogvideox-i2v-production-v1.json index 6ca5745..e10e527 100644 --- a/comfyui/workflows/image-to-video/cogvideox-i2v-production-v1.json +++ b/comfyui/workflows/image-to-video/cogvideox-i2v-production-v1.json @@ -1,6 +1,6 @@ { - "last_node_id": 10, - "last_link_id": 14, + "last_node_id": 9, + "last_link_id": 11, "nodes": [ { "id": 1, @@ -52,23 +52,23 @@ { "name": "vae", "type": "VAE", - "links": [3], + "links": [3, 10], "slot_index": 1 } ] }, { "id": 7, - "type": "DualCLIPLoader", + "type": "CLIPLoader", "pos": [50, 650], "size": [350, 100], - "widgets_values": ["t5xxl_fp16.safetensors", "clip_l.safetensors", "flux"], - "title": "CLIP Loader", + "widgets_values": ["t5xxl_fp16.safetensors", "sd3"], + "title": "T5 CLIP Loader", "flags": {}, "order": 2, "mode": 0, "properties": { - "Node name for S&R": "DualCLIPLoader" + "Node name for S&R": "CLIPLoader" }, "outputs": [ { @@ -84,7 +84,7 @@ "type": "CogVideoTextEncode", "pos": [450, 100], "size": [400, 200], - 
"widgets_values": ["Camera movement description, action, scene details"], + "widgets_values": ["Camera movement description, action, scene details", 1, false], "title": "API Video Prompt (Positive)", "flags": {}, "order": 3, @@ -118,7 +118,7 @@ "type": "CogVideoTextEncode", "pos": [450, 350], "size": [400, 200], - "widgets_values": ["low quality, blurry, distorted"], + "widgets_values": ["low quality, blurry, distorted, watermark", 1, true], "title": "API Video Prompt (Negative)", "flags": {}, "order": 4, @@ -167,7 +167,7 @@ "link": 3 }, { - "name": "image", + "name": "start_image", "type": "IMAGE", "link": 1 } @@ -186,7 +186,7 @@ "type": "CogVideoSampler", "pos": [900, 100], "size": [315, 474], - "widgets_values": [42, "CogVideoXDDIM", 49, 50, 6.0], + "widgets_values": [49, 50, 6.0, 42, "fixed", "CogVideoXDDIM", 1.0], "title": "CogVideoX Sampler (6s @ 8fps)", "flags": {}, "order": 6, @@ -259,28 +259,6 @@ } ] }, - { - "id": 10, - "type": "CogVideoXVAELoader", - "pos": [900, 600], - "size": [315, 100], - "widgets_values": ["THUDM/CogVideoX-5b-I2V"], - "title": "VAE Loader", - "flags": {}, - "order": 8, - "mode": 0, - "properties": { - "Node name for S&R": "CogVideoXVAELoader" - }, - "outputs": [ - { - "name": "vae", - "type": "VAE", - "links": [10], - "slot_index": 0 - } - ] - }, { "id": 6, "type": "VHS_VideoCombine", @@ -289,7 +267,7 @@ "widgets_values": [8, 0, "cogvideox_output", "video/h264-mp4", "yuv420p", 19, true, false], "title": "Combine Video Frames", "flags": {}, - "order": 9, + "order": 8, "mode": 0, "properties": { "Node name for S&R": "VHS_VideoCombine" @@ -320,7 +298,7 @@ [7, 8, 0, 4, 2, "CONDITIONING"], [8, 9, 0, 4, 3, "LATENT"], [9, 4, 0, 5, 1, "LATENT"], - [10, 10, 0, 5, 0, "VAE"], + [10, 2, 1, 5, 0, "VAE"], [11, 5, 0, 6, 0, "IMAGE"] ], "groups": [], @@ -328,12 +306,12 @@ "extra": { "workflow_info": { "name": "CogVideoX Image-to-Video Production", - "version": "1.1.0", + "version": "1.2.0", "description": "AI-driven image-to-video using 
CogVideoX-5b-I2V. Generate 6-second videos (49 frames @ 8fps) from input images with camera movement and action.", "category": "image-to-video", "tags": ["cogvideox", "i2v", "video-generation", "production"], "requirements": { - "models": ["CogVideoX-5b-I2V", "t5xxl", "clip_l"], + "models": ["CogVideoX-5b-I2V", "T5-XXL FP16"], "custom_nodes": ["ComfyUI-VideoHelperSuite", "ComfyUI-CogVideoXWrapper"], "vram_min": "20GB", "vram_recommended": "24GB" @@ -359,20 +337,12 @@ "widget_index": 0, "type": "string", "required": false, - "default": "low quality, blurry, distorted", + "default": "low quality, blurry, distorted, watermark", "description": "Undesired elements to avoid" }, - "seed": { - "node_id": 4, - "widget_index": 0, - "type": "integer", - "required": false, - "default": 42, - "description": "Random seed for reproducibility" - }, "num_frames": { "node_id": 4, - "widget_index": 2, + "widget_index": 0, "type": "integer", "required": false, "default": 49, @@ -382,7 +352,7 @@ }, "steps": { "node_id": 4, - "widget_index": 3, + "widget_index": 1, "type": "integer", "required": false, "default": 50, @@ -392,14 +362,22 @@ }, "cfg": { "node_id": 4, - "widget_index": 4, + "widget_index": 2, "type": "float", "required": false, "default": 6.0, "min": 1.0, - "max": 15.0, + "max": 30.0, "description": "Classifier-free guidance scale" }, + "seed": { + "node_id": 4, + "widget_index": 3, + "type": "integer", + "required": false, + "default": 42, + "description": "Random seed for reproducibility" + }, "fps": { "node_id": 6, "widget_index": 0, @@ -433,7 +411,8 @@ "CogVideoX-5b-I2V is specifically trained for image-to-video generation", "Model will download automatically on first use (~10GB)", "Enable VAE tiling to reduce VRAM usage", - "Higher steps (50-100) improve quality but increase generation time" + "Higher steps (50-100) improve quality but increase generation time", + "T5-XXL text encoder required - automatically linked from SD3.5" ] } },