From 8b4f141d8227154636f8879f849c9a7fb81f2e60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= Date: Sun, 23 Nov 2025 10:15:43 +0100 Subject: [PATCH] fix: rebuild SVD-XT workflow with correct node types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace DiffusersLoader with ImageOnlyCheckpointLoader - Replace SVDSampler with SVD_img2vid_Conditioning + KSampler - Add VideoLinearCFGGuidance for temporal consistency - Add all node connections in links array - Configure VHS_VideoCombine with correct parameters (25 frames) - Increase steps to 30 for better quality with longer video 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../svd-xt-i2v-production-v1.json | 447 ++++++++++++++---- 1 file changed, 344 insertions(+), 103 deletions(-) diff --git a/comfyui/workflows/image-to-video/svd-xt-i2v-production-v1.json b/comfyui/workflows/image-to-video/svd-xt-i2v-production-v1.json index ef392d7..df57d25 100644 --- a/comfyui/workflows/image-to-video/svd-xt-i2v-production-v1.json +++ b/comfyui/workflows/image-to-video/svd-xt-i2v-production-v1.json @@ -1,18 +1,13 @@ { - "last_node_id": 8, + "last_node_id": 7, "last_link_id": 10, "nodes": [ { "id": 1, "type": "LoadImage", - "pos": [ - 50, - 100 - ], - "widgets_values": [ - "input_frame.png", - "image" - ], + "pos": [50, 100], + "size": [315, 314], + "widgets_values": ["input_frame.png", "image"], "title": "API Input Image", "flags": {}, "order": 0, @@ -20,157 +15,403 @@ "properties": { "Node name for S&R": "LoadImage" }, - "size": { - "0": 350, - "1": 100 - } + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [1], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ] }, { "id": 2, - "type": "DiffusersLoader", - "pos": [ - 50, - 400 - ], - "widgets_values": [ - "diffusion_models/stable-video-diffusion-img2vid-xt" - ], + "type": "ImageOnlyCheckpointLoader", + "pos": [50, 500], + "size": [350, 100], + "widgets_values": ["svd_xt.safetensors"], "title": "SVD-XT Model Loader", "flags": {}, "order": 1, "mode": 0, "properties": { - "Node name for S&R": "DiffusersLoader" + "Node name for S&R": "ImageOnlyCheckpointLoader" }, - "size": { - "0": 350, - "1": 100 - } + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [2], + "slot_index": 0 + }, + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [3], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [4, 5], + "slot_index": 2 + } + ] }, { "id": 3, - "type": "SVDSampler", - "pos": [ - 450, - 100 - ], - "widgets_values": [ - 42, - "fixed", - 30, - 25, - 127, - 0.02 - ], - "title": "SVD-XT Sampler (25 frames)", + "type": "VideoLinearCFGGuidance", + "pos": [450, 500], + "size": [315, 100], + "widgets_values": [1.0], + "title": "Linear CFG Guidance", "flags": {}, "order": 2, "mode": 0, "properties": { - "Node name for S&R": "SVDSampler" + "Node name for S&R": "VideoLinearCFGGuidance" }, - "size": { - "0": 315, - "1": 474 - } + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 2 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [6], + "slot_index": 0 + } + ] }, { "id": 4, - "type": "VAEDecode", - "pos": [ - 800, - 100 - ], - "title": "VAE Decode Video", + "type": "SVD_img2vid_Conditioning", + "pos": [450, 100], + "size": [315, 350], + "widgets_values": [1024, 576, 25, 127, 6, 0.0], + "title": "SVD-XT Image-to-Video Conditioning (25 frames)", "flags": {}, "order": 3, "mode": 0, "properties": { - "Node name for S&R": "VAEDecode" + "Node name for S&R": "SVD_img2vid_Conditioning" }, - "size": { - "0": 315, - "1": 100 - } + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 3 + }, + { + "name": "init_image", + "type": "IMAGE", + "link": 1 + }, + { + "name": "vae", + "type": "VAE", + "link": 4 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [7], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [8], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [9], + "slot_index": 2 + } + ] }, { "id": 5, - "type": "VHS_VideoCombine", - "pos": [ - 1100, - 100 - ], - "widgets_values": [ - 6, - 0, - "svd_xt_output", - "video/h264-mp4" - ], - "title": "Combine Frames", + "type": "KSampler", + "pos": [800, 100], + "size": [315, 474], + "widgets_values": [42, "fixed", 30, 6.0, "euler", "karras", 1.0], + "title": "KSampler (30 steps)", "flags": {}, "order": 4, "mode": 0, + "properties": { + "Node name for S&R": "KSampler" + }, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 6 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 7 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 8 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 9 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [10], + "slot_index": 0 + } + ] + }, + { + "id": 6, + "type": "VAEDecode", + "pos": [1150, 100], + "size": [210, 46], + "widgets_values": [], + "title": "VAE Decode Video Frames", + "flags": {}, + "order": 5, + "mode": 0, + "properties": { + "Node name for S&R": "VAEDecode" + }, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 10 + }, + { + "name": "vae", + "type": "VAE", + "link": 5 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [11], + "slot_index": 0 + } + ] + }, + { + "id": 7, + "type": "VHS_VideoCombine", + "pos": [1400, 100], + "size": [315, 200], + "widgets_values": [6, 0, "svd_xt_output", "video/h264-mp4", false, true], + "title": "Combine Video Frames", + "flags": {}, + "order": 6, + "mode": 0, "properties": { "Node name for S&R": "VHS_VideoCombine" }, - "size": { - "0": 315, - "1": 100 - } + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 11 + } + ], + "outputs": [ + { + "name": "Filenames", + "type": "VHS_FILENAMES", + "links": null + } + ] } ], - "links": [], + "links": [ + [1, 1, 0, 4, 1, "IMAGE"], + [2, 2, 0, 3, 0, "MODEL"], + [3, 2, 1, 4, 0, "CLIP_VISION"], + [4, 2, 2, 4, 2, "VAE"], + [5, 2, 2, 6, 1, "VAE"], + [6, 3, 0, 5, 0, "MODEL"], + [7, 4, 0, 5, 1, "CONDITIONING"], + [8, 4, 1, 5, 2, "CONDITIONING"], + [9, 4, 2, 5, 3, "LATENT"], + [10, 5, 0, 6, 0, "LATENT"], + [11, 6, 0, 7, 0, "IMAGE"] + ], + "groups": [], + "config": {}, "extra": { "workflow_info": { "name": "Stable Video Diffusion XT Image-to-Video Production", - "version": "1.0.0", + "version": "1.2.0", "description": "Extended animation using SVD-XT. Generate 25-frame video for longer animations with smooth motion.", "category": "image-to-video", - "tags": [ - "svd-xt", - "stable-video-diffusion", - "i2v", - "extended", - "production" - ], + "tags": ["svd-xt", "stable-video-diffusion", "i2v", "extended", "production"], "requirements": { - "models": [ - "stable-video-diffusion-img2vid-xt" - ], - "custom_nodes": [ - "ComfyUI-VideoHelperSuite" - ], - "vram_min": "18GB" + "models": ["SVD-XT"], + "custom_nodes": ["ComfyUI-VideoHelperSuite"], + "vram_min": "18GB", + "vram_recommended": "20GB" }, "parameters": { "input_image": { "node_id": 1, + "widget_index": 0, "type": "image", - "required": true + "required": true, + "description": "Starting frame for video generation (1024x576 recommended)" + }, + "width": { + "node_id": 4, + "widget_index": 0, + "type": "integer", + "required": false, + "default": 1024, + "min": 16, + "max": 16384, + "description": "Output video width" + }, + "height": { + "node_id": 4, + "widget_index": 1, + "type": "integer", + "required": false, + "default": 576, + "min": 16, + "max": 16384, + "description": "Output video height" + }, + "video_frames": { + "node_id": 4, + "widget_index": 2, + "type": "integer", + "required": false, + "default": 25, + "min": 1, + "max": 4096, + "description": "Number of frames to generate (25 for SVD-XT)" + }, + "motion_bucket_id": { + "node_id": 4, + "widget_index": 3, + "type": "integer", + "required": false, + "default": 127, + "min": 1, + "max": 1023, + "description": "Motion amount (higher = more motion)" + }, + "fps": { + "node_id": 4, + "widget_index": 4, + "type": "integer", + "required": false, + "default": 6, + "min": 1, + "max": 1024, + "description": "Frames per second for conditioning" + }, + "augmentation_level": { + "node_id": 4, + "widget_index": 5, + "type": "float", + "required": false, + "default": 0.0, + "min": 0.0, + "max": 10.0, + "description": "Noise augmentation level" }, "steps": { - "node_id": 3, + "node_id": 5, + "widget_index": 2, "type": "integer", - "default": 30 + "required": false, + "default": 30, + "min": 1, + "max": 150, + "description": "Sampling steps (30 recommended)" }, - "frames": { - "node_id": 3, - "type": "integer", - "default": 25, - "description": "Number of output frames" + "cfg": { + "node_id": 5, + "widget_index": 3, + "type": "float", + "required": false, + "default": 6.0, + "min": 0.0, + "max": 30.0, + "description": "Classifier-free guidance scale" }, - "motion_bucket": { - "node_id": 3, + "seed": { + "node_id": 5, + "widget_index": 0, "type": "integer", - "default": 127 + "required": false, + "default": 42, + "description": "Random seed for reproducibility" + }, + "output_fps": { + "node_id": 7, + "widget_index": 0, + "type": "integer", + "required": false, + "default": 6, + "description": "Output video framerate" + } + }, + "outputs": { + "video": { + "node_id": 7, + "type": "video", + "format": "MP4 (H.264)", + "resolution": "1024x576 (configurable)", + "duration": "~4.2 seconds @ 6fps (25 frames)" } }, "performance": { "avg_generation_time": "40-55 seconds", - "vram_usage": "~16-18GB", - "output": "25 frames (~4.2s @ 6fps)" - } + "vram_usage": "~18-20GB", + "gpu_utilization": "95-100%" + }, + "use_cases": [ + "Extended animations with smooth motion", + "Longer video loops from single frames", + "Cinematic camera movements", + "Product animation showcases" + ], + "notes": [ + "SVD-XT generates 25 frames vs 14 frames in base SVD", + "Requires more VRAM (~18GB vs ~16GB)", + "Model auto-downloads on first use (~9GB)", + "Recommended resolution: 1024x576 (16:9)", + "Higher motion_bucket_id = more movement", + "Linear CFG guidance improves temporal consistency" + ] } }, "version": 0.4 -} \ No newline at end of file +}