From 764cb5d2d79310b0123c8c7b8ef6de0a6bd5bcbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= Date: Sun, 23 Nov 2025 10:10:38 +0100 Subject: [PATCH] fix: rebuild SVD workflow with correct node types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace DiffusersLoader with ImageOnlyCheckpointLoader - Replace SVDSampler with SVD_img2vid_Conditioning + KSampler - Add VideoLinearCFGGuidance for temporal consistency - Add all node connections in links array - Configure VHS_VideoCombine with H.264 parameters 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../image-to-video/svd-i2v-production-v1.json | 451 ++++++++++++++---- 1 file changed, 345 insertions(+), 106 deletions(-) diff --git a/comfyui/workflows/image-to-video/svd-i2v-production-v1.json b/comfyui/workflows/image-to-video/svd-i2v-production-v1.json index 2fc2f92..c276f83 100644 --- a/comfyui/workflows/image-to-video/svd-i2v-production-v1.json +++ b/comfyui/workflows/image-to-video/svd-i2v-production-v1.json @@ -1,18 +1,13 @@ { - "last_node_id": 8, + "last_node_id": 7, "last_link_id": 10, "nodes": [ { "id": 1, "type": "LoadImage", - "pos": [ - 50, - 100 - ], - "widgets_values": [ - "input_frame.png", - "image" - ], + "pos": [50, 100], + "size": [315, 314], + "widgets_values": ["input_frame.png", "image"], "title": "API Input Image", "flags": {}, "order": 0, @@ -20,158 +15,402 @@ "properties": { "Node name for S&R": "LoadImage" }, - "size": { - "0": 350, - "1": 100 - } + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [1], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ] }, { "id": 2, - "type": "DiffusersLoader", - "pos": [ - 50, - 400 - ], - "widgets_values": [ - "diffusion_models/stable-video-diffusion-img2vid" - ], - "title": "SVD Model Loader", + "type": "ImageOnlyCheckpointLoader", + "pos": [50, 500], + "size": [350, 100], + "widgets_values": ["svd_xt.safetensors"], + "title": "SVD-XT Model Loader", "flags": {}, "order": 1, "mode": 0, "properties": { - "Node name for S&R": "DiffusersLoader" + "Node name for S&R": "ImageOnlyCheckpointLoader" }, - "size": { - "0": 350, - "1": 100 - } + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [2], + "slot_index": 0 + }, + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [3], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [4, 5], + "slot_index": 2 + } + ] }, { "id": 3, - "type": "SVDSampler", - "pos": [ - 450, - 100 - ], - "widgets_values": [ - 42, - "fixed", - 25, - 14, - 127, - 0.02 - ], - "title": "SVD Sampler (14 frames)", + "type": "VideoLinearCFGGuidance", + "pos": [450, 500], + "size": [315, 100], + "widgets_values": [1.0], + "title": "Linear CFG Guidance", "flags": {}, "order": 2, "mode": 0, "properties": { - "Node name for S&R": "SVDSampler" + "Node name for S&R": "VideoLinearCFGGuidance" }, - "size": { - "0": 315, - "1": 474 - } + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 2 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [6], + "slot_index": 0 + } + ] }, { "id": 4, - "type": "VAEDecode", - "pos": [ - 800, - 100 - ], - "title": "VAE Decode Video", + "type": "SVD_img2vid_Conditioning", + "pos": [450, 100], + "size": [315, 350], + "widgets_values": [1024, 576, 14, 127, 6, 0.0], + "title": "SVD Image-to-Video Conditioning", "flags": {}, "order": 3, "mode": 0, "properties": { - "Node name for S&R": "VAEDecode" + "Node name for S&R": "SVD_img2vid_Conditioning" }, - "size": { - "0": 315, - "1": 100 - } + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 3 + }, + { + "name": "init_image", + "type": "IMAGE", + "link": 1 + }, + { + "name": "vae", + "type": "VAE", + "link": 4 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [7], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [8], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [9], + "slot_index": 2 + } + ] }, { "id": 5, - "type": "VHS_VideoCombine", - "pos": [ - 1100, - 100 - ], - "widgets_values": [ - 6, - 0, - "svd_output", - "video/h264-mp4" - ], - "title": "Combine Frames", + "type": "KSampler", + "pos": [800, 100], + "size": [315, 474], + "widgets_values": [42, "fixed", 25, 6.0, "euler", "karras", 1.0], + "title": "KSampler (25 steps)", "flags": {}, "order": 4, "mode": 0, + "properties": { + "Node name for S&R": "KSampler" + }, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 6 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 7 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 8 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 9 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [10], + "slot_index": 0 + } + ] + }, + { + "id": 6, + "type": "VAEDecode", + "pos": [1150, 100], + "size": [210, 46], + "widgets_values": [], + "title": "VAE Decode Video Frames", + "flags": {}, + "order": 5, + "mode": 0, + "properties": { + "Node name for S&R": "VAEDecode" + }, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 10 + }, + { + "name": "vae", + "type": "VAE", + "link": 5 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [11], + "slot_index": 0 + } + ] + }, + { + "id": 7, + "type": "VHS_VideoCombine", + "pos": [1400, 100], + "size": [315, 200], + "widgets_values": [6, 0, "svd_output", "video/h264-mp4", "yuv420p", 19, true, false], + "title": "Combine Video Frames", + "flags": {}, + "order": 6, + "mode": 0, "properties": { "Node name for S&R": "VHS_VideoCombine" }, - "size": { - "0": 315, - "1": 100 - } + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 11 + } + ], + "outputs": [ + { + "name": "Filenames", + "type": "VHS_FILENAMES", + "links": null + } + ] } ], - "links": [], + "links": [ + [1, 1, 0, 4, 1, "IMAGE"], + [2, 2, 0, 3, 0, "MODEL"], + [3, 2, 1, 4, 0, "CLIP_VISION"], + [4, 2, 2, 4, 2, "VAE"], + [5, 2, 2, 6, 1, "VAE"], + [6, 3, 0, 5, 0, "MODEL"], + [7, 4, 0, 5, 1, "CONDITIONING"], + [8, 4, 1, 5, 2, "CONDITIONING"], + [9, 4, 2, 5, 3, "LATENT"], + [10, 5, 0, 6, 0, "LATENT"], + [11, 6, 0, 7, 0, "IMAGE"] + ], + "groups": [], + "config": {}, "extra": { "workflow_info": { "name": "Stable Video Diffusion Image-to-Video Production", - "version": "1.0.0", - "description": "Quick animation using SVD. Generate 14-frame video from single image with motion and camera movement.", + "version": "1.2.0", + "description": "Quick animation using SVD-XT. Generate 14-frame video from single image with motion and camera movement.", "category": "image-to-video", - "tags": [ - "svd", - "stable-video-diffusion", - "i2v", - "animation", - "production" - ], + "tags": ["svd", "svd-xt", "stable-video-diffusion", "i2v", "animation", "production"], "requirements": { - "models": [ - "stable-video-diffusion-img2vid" - ], - "custom_nodes": [ - "ComfyUI-VideoHelperSuite" - ], - "vram_min": "16GB" + "models": ["SVD-XT"], + "custom_nodes": ["ComfyUI-VideoHelperSuite"], + "vram_min": "16GB", + "vram_recommended": "20GB" }, "parameters": { "input_image": { "node_id": 1, + "widget_index": 0, "type": "image", - "required": true + "required": true, + "description": "Starting frame for video generation (1024x576 recommended)" + }, + "width": { + "node_id": 4, + "widget_index": 0, + "type": "integer", + "required": false, + "default": 1024, + "min": 16, + "max": 16384, + "description": "Output video width" + }, + "height": { + "node_id": 4, + "widget_index": 1, + "type": "integer", + "required": false, + "default": 576, + "min": 16, + "max": 16384, + "description": "Output video height" + }, + "video_frames": { + "node_id": 4, + "widget_index": 2, + "type": "integer", + "required": false, + "default": 14, + "min": 1, + "max": 4096, + "description": "Number of frames to generate (14 or 25 for SVD/SVD-XT)" + }, + "motion_bucket_id": { + "node_id": 4, + "widget_index": 3, + "type": "integer", + "required": false, + "default": 127, + "min": 1, + "max": 1023, + "description": "Motion amount (higher = more motion)" + }, + "fps": { + "node_id": 4, + "widget_index": 4, + "type": "integer", + "required": false, + "default": 6, + "min": 1, + "max": 1024, + "description": "Frames per second for conditioning" + }, + "augmentation_level": { + "node_id": 4, + "widget_index": 5, + "type": "float", + "required": false, + "default": 0.0, + "min": 0.0, + "max": 10.0, + "description": "Noise augmentation level" }, "steps": { - "node_id": 3, + "node_id": 5, + "widget_index": 2, "type": "integer", - "default": 25 + "required": false, + "default": 25, + "min": 1, + "max": 150, + "description": "Sampling steps (25 recommended)" }, - "frames": { - "node_id": 3, - "type": "integer", - "default": 14, - "description": "Number of output frames" + "cfg": { + "node_id": 5, + "widget_index": 3, + "type": "float", + "required": false, + "default": 6.0, + "min": 0.0, + "max": 30.0, + "description": "Classifier-free guidance scale" }, - "motion_bucket": { - "node_id": 3, + "seed": { + "node_id": 5, + "widget_index": 0, "type": "integer", - "default": 127, - "description": "Motion amount (0-255)" + "required": false, + "default": 42, + "description": "Random seed for reproducibility" + }, + "output_fps": { + "node_id": 7, + "widget_index": 0, + "type": "integer", + "required": false, + "default": 6, + "description": "Output video framerate" + } + }, + "outputs": { + "video": { + "node_id": 7, + "type": "video", + "format": "MP4 (H.264)", + "resolution": "1024x576 (configurable)", + "duration": "~2.3 seconds @ 6fps (14 frames)" } }, "performance": { "avg_generation_time": "25-35 seconds", - "vram_usage": "~14-16GB", - "output": "14 frames (~2.3s @ 6fps)" - } + "vram_usage": "~16-18GB", + "gpu_utilization": "95-100%" + }, + "use_cases": [ + "Animate static images with natural motion", + "Create short video loops from single frames", + "Add subtle camera movements to still images", + "Generate product animation previews" + ], + "notes": [ + "SVD-XT extends frame count from 14 to 25 frames", + "Model auto-downloads on first use (~9GB)", + "Recommended resolution: 1024x576 (16:9)", + "Higher motion_bucket_id = more movement", + "Linear CFG guidance improves temporal consistency" + ] } }, "version": 0.4 -} \ No newline at end of file +}