From 764cb5d2d79310b0123c8c7b8ef6de0a6bd5bcbc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= <valknar@pivoine.art>
Date: Sun, 23 Nov 2025 10:10:38 +0100
Subject: [PATCH] fix: rebuild SVD workflow with correct node types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace DiffusersLoader with ImageOnlyCheckpointLoader
- Replace SVDSampler with SVD_img2vid_Conditioning + KSampler
- Add VideoLinearCFGGuidance for temporal consistency
- Add all node connections in links array
- Configure VHS_VideoCombine with H.264 parameters

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../image-to-video/svd-i2v-production-v1.json | 451 ++++++++++++++----
 1 file changed, 345 insertions(+), 106 deletions(-)

diff --git a/comfyui/workflows/image-to-video/svd-i2v-production-v1.json b/comfyui/workflows/image-to-video/svd-i2v-production-v1.json
index 2fc2f92..c276f83 100644
--- a/comfyui/workflows/image-to-video/svd-i2v-production-v1.json
+++ b/comfyui/workflows/image-to-video/svd-i2v-production-v1.json
@@ -1,18 +1,13 @@
 {
-  "last_node_id": 8,
-  "last_link_id": 10,
+  "last_node_id": 7,
+  "last_link_id": 11,
   "nodes": [
     {
       "id": 1,
       "type": "LoadImage",
-      "pos": [
-        50,
-        100
-      ],
-      "widgets_values": [
-        "input_frame.png",
-        "image"
-      ],
+      "pos": [50, 100],
+      "size": [315, 314],
+      "widgets_values": ["input_frame.png", "image"],
       "title": "API Input Image",
       "flags": {},
       "order": 0,
@@ -20,158 +15,402 @@
       "properties": {
         "Node name for S&R": "LoadImage"
       },
-      "size": {
-        "0": 350,
-        "1": 100
-      }
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [1],
+          "slot_index": 0
+        },
+        {
+          "name": "MASK",
+          "type": "MASK",
+          "links": null
+        }
+      ]
     },
     {
       "id": 2,
-      "type": "DiffusersLoader",
-      "pos": [
-        50,
-        400
-      ],
-      "widgets_values": [
-        "diffusion_models/stable-video-diffusion-img2vid"
-      ],
-      "title": "SVD Model Loader",
+      "type": "ImageOnlyCheckpointLoader",
+      "pos": [50, 500],
+      "size": [350, 100],
+      "widgets_values": ["svd_xt.safetensors"],
+      "title": "SVD-XT Model Loader",
       "flags": {},
       "order": 1,
       "mode": 0,
       "properties": {
-        "Node name for S&R": "DiffusersLoader"
+        "Node name for S&R": "ImageOnlyCheckpointLoader"
       },
-      "size": {
-        "0": 350,
-        "1": 100
-      }
+      "outputs": [
+        {
+          "name": "MODEL",
+          "type": "MODEL",
+          "links": [2],
+          "slot_index": 0
+        },
+        {
+          "name": "CLIP_VISION",
+          "type": "CLIP_VISION",
+          "links": [3],
+          "slot_index": 1
+        },
+        {
+          "name": "VAE",
+          "type": "VAE",
+          "links": [4, 5],
+          "slot_index": 2
+        }
+      ]
     },
     {
       "id": 3,
-      "type": "SVDSampler",
-      "pos": [
-        450,
-        100
-      ],
-      "widgets_values": [
-        42,
-        "fixed",
-        25,
-        14,
-        127,
-        0.02
-      ],
-      "title": "SVD Sampler (14 frames)",
+      "type": "VideoLinearCFGGuidance",
+      "pos": [450, 500],
+      "size": [315, 100],
+      "widgets_values": [1.0],
+      "title": "Linear CFG Guidance",
       "flags": {},
       "order": 2,
       "mode": 0,
       "properties": {
-        "Node name for S&R": "SVDSampler"
+        "Node name for S&R": "VideoLinearCFGGuidance"
       },
-      "size": {
-        "0": 315,
-        "1": 474
-      }
+      "inputs": [
+        {
+          "name": "model",
+          "type": "MODEL",
+          "link": 2
+        }
+      ],
+      "outputs": [
+        {
+          "name": "MODEL",
+          "type": "MODEL",
+          "links": [6],
+          "slot_index": 0
+        }
+      ]
     },
     {
       "id": 4,
-      "type": "VAEDecode",
-      "pos": [
-        800,
-        100
-      ],
-      "title": "VAE Decode Video",
+      "type": "SVD_img2vid_Conditioning",
+      "pos": [450, 100],
+      "size": [315, 350],
+      "widgets_values": [1024, 576, 14, 127, 6, 0.0],
+      "title": "SVD Image-to-Video Conditioning",
       "flags": {},
       "order": 3,
       "mode": 0,
       "properties": {
-        "Node name for S&R": "VAEDecode"
+        "Node name for S&R": "SVD_img2vid_Conditioning"
       },
-      "size": {
-        "0": 315,
-        "1": 100
-      }
+      "inputs": [
+        {
+          "name": "clip_vision",
+          "type": "CLIP_VISION",
+          "link": 3
+        },
+        {
+          "name": "init_image",
+          "type": "IMAGE",
+          "link": 1
+        },
+        {
+          "name": "vae",
+          "type": "VAE",
+          "link": 4
+        }
+      ],
+      "outputs": [
+        {
+          "name": "positive",
+          "type": "CONDITIONING",
+          "links": [7],
+          "slot_index": 0
+        },
+        {
+          "name": "negative",
+          "type": "CONDITIONING",
+          "links": [8],
+          "slot_index": 1
+        },
+        {
+          "name": "latent",
+          "type": "LATENT",
+          "links": [9],
+          "slot_index": 2
+        }
+      ]
     },
     {
       "id": 5,
-      "type": "VHS_VideoCombine",
-      "pos": [
-        1100,
-        100
-      ],
-      "widgets_values": [
-        6,
-        0,
-        "svd_output",
-        "video/h264-mp4"
-      ],
-      "title": "Combine Frames",
+      "type": "KSampler",
+      "pos": [800, 100],
+      "size": [315, 474],
+      "widgets_values": [42, "fixed", 25, 6.0, "euler", "karras", 1.0],
+      "title": "KSampler (25 steps)",
       "flags": {},
       "order": 4,
       "mode": 0,
+      "properties": {
+        "Node name for S&R": "KSampler"
+      },
+      "inputs": [
+        {
+          "name": "model",
+          "type": "MODEL",
+          "link": 6
+        },
+        {
+          "name": "positive",
+          "type": "CONDITIONING",
+          "link": 7
+        },
+        {
+          "name": "negative",
+          "type": "CONDITIONING",
+          "link": 8
+        },
+        {
+          "name": "latent_image",
+          "type": "LATENT",
+          "link": 9
+        }
+      ],
+      "outputs": [
+        {
+          "name": "LATENT",
+          "type": "LATENT",
+          "links": [10],
+          "slot_index": 0
+        }
+      ]
+    },
+    {
+      "id": 6,
+      "type": "VAEDecode",
+      "pos": [1150, 100],
+      "size": [210, 46],
+      "widgets_values": [],
+      "title": "VAE Decode Video Frames",
+      "flags": {},
+      "order": 5,
+      "mode": 0,
+      "properties": {
+        "Node name for S&R": "VAEDecode"
+      },
+      "inputs": [
+        {
+          "name": "samples",
+          "type": "LATENT",
+          "link": 10
+        },
+        {
+          "name": "vae",
+          "type": "VAE",
+          "link": 5
+        }
+      ],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [11],
+          "slot_index": 0
+        }
+      ]
+    },
+    {
+      "id": 7,
+      "type": "VHS_VideoCombine",
+      "pos": [1400, 100],
+      "size": [315, 200],
+      "widgets_values": [6, 0, "svd_output", "video/h264-mp4", "yuv420p", 19, true, false],
+      "title": "Combine Video Frames",
+      "flags": {},
+      "order": 6,
+      "mode": 0,
       "properties": {
         "Node name for S&R": "VHS_VideoCombine"
       },
-      "size": {
-        "0": 315,
-        "1": 100
-      }
+      "inputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "link": 11
+        }
+      ],
+      "outputs": [
+        {
+          "name": "Filenames",
+          "type": "VHS_FILENAMES",
+          "links": null
+        }
+      ]
     }
   ],
-  "links": [],
+  "links": [
+    [1, 1, 0, 4, 1, "IMAGE"],
+    [2, 2, 0, 3, 0, "MODEL"],
+    [3, 2, 1, 4, 0, "CLIP_VISION"],
+    [4, 2, 2, 4, 2, "VAE"],
+    [5, 2, 2, 6, 1, "VAE"],
+    [6, 3, 0, 5, 0, "MODEL"],
+    [7, 4, 0, 5, 1, "CONDITIONING"],
+    [8, 4, 1, 5, 2, "CONDITIONING"],
+    [9, 4, 2, 5, 3, "LATENT"],
+    [10, 5, 0, 6, 0, "LATENT"],
+    [11, 6, 0, 7, 0, "IMAGE"]
+  ],
+  "groups": [],
+  "config": {},
   "extra": {
     "workflow_info": {
       "name": "Stable Video Diffusion Image-to-Video Production",
-      "version": "1.0.0",
-      "description": "Quick animation using SVD. Generate 14-frame video from single image with motion and camera movement.",
+      "version": "1.2.0",
+      "description": "Quick animation using SVD-XT. Generates a 14-frame video by default (SVD-XT supports up to 25 frames) from a single image with motion and camera movement.",
       "category": "image-to-video",
-      "tags": [
-        "svd",
-        "stable-video-diffusion",
-        "i2v",
-        "animation",
-        "production"
-      ],
+      "tags": ["svd", "svd-xt", "stable-video-diffusion", "i2v", "animation", "production"],
       "requirements": {
-        "models": [
-          "stable-video-diffusion-img2vid"
-        ],
-        "custom_nodes": [
-          "ComfyUI-VideoHelperSuite"
-        ],
-        "vram_min": "16GB"
+        "models": ["SVD-XT"],
+        "custom_nodes": ["ComfyUI-VideoHelperSuite"],
+        "vram_min": "16GB",
+        "vram_recommended": "20GB"
       },
       "parameters": {
         "input_image": {
           "node_id": 1,
+          "widget_index": 0,
           "type": "image",
-          "required": true
+          "required": true,
+          "description": "Starting frame for video generation (1024x576 recommended)"
+        },
+        "width": {
+          "node_id": 4,
+          "widget_index": 0,
+          "type": "integer",
+          "required": false,
+          "default": 1024,
+          "min": 16,
+          "max": 16384,
+          "description": "Output video width"
+        },
+        "height": {
+          "node_id": 4,
+          "widget_index": 1,
+          "type": "integer",
+          "required": false,
+          "default": 576,
+          "min": 16,
+          "max": 16384,
+          "description": "Output video height"
+        },
+        "video_frames": {
+          "node_id": 4,
+          "widget_index": 2,
+          "type": "integer",
+          "required": false,
+          "default": 14,
+          "min": 1,
+          "max": 4096,
+          "description": "Number of frames to generate (14 for SVD, up to 25 for SVD-XT)"
+        },
+        "motion_bucket_id": {
+          "node_id": 4,
+          "widget_index": 3,
+          "type": "integer",
+          "required": false,
+          "default": 127,
+          "min": 1,
+          "max": 1023,
+          "description": "Motion amount (higher = more motion)"
+        },
+        "fps": {
+          "node_id": 4,
+          "widget_index": 4,
+          "type": "integer",
+          "required": false,
+          "default": 6,
+          "min": 1,
+          "max": 1024,
+          "description": "Frames per second for conditioning"
+        },
+        "augmentation_level": {
+          "node_id": 4,
+          "widget_index": 5,
+          "type": "float",
+          "required": false,
+          "default": 0.0,
+          "min": 0.0,
+          "max": 10.0,
+          "description": "Noise augmentation level"
         },
         "steps": {
-          "node_id": 3,
+          "node_id": 5,
+          "widget_index": 2,
           "type": "integer",
-          "default": 25
+          "required": false,
+          "default": 25,
+          "min": 1,
+          "max": 150,
+          "description": "Sampling steps (25 recommended)"
         },
-        "frames": {
-          "node_id": 3,
-          "type": "integer",
-          "default": 14,
-          "description": "Number of output frames"
+        "cfg": {
+          "node_id": 5,
+          "widget_index": 3,
+          "type": "float",
+          "required": false,
+          "default": 6.0,
+          "min": 0.0,
+          "max": 30.0,
+          "description": "Classifier-free guidance scale"
         },
-        "motion_bucket": {
-          "node_id": 3,
+        "seed": {
+          "node_id": 5,
+          "widget_index": 0,
           "type": "integer",
-          "default": 127,
-          "description": "Motion amount (0-255)"
+          "required": false,
+          "default": 42,
+          "description": "Random seed for reproducibility"
+        },
+        "output_fps": {
+          "node_id": 7,
+          "widget_index": 0,
+          "type": "integer",
+          "required": false,
+          "default": 6,
+          "description": "Output video framerate"
+        }
+      },
+      "outputs": {
+        "video": {
+          "node_id": 7,
+          "type": "video",
+          "format": "MP4 (H.264)",
+          "resolution": "1024x576 (configurable)",
+          "duration": "~2.3 seconds @ 6fps (14 frames)"
         }
       },
       "performance": {
         "avg_generation_time": "25-35 seconds",
-        "vram_usage": "~14-16GB",
-        "output": "14 frames (~2.3s @ 6fps)"
-      }
+        "vram_usage": "~16-18GB",
+        "gpu_utilization": "95-100%"
+      },
+      "use_cases": [
+        "Animate static images with natural motion",
+        "Create short video loops from single frames",
+        "Add subtle camera movements to still images",
+        "Generate product animation previews"
+      ],
+      "notes": [
+        "SVD-XT extends frame count from 14 to 25 frames",
+        "Requires svd_xt.safetensors (~9GB) in the ComfyUI models/checkpoints directory",
+        "Recommended resolution: 1024x576 (16:9)",
+        "Higher motion_bucket_id = more movement",
+        "Linear CFG guidance improves temporal consistency"
+      ]
     }
   },
   "version": 0.4
-}
\ No newline at end of file
+}