From 8b4f141d8227154636f8879f849c9a7fb81f2e60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= <valknar@pivoine.art>
Date: Sun, 23 Nov 2025 10:15:43 +0100
Subject: [PATCH] fix: rebuild SVD-XT workflow with correct node types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace DiffusersLoader with ImageOnlyCheckpointLoader
- Replace SVDSampler with SVD_img2vid_Conditioning + KSampler
- Add VideoLinearCFGGuidance for temporal consistency
- Add all node connections in links array
- Configure VHS_VideoCombine with correct parameters (25 frames)
- Increase steps to 30 for better quality with longer video

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../svd-xt-i2v-production-v1.json             | 447 ++++++++++++++----
 1 file changed, 344 insertions(+), 103 deletions(-)

diff --git a/comfyui/workflows/image-to-video/svd-xt-i2v-production-v1.json b/comfyui/workflows/image-to-video/svd-xt-i2v-production-v1.json
index ef392d7..df57d25 100644
--- a/comfyui/workflows/image-to-video/svd-xt-i2v-production-v1.json
+++ b/comfyui/workflows/image-to-video/svd-xt-i2v-production-v1.json
@@ -1,18 +1,13 @@
 {
-  "last_node_id": 8,
+  "last_node_id": 7,
   "last_link_id": 10,
   "nodes": [
     {
       "id": 1,
       "type": "LoadImage",
-      "pos": [
-        50,
-        100
-      ],
-      "widgets_values": [
-        "input_frame.png",
-        "image"
-      ],
+      "pos": [50, 100],
+      "size": [315, 314],
+      "widgets_values": ["input_frame.png", "image"],
       "title": "API Input Image",
       "flags": {},
       "order": 0,
@@ -20,157 +15,403 @@
       "properties": {
         "Node name for S&R": "LoadImage"
       },
-      "size": {
-        "0": 350,
-        "1": 100
-      }
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [1],
+          "slot_index": 0
+        },
+        {
+          "name": "MASK",
+          "type": "MASK",
+          "links": null
+        }
+      ]
     },
     {
       "id": 2,
-      "type": "DiffusersLoader",
-      "pos": [
-        50,
-        400
-      ],
-      "widgets_values": [
-        "diffusion_models/stable-video-diffusion-img2vid-xt"
-      ],
+      "type": "ImageOnlyCheckpointLoader",
+      "pos": [50, 500],
+      "size": [350, 100],
+      "widgets_values": ["svd_xt.safetensors"],
       "title": "SVD-XT Model Loader",
       "flags": {},
       "order": 1,
       "mode": 0,
       "properties": {
-        "Node name for S&R": "DiffusersLoader"
+        "Node name for S&R": "ImageOnlyCheckpointLoader"
       },
-      "size": {
-        "0": 350,
-        "1": 100
-      }
+      "outputs": [
+        {
+          "name": "MODEL",
+          "type": "MODEL",
+          "links": [2],
+          "slot_index": 0
+        },
+        {
+          "name": "CLIP_VISION",
+          "type": "CLIP_VISION",
+          "links": [3],
+          "slot_index": 1
+        },
+        {
+          "name": "VAE",
+          "type": "VAE",
+          "links": [4, 5],
+          "slot_index": 2
+        }
+      ]
     },
     {
       "id": 3,
-      "type": "SVDSampler",
-      "pos": [
-        450,
-        100
-      ],
-      "widgets_values": [
-        42,
-        "fixed",
-        30,
-        25,
-        127,
-        0.02
-      ],
-      "title": "SVD-XT Sampler (25 frames)",
+      "type": "VideoLinearCFGGuidance",
+      "pos": [450, 500],
+      "size": [315, 100],
+      "widgets_values": [1.0],
+      "title": "Linear CFG Guidance",
       "flags": {},
       "order": 2,
       "mode": 0,
       "properties": {
-        "Node name for S&R": "SVDSampler"
+        "Node name for S&R": "VideoLinearCFGGuidance"
       },
-      "size": {
-        "0": 315,
-        "1": 474
-      }
+      "inputs": [
+        {
+          "name": "model",
+          "type": "MODEL",
+          "link": 2
+        }
+      ],
+      "outputs": [
+        {
+          "name": "MODEL",
+          "type": "MODEL",
+          "links": [6],
+          "slot_index": 0
+        }
+      ]
     },
     {
       "id": 4,
-      "type": "VAEDecode",
-      "pos": [
-        800,
-        100
-      ],
-      "title": "VAE Decode Video",
+      "type": "SVD_img2vid_Conditioning",
+      "pos": [450, 100],
+      "size": [315, 350],
+      "widgets_values": [1024, 576, 25, 127, 6, 0.0],
+      "title": "SVD-XT Image-to-Video Conditioning (25 frames)",
       "flags": {},
       "order": 3,
       "mode": 0,
       "properties": {
-        "Node name for S&R": "VAEDecode"
+        "Node name for S&R": "SVD_img2vid_Conditioning"
       },
-      "size": {
-        "0": 315,
-        "1": 100
-      }
+      "inputs": [
+        {
+          "name": "clip_vision",
+          "type": "CLIP_VISION",
+          "link": 3
+        },
+        {
+          "name": "init_image",
+          "type": "IMAGE",
+          "link": 1
+        },
+        {
+          "name": "vae",
+          "type": "VAE",
+          "link": 4
+        }
+      ],
+      "outputs": [
+        {
+          "name": "positive",
+          "type": "CONDITIONING",
+          "links": [7],
+          "slot_index": 0
+        },
+        {
+          "name": "negative",
+          "type": "CONDITIONING",
+          "links": [8],
+          "slot_index": 1
+        },
+        {
+          "name": "latent",
+          "type": "LATENT",
+          "links": [9],
+          "slot_index": 2
+        }
+      ]
     },
     {
       "id": 5,
-      "type": "VHS_VideoCombine",
-      "pos": [
-        1100,
-        100
-      ],
-      "widgets_values": [
-        6,
-        0,
-        "svd_xt_output",
-        "video/h264-mp4"
-      ],
-      "title": "Combine Frames",
+      "type": "KSampler",
+      "pos": [800, 100],
+      "size": [315, 474],
+      "widgets_values": [42, "fixed", 30, 6.0, "euler", "karras", 1.0],
+      "title": "KSampler (30 steps)",
       "flags": {},
       "order": 4,
       "mode": 0,
+      "properties": {
+        "Node name for S&R": "KSampler"
+      },
+      "inputs": [
+        {
+          "name": "model",
+          "type": "MODEL",
+          "link": 6
+        },
+        {
+          "name": "positive",
+          "type": "CONDITIONING",
+          "link": 7
+        },
+        {
+          "name": "negative",
+          "type": "CONDITIONING",
+          "link": 8
+        },
+        {
+          "name": "latent_image",
+          "type": "LATENT",
+          "link": 9
+        }
+      ],
+      "outputs": [
+        {
+          "name": "LATENT",
+          "type": "LATENT",
+          "links": [10],
+          "slot_index": 0
+        }
+      ]
+    },
+    {
+      "id": 6,
+      "type": "VAEDecode",
+      "pos": [1150, 100],
+      "size": [210, 46],
+      "widgets_values": [],
+      "title": "VAE Decode Video Frames",
+      "flags": {},
+      "order": 5,
+      "mode": 0,
+      "properties": {
+        "Node name for S&R": "VAEDecode"
+      },
+      "inputs": [
+        {
+          "name": "samples",
+          "type": "LATENT",
+          "link": 10
+        },
+        {
+          "name": "vae",
+          "type": "VAE",
+          "link": 5
+        }
+      ],
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [11],
+          "slot_index": 0
+        }
+      ]
+    },
+    {
+      "id": 7,
+      "type": "VHS_VideoCombine",
+      "pos": [1400, 100],
+      "size": [315, 200],
+      "widgets_values": [6, 0, "svd_xt_output", "video/h264-mp4", false, true],
+      "title": "Combine Video Frames",
+      "flags": {},
+      "order": 6,
+      "mode": 0,
       "properties": {
         "Node name for S&R": "VHS_VideoCombine"
       },
-      "size": {
-        "0": 315,
-        "1": 100
-      }
+      "inputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "link": 11
+        }
+      ],
+      "outputs": [
+        {
+          "name": "Filenames",
+          "type": "VHS_FILENAMES",
+          "links": null
+        }
+      ]
     }
   ],
-  "links": [],
+  "links": [
+    [1, 1, 0, 4, 1, "IMAGE"],
+    [2, 2, 0, 3, 0, "MODEL"],
+    [3, 2, 1, 4, 0, "CLIP_VISION"],
+    [4, 2, 2, 4, 2, "VAE"],
+    [5, 2, 2, 6, 1, "VAE"],
+    [6, 3, 0, 5, 0, "MODEL"],
+    [7, 4, 0, 5, 1, "CONDITIONING"],
+    [8, 4, 1, 5, 2, "CONDITIONING"],
+    [9, 4, 2, 5, 3, "LATENT"],
+    [10, 5, 0, 6, 0, "LATENT"],
+    [11, 6, 0, 7, 0, "IMAGE"]
+  ],
+  "groups": [],
+  "config": {},
   "extra": {
     "workflow_info": {
       "name": "Stable Video Diffusion XT Image-to-Video Production",
-      "version": "1.0.0",
+      "version": "1.2.0",
       "description": "Extended animation using SVD-XT. Generate 25-frame video for longer animations with smooth motion.",
       "category": "image-to-video",
-      "tags": [
-        "svd-xt",
-        "stable-video-diffusion",
-        "i2v",
-        "extended",
-        "production"
-      ],
+      "tags": ["svd-xt", "stable-video-diffusion", "i2v", "extended", "production"],
       "requirements": {
-        "models": [
-          "stable-video-diffusion-img2vid-xt"
-        ],
-        "custom_nodes": [
-          "ComfyUI-VideoHelperSuite"
-        ],
-        "vram_min": "18GB"
+        "models": ["SVD-XT"],
+        "custom_nodes": ["ComfyUI-VideoHelperSuite"],
+        "vram_min": "18GB",
+        "vram_recommended": "20GB"
       },
       "parameters": {
         "input_image": {
           "node_id": 1,
+          "widget_index": 0,
           "type": "image",
-          "required": true
+          "required": true,
+          "description": "Starting frame for video generation (1024x576 recommended)"
+        },
+        "width": {
+          "node_id": 4,
+          "widget_index": 0,
+          "type": "integer",
+          "required": false,
+          "default": 1024,
+          "min": 16,
+          "max": 16384,
+          "description": "Output video width"
+        },
+        "height": {
+          "node_id": 4,
+          "widget_index": 1,
+          "type": "integer",
+          "required": false,
+          "default": 576,
+          "min": 16,
+          "max": 16384,
+          "description": "Output video height"
+        },
+        "video_frames": {
+          "node_id": 4,
+          "widget_index": 2,
+          "type": "integer",
+          "required": false,
+          "default": 25,
+          "min": 1,
+          "max": 4096,
+          "description": "Number of frames to generate (25 for SVD-XT)"
+        },
+        "motion_bucket_id": {
+          "node_id": 4,
+          "widget_index": 3,
+          "type": "integer",
+          "required": false,
+          "default": 127,
+          "min": 1,
+          "max": 1023,
+          "description": "Motion amount (higher = more motion)"
+        },
+        "fps": {
+          "node_id": 4,
+          "widget_index": 4,
+          "type": "integer",
+          "required": false,
+          "default": 6,
+          "min": 1,
+          "max": 1024,
+          "description": "Frames per second for conditioning"
+        },
+        "augmentation_level": {
+          "node_id": 4,
+          "widget_index": 5,
+          "type": "float",
+          "required": false,
+          "default": 0.0,
+          "min": 0.0,
+          "max": 10.0,
+          "description": "Noise augmentation level"
         },
         "steps": {
-          "node_id": 3,
+          "node_id": 5,
+          "widget_index": 2,
           "type": "integer",
-          "default": 30
+          "required": false,
+          "default": 30,
+          "min": 1,
+          "max": 150,
+          "description": "Sampling steps (30 recommended)"
         },
-        "frames": {
-          "node_id": 3,
-          "type": "integer",
-          "default": 25,
-          "description": "Number of output frames"
+        "cfg": {
+          "node_id": 5,
+          "widget_index": 3,
+          "type": "float",
+          "required": false,
+          "default": 6.0,
+          "min": 0.0,
+          "max": 30.0,
+          "description": "Classifier-free guidance scale"
         },
-        "motion_bucket": {
-          "node_id": 3,
+        "seed": {
+          "node_id": 5,
+          "widget_index": 0,
           "type": "integer",
-          "default": 127
+          "required": false,
+          "default": 42,
+          "description": "Random seed for reproducibility"
+        },
+        "output_fps": {
+          "node_id": 7,
+          "widget_index": 0,
+          "type": "integer",
+          "required": false,
+          "default": 6,
+          "description": "Output video framerate"
+        }
+      },
+      "outputs": {
+        "video": {
+          "node_id": 7,
+          "type": "video",
+          "format": "MP4 (H.264)",
+          "resolution": "1024x576 (configurable)",
+          "duration": "~4.2 seconds @ 6fps (25 frames)"
         }
       },
       "performance": {
         "avg_generation_time": "40-55 seconds",
-        "vram_usage": "~16-18GB",
-        "output": "25 frames (~4.2s @ 6fps)"
-      }
+        "vram_usage": "~18-20GB",
+        "gpu_utilization": "95-100%"
+      },
+      "use_cases": [
+        "Extended animations with smooth motion",
+        "Longer video loops from single frames",
+        "Cinematic camera movements",
+        "Product animation showcases"
+      ],
+      "notes": [
+        "SVD-XT generates 25 frames vs 14 frames in base SVD",
+        "Requires more VRAM (~18GB vs ~16GB)",
+        "Model auto-downloads on first use (~9GB)",
+        "Recommended resolution: 1024x576 (16:9)",
+        "Higher motion_bucket_id = more movement",
+        "Linear CFG guidance improves temporal consistency"
+      ]
     }
   },
   "version": 0.4
-}
\ No newline at end of file
+}