feat: add complete HunyuanVideo and Wan2.2 video generation integration
All checks were successful
Build and Push RunPod Docker Image / build-and-push (push) Successful in 15s
Integrated 35+ video generation models and 13 production workflows from ComfyUI docs tutorials for state-of-the-art text-to-video and image-to-video generation. Models Added (models_huggingface.yaml): - HunyuanVideo (5 models): Original T2V/I2V (720p), v1.5 (720p/1080p) with Qwen 2.5 VL - Wan2.2 diffusion models (18 models): - 5B TI2V hybrid (8GB VRAM, efficient) - 14B variants: T2V, I2V (high/low noise), Animate, S2V (FP8/BF16), Fun Camera/Control (high/low noise) - Support models (12): VAEs, UMT5-XXL, CLIP Vision H, Wav2Vec2, LLaVA encoders - LoRA accelerators (4): Lightx2v 4-step distillation for 5x speedup Workflows Added (comfyui/workflows/image-to-video/): - HunyuanVideo (5 workflows): T2V original, I2V v1/v2 (webp embedded), v1.5 T2V/I2V (JSON) - Wan2.2 (8 workflows): 5B TI2V, 14B T2V/I2V/FLF2V/Animate/S2V/Fun Camera/Fun Control - Asset files (10): Reference images, videos, audio for workflow testing Custom Nodes Added (arty.yml): - ComfyUI-KJNodes: Kijai optimizations for HunyuanVideo/Wan2.2 (FP8 scaling, video helpers) - comfyui_controlnet_aux: ControlNet preprocessors (Canny, Depth, OpenPose, MLSD) for Fun Control - ComfyUI-GGUF: GGUF quantization support for memory optimization VRAM Requirements: - HunyuanVideo original: 24GB (720p T2V/I2V, 129 frames, 5s generation) - HunyuanVideo 1.5: 30-60GB (720p/1080p, improved quality with Qwen 2.5 VL) - Wan2.2 5B: 8GB (efficient dense model with native offloading) - Wan2.2 14B: 24GB (high-quality video generation, all modes) Note: Wan2.2 Fun Inpaint workflow not available in official templates repository (404). 
Tutorial Sources: - https://docs.comfy.org/tutorials/video/hunyuan/hunyuan-video - https://docs.comfy.org/tutorials/video/hunyuan/hunyuan-video-1-5 - https://docs.comfy.org/tutorials/video/wan/wan2_2 - https://docs.comfy.org/tutorials/video/wan/wan2-2-animate - https://docs.comfy.org/tutorials/video/wan/wan2-2-s2v - https://docs.comfy.org/tutorials/video/wan/wan2-2-fun-camera - https://docs.comfy.org/tutorials/video/wan/wan2-2-fun-control 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
15
arty.yml
@@ -78,6 +78,21 @@ references:
|
||||
description: "Ultimate SD Upscale for high-quality image upscaling"
|
||||
essential: false
|
||||
|
||||
- url: https://github.com/kijai/ComfyUI-KJNodes.git
|
||||
into: $COMFYUI_ROOT/custom_nodes/ComfyUI-KJNodes
|
||||
description: "Kijai optimizations for HunyuanVideo and Wan2.2 (FP8 scaling, video helpers, model loading)"
|
||||
essential: true
|
||||
|
||||
- url: https://github.com/Fannovel16/comfyui_controlnet_aux.git
|
||||
into: $COMFYUI_ROOT/custom_nodes/comfyui_controlnet_aux
|
||||
description: "ControlNet preprocessors (Canny, Depth, OpenPose, MLSD) for Wan2.2 Fun Control"
|
||||
essential: true
|
||||
|
||||
- url: https://github.com/city96/ComfyUI-GGUF.git
|
||||
into: $COMFYUI_ROOT/custom_nodes/ComfyUI-GGUF
|
||||
description: "GGUF quantization support for memory-efficient model loading"
|
||||
essential: false
|
||||
|
||||
# Environment profiles for selective repository management
|
||||
envs:
|
||||
# RunPod environment variables
|
||||
|
||||
BIN
comfyui/workflows/image-to-video/i2v_hunyuan-i2v-v1-robot.webp
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
comfyui/workflows/image-to-video/i2v_hunyuan-i2v-v2-fennec.webp
Normal file
|
After Width: | Height: | Size: 2.8 MiB |
BIN
comfyui/workflows/image-to-video/i2v_hunyuan-t2v-kitchen.webp
Normal file
|
After Width: | Height: | Size: 1.4 MiB |
2528
comfyui/workflows/image-to-video/i2v_hunyuan15-i2v-720p.json
Normal file
2269
comfyui/workflows/image-to-video/i2v_hunyuan15-t2v-720p.json
Normal file
6182
comfyui/workflows/image-to-video/i2v_wan22-14b-animate.json
Normal file
2739
comfyui/workflows/image-to-video/i2v_wan22-14b-flf2v.json
Normal file
2735
comfyui/workflows/image-to-video/i2v_wan22-14b-fun-camera.json
Normal file
2908
comfyui/workflows/image-to-video/i2v_wan22-14b-fun-control.json
Normal file
2327
comfyui/workflows/image-to-video/i2v_wan22-14b-i2v.json
Normal file
7988
comfyui/workflows/image-to-video/i2v_wan22-14b-s2v.json
Normal file
1876
comfyui/workflows/image-to-video/i2v_wan22-14b-t2v.json
Normal file
733
comfyui/workflows/image-to-video/i2v_wan22-5b-ti2v.json
Normal file
@@ -0,0 +1,733 @@
|
||||
{
|
||||
"id": "91f6bbe2-ed41-4fd6-bac7-71d5b5864ecb",
|
||||
"revision": 0,
|
||||
"last_node_id": 59,
|
||||
"last_link_id": 108,
|
||||
"nodes": [
|
||||
{
|
||||
"id": 37,
|
||||
"type": "UNETLoader",
|
||||
"pos": [
|
||||
-30,
|
||||
50
|
||||
],
|
||||
"size": [
|
||||
346.7470703125,
|
||||
82
|
||||
],
|
||||
"flags": {},
|
||||
"order": 0,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "MODEL",
|
||||
"type": "MODEL",
|
||||
"slot_index": 0,
|
||||
"links": [
|
||||
94
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "UNETLoader",
|
||||
"models": [
|
||||
{
|
||||
"name": "wan2.2_ti2v_5B_fp16.safetensors",
|
||||
"url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_ti2v_5B_fp16.safetensors",
|
||||
"directory": "diffusion_models"
|
||||
}
|
||||
]
|
||||
},
|
||||
"widgets_values": [
|
||||
"wan2.2_ti2v_5B_fp16.safetensors",
|
||||
"default"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 38,
|
||||
"type": "CLIPLoader",
|
||||
"pos": [
|
||||
-30,
|
||||
190
|
||||
],
|
||||
"size": [
|
||||
350,
|
||||
110
|
||||
],
|
||||
"flags": {},
|
||||
"order": 1,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "CLIP",
|
||||
"type": "CLIP",
|
||||
"slot_index": 0,
|
||||
"links": [
|
||||
74,
|
||||
75
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "CLIPLoader",
|
||||
"models": [
|
||||
{
|
||||
"name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
|
||||
"url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
|
||||
"directory": "text_encoders"
|
||||
}
|
||||
]
|
||||
},
|
||||
"widgets_values": [
|
||||
"umt5_xxl_fp8_e4m3fn_scaled.safetensors",
|
||||
"wan",
|
||||
"default"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 39,
|
||||
"type": "VAELoader",
|
||||
"pos": [
|
||||
-30,
|
||||
350
|
||||
],
|
||||
"size": [
|
||||
350,
|
||||
60
|
||||
],
|
||||
"flags": {},
|
||||
"order": 2,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "VAE",
|
||||
"type": "VAE",
|
||||
"slot_index": 0,
|
||||
"links": [
|
||||
76,
|
||||
105
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "VAELoader",
|
||||
"models": [
|
||||
{
|
||||
"name": "wan2.2_vae.safetensors",
|
||||
"url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan2.2_vae.safetensors",
|
||||
"directory": "vae"
|
||||
}
|
||||
]
|
||||
},
|
||||
"widgets_values": [
|
||||
"wan2.2_vae.safetensors"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"type": "VAEDecode",
|
||||
"pos": [
|
||||
1190,
|
||||
150
|
||||
],
|
||||
"size": [
|
||||
210,
|
||||
46
|
||||
],
|
||||
"flags": {},
|
||||
"order": 10,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "samples",
|
||||
"type": "LATENT",
|
||||
"link": 35
|
||||
},
|
||||
{
|
||||
"name": "vae",
|
||||
"type": "VAE",
|
||||
"link": 76
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "IMAGE",
|
||||
"type": "IMAGE",
|
||||
"slot_index": 0,
|
||||
"links": [
|
||||
107
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "VAEDecode"
|
||||
},
|
||||
"widgets_values": []
|
||||
},
|
||||
{
|
||||
"id": 57,
|
||||
"type": "CreateVideo",
|
||||
"pos": [
|
||||
1200,
|
||||
240
|
||||
],
|
||||
"size": [
|
||||
270,
|
||||
78
|
||||
],
|
||||
"flags": {},
|
||||
"order": 11,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "images",
|
||||
"type": "IMAGE",
|
||||
"link": 107
|
||||
},
|
||||
{
|
||||
"name": "audio",
|
||||
"shape": 7,
|
||||
"type": "AUDIO",
|
||||
"link": null
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "VIDEO",
|
||||
"type": "VIDEO",
|
||||
"links": [
|
||||
108
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "CreateVideo"
|
||||
},
|
||||
"widgets_values": [
|
||||
24
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 58,
|
||||
"type": "SaveVideo",
|
||||
"pos": [
|
||||
1200,
|
||||
370
|
||||
],
|
||||
"size": [
|
||||
660,
|
||||
450
|
||||
],
|
||||
"flags": {},
|
||||
"order": 12,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "video",
|
||||
"type": "VIDEO",
|
||||
"link": 108
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "SaveVideo"
|
||||
},
|
||||
"widgets_values": [
|
||||
"video/ComfyUI",
|
||||
"auto",
|
||||
"auto"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 55,
|
||||
"type": "Wan22ImageToVideoLatent",
|
||||
"pos": [
|
||||
380,
|
||||
540
|
||||
],
|
||||
"size": [
|
||||
271.9126892089844,
|
||||
150
|
||||
],
|
||||
"flags": {},
|
||||
"order": 8,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "vae",
|
||||
"type": "VAE",
|
||||
"link": 105
|
||||
},
|
||||
{
|
||||
"name": "start_image",
|
||||
"shape": 7,
|
||||
"type": "IMAGE",
|
||||
"link": 106
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "LATENT",
|
||||
"type": "LATENT",
|
||||
"links": [
|
||||
104
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "Wan22ImageToVideoLatent"
|
||||
},
|
||||
"widgets_values": [
|
||||
1280,
|
||||
704,
|
||||
121,
|
||||
1
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 56,
|
||||
"type": "LoadImage",
|
||||
"pos": [
|
||||
0,
|
||||
540
|
||||
],
|
||||
"size": [
|
||||
274.080078125,
|
||||
314
|
||||
],
|
||||
"flags": {},
|
||||
"order": 3,
|
||||
"mode": 4,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "IMAGE",
|
||||
"type": "IMAGE",
|
||||
"links": [
|
||||
106
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "MASK",
|
||||
"type": "MASK",
|
||||
"links": null
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "LoadImage"
|
||||
},
|
||||
"widgets_values": [
|
||||
"example.png",
|
||||
"image"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"type": "CLIPTextEncode",
|
||||
"pos": [
|
||||
380,
|
||||
260
|
||||
],
|
||||
"size": [
|
||||
425.27801513671875,
|
||||
180.6060791015625
|
||||
],
|
||||
"flags": {},
|
||||
"order": 7,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "clip",
|
||||
"type": "CLIP",
|
||||
"link": 75
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "CONDITIONING",
|
||||
"type": "CONDITIONING",
|
||||
"slot_index": 0,
|
||||
"links": [
|
||||
52
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "CLIP Text Encode (Negative Prompt)",
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "CLIPTextEncode"
|
||||
},
|
||||
"widgets_values": [
|
||||
"色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
|
||||
],
|
||||
"color": "#322",
|
||||
"bgcolor": "#533"
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "CLIPTextEncode",
|
||||
"pos": [
|
||||
380,
|
||||
50
|
||||
],
|
||||
"size": [
|
||||
422.84503173828125,
|
||||
164.31304931640625
|
||||
],
|
||||
"flags": {},
|
||||
"order": 6,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "clip",
|
||||
"type": "CLIP",
|
||||
"link": 74
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "CONDITIONING",
|
||||
"type": "CONDITIONING",
|
||||
"slot_index": 0,
|
||||
"links": [
|
||||
46
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "CLIP Text Encode (Positive Prompt)",
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "CLIPTextEncode"
|
||||
},
|
||||
"widgets_values": [
|
||||
"Low contrast. In a retro 1970s-style subway station, a street musician plays in dim colors and rough textures. He wears an old jacket, playing guitar with focus. Commuters hurry by, and a small crowd gathers to listen. The camera slowly moves right, capturing the blend of music and city noise, with old subway signs and mottled walls in the background."
|
||||
],
|
||||
"color": "#232",
|
||||
"bgcolor": "#353"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "KSampler",
|
||||
"pos": [
|
||||
850,
|
||||
130
|
||||
],
|
||||
"size": [
|
||||
315,
|
||||
262
|
||||
],
|
||||
"flags": {},
|
||||
"order": 9,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "model",
|
||||
"type": "MODEL",
|
||||
"link": 95
|
||||
},
|
||||
{
|
||||
"name": "positive",
|
||||
"type": "CONDITIONING",
|
||||
"link": 46
|
||||
},
|
||||
{
|
||||
"name": "negative",
|
||||
"type": "CONDITIONING",
|
||||
"link": 52
|
||||
},
|
||||
{
|
||||
"name": "latent_image",
|
||||
"type": "LATENT",
|
||||
"link": 104
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "LATENT",
|
||||
"type": "LATENT",
|
||||
"slot_index": 0,
|
||||
"links": [
|
||||
35
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "KSampler"
|
||||
},
|
||||
"widgets_values": [
|
||||
898471028164125,
|
||||
"randomize",
|
||||
20,
|
||||
5,
|
||||
"uni_pc",
|
||||
"simple",
|
||||
1
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 48,
|
||||
"type": "ModelSamplingSD3",
|
||||
"pos": [
|
||||
850,
|
||||
20
|
||||
],
|
||||
"size": [
|
||||
210,
|
||||
58
|
||||
],
|
||||
"flags": {
|
||||
"collapsed": false
|
||||
},
|
||||
"order": 5,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "model",
|
||||
"type": "MODEL",
|
||||
"link": 94
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "MODEL",
|
||||
"type": "MODEL",
|
||||
"slot_index": 0,
|
||||
"links": [
|
||||
95
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.3.45",
|
||||
"Node name for S&R": "ModelSamplingSD3"
|
||||
},
|
||||
"widgets_values": [
|
||||
8
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 59,
|
||||
"type": "MarkdownNote",
|
||||
"pos": [
|
||||
-550,
|
||||
10
|
||||
],
|
||||
"size": [
|
||||
480,
|
||||
340
|
||||
],
|
||||
"flags": {},
|
||||
"order": 4,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [],
|
||||
"title": "Model Links",
|
||||
"properties": {},
|
||||
"widgets_values": [
|
||||
"[Tutorial](https://docs.comfy.org/tutorials/video/wan/wan2_2)\n\n**Diffusion Model**\n- [wan2.2_ti2v_5B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_ti2v_5B_fp16.safetensors)\n\n**VAE**\n- [wan2.2_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan2.2_vae.safetensors)\n\n**Text Encoder** \n- [umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors)\n\n\nFile save location\n\n```\nComfyUI/\n├───📂 models/\n│ ├───📂 diffusion_models/\n│ │ └───wan2.2_ti2v_5B_fp16.safetensors\n│ ├───📂 text_encoders/\n│ │ └─── umt5_xxl_fp8_e4m3fn_scaled.safetensors \n│ └───📂 vae/\n│ └── wan2.2_vae.safetensors\n```\n"
|
||||
],
|
||||
"color": "#432",
|
||||
"bgcolor": "#653"
|
||||
}
|
||||
],
|
||||
"links": [
|
||||
[
|
||||
35,
|
||||
3,
|
||||
0,
|
||||
8,
|
||||
0,
|
||||
"LATENT"
|
||||
],
|
||||
[
|
||||
46,
|
||||
6,
|
||||
0,
|
||||
3,
|
||||
1,
|
||||
"CONDITIONING"
|
||||
],
|
||||
[
|
||||
52,
|
||||
7,
|
||||
0,
|
||||
3,
|
||||
2,
|
||||
"CONDITIONING"
|
||||
],
|
||||
[
|
||||
74,
|
||||
38,
|
||||
0,
|
||||
6,
|
||||
0,
|
||||
"CLIP"
|
||||
],
|
||||
[
|
||||
75,
|
||||
38,
|
||||
0,
|
||||
7,
|
||||
0,
|
||||
"CLIP"
|
||||
],
|
||||
[
|
||||
76,
|
||||
39,
|
||||
0,
|
||||
8,
|
||||
1,
|
||||
"VAE"
|
||||
],
|
||||
[
|
||||
94,
|
||||
37,
|
||||
0,
|
||||
48,
|
||||
0,
|
||||
"MODEL"
|
||||
],
|
||||
[
|
||||
95,
|
||||
48,
|
||||
0,
|
||||
3,
|
||||
0,
|
||||
"MODEL"
|
||||
],
|
||||
[
|
||||
104,
|
||||
55,
|
||||
0,
|
||||
3,
|
||||
3,
|
||||
"LATENT"
|
||||
],
|
||||
[
|
||||
105,
|
||||
39,
|
||||
0,
|
||||
55,
|
||||
0,
|
||||
"VAE"
|
||||
],
|
||||
[
|
||||
106,
|
||||
56,
|
||||
0,
|
||||
55,
|
||||
1,
|
||||
"IMAGE"
|
||||
],
|
||||
[
|
||||
107,
|
||||
8,
|
||||
0,
|
||||
57,
|
||||
0,
|
||||
"IMAGE"
|
||||
],
|
||||
[
|
||||
108,
|
||||
57,
|
||||
0,
|
||||
58,
|
||||
0,
|
||||
"VIDEO"
|
||||
]
|
||||
],
|
||||
"groups": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Step1 - Load models",
|
||||
"bounding": [
|
||||
-50,
|
||||
-20,
|
||||
400,
|
||||
453.6000061035156
|
||||
],
|
||||
"color": "#3f789e",
|
||||
"font_size": 24,
|
||||
"flags": {}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Step3 - Prompt",
|
||||
"bounding": [
|
||||
370,
|
||||
-20,
|
||||
448.27801513671875,
|
||||
473.2060852050781
|
||||
],
|
||||
"color": "#3f789e",
|
||||
"font_size": 24,
|
||||
"flags": {}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "For i2v, use Ctrl + B to enable",
|
||||
"bounding": [
|
||||
-50,
|
||||
450,
|
||||
400,
|
||||
420
|
||||
],
|
||||
"color": "#3f789e",
|
||||
"font_size": 24,
|
||||
"flags": {}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "Video Size & length",
|
||||
"bounding": [
|
||||
370,
|
||||
470,
|
||||
291.9127197265625,
|
||||
233.60000610351562
|
||||
],
|
||||
"color": "#3f789e",
|
||||
"font_size": 24,
|
||||
"flags": {}
|
||||
}
|
||||
],
|
||||
"config": {},
|
||||
"extra": {
|
||||
"ds": {
|
||||
"scale": 0.46462425349300085,
|
||||
"offset": [
|
||||
847.5372059811432,
|
||||
288.7938392118285
|
||||
]
|
||||
},
|
||||
"frontendVersion": "1.27.10",
|
||||
"VHS_latentpreview": false,
|
||||
"VHS_latentpreviewrate": 0,
|
||||
"VHS_MetadataImage": true,
|
||||
"VHS_KeepIntermediate": true
|
||||
},
|
||||
"version": 0.4
|
||||
}
|
||||
BIN
comfyui/workflows/image-to-video/wan22-animate-ref-image.png
Normal file
|
After Width: | Height: | Size: 906 KiB |
BIN
comfyui/workflows/image-to-video/wan22-flf2v-end.png
Normal file
|
After Width: | Height: | Size: 1.7 MiB |
BIN
comfyui/workflows/image-to-video/wan22-flf2v-start.png
Normal file
|
After Width: | Height: | Size: 2.0 MiB |
BIN
comfyui/workflows/image-to-video/wan22-fun-camera-input.jpg
Normal file
|
After Width: | Height: | Size: 925 KiB |
BIN
comfyui/workflows/image-to-video/wan22-i2v-input.jpg
Normal file
|
After Width: | Height: | Size: 712 KiB |
@@ -169,6 +169,301 @@ model_categories:
|
||||
- source: "svd_xt.safetensors"
|
||||
dest: "svd_xt.safetensors"
|
||||
|
||||
# HunyuanVideo - Original (720p, T2V/I2V)
|
||||
- repo_id: Comfy-Org/HunyuanVideo_repackaged
|
||||
description: HunyuanVideo T2V - 720p text-to-video with MLLM encoders
|
||||
size_gb: 20
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: bf16
|
||||
vram_gb: 24
|
||||
frames: 129
|
||||
resolution: 720p
|
||||
notes: 5-second T2V generation with Chinese/English support, DiT architecture with 3D VAE
|
||||
files:
|
||||
- source: "split_files/diffusion_models/hunyuan_video_t2v_720p_bf16.safetensors"
|
||||
dest: "hunyuan_video_t2v_720p_bf16.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/HunyuanVideo_repackaged
|
||||
description: HunyuanVideo I2V v1 - 720p image-to-video (concat method)
|
||||
size_gb: 20
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: bf16
|
||||
vram_gb: 24
|
||||
frames: 129
|
||||
resolution: 720p
|
||||
notes: Static image to video with concat conditioning, better motion fluidity
|
||||
files:
|
||||
- source: "split_files/diffusion_models/hunyuan_video_image_to_video_720p_bf16.safetensors"
|
||||
dest: "hunyuan_video_image_to_video_720p_bf16.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/HunyuanVideo_repackaged
|
||||
description: HunyuanVideo I2V v2 - 720p image-to-video (replace method)
|
||||
size_gb: 20
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: bf16
|
||||
vram_gb: 24
|
||||
frames: 129
|
||||
resolution: 720p
|
||||
notes: Updated I2V with replace conditioning, better image guidance adherence
|
||||
files:
|
||||
- source: "split_files/diffusion_models/hunyuan_video_v2_replace_image_to_video_720p_bf16.safetensors"
|
||||
dest: "hunyuan_video_v2_replace_image_to_video_720p_bf16.safetensors"
|
||||
|
||||
# HunyuanVideo 1.5 - Latest generation (720p/1080p, T2V/I2V)
|
||||
- repo_id: Comfy-Org/HunyuanVideo_1.5_repackaged
|
||||
description: HunyuanVideo 1.5 T2V - 720p text-to-video (8.3B parameters)
|
||||
size_gb: 18
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp16
|
||||
vram_gb: 24
|
||||
frames: 129-257
|
||||
resolution: 720p
|
||||
notes: 5-10 second T2V with Qwen 2.5 VL encoder, requires 24GB VRAM
|
||||
files:
|
||||
- source: "split_files/diffusion_models/hunyuanvideo1.5_720p_t2v_fp16.safetensors"
|
||||
dest: "hunyuanvideo1.5_720p_t2v_fp16.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/HunyuanVideo_1.5_repackaged
|
||||
description: HunyuanVideo 1.5 SR - 1080p super-resolution (distilled)
|
||||
size_gb: 18
|
||||
essential: false
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp16
|
||||
vram_gb: 24
|
||||
frames: 129-257
|
||||
resolution: 1080p
|
||||
notes: Upscales 720p to 1080p with distilled model for faster generation
|
||||
files:
|
||||
- source: "split_files/diffusion_models/hunyuanvideo1.5_1080p_sr_distilled_fp16.safetensors"
|
||||
dest: "hunyuanvideo1.5_1080p_sr_distilled_fp16.safetensors"
|
||||
|
||||
# Wan2.2 5B - Hybrid text+image to video (low VRAM)
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 TI2V 5B - Hybrid text+image to video (8GB VRAM)
|
||||
size_gb: 10
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp16
|
||||
vram_gb: 8
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: Efficient 5B dense model with native offloading (the dual-expert high/low noise design applies to the 14B models)
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_ti2v_5B_fp16.safetensors"
|
||||
dest: "wan2.2_ti2v_5B_fp16.safetensors"
|
||||
|
||||
# Wan2.2 14B T2V - Dual-expert text-to-video
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 T2V High Noise 14B - Text-to-video high noise expert (FP8)
|
||||
size_gb: 14
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp8_scaled
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: Dual-expert T2V high noise denoising, FP8 quantized for 24GB GPU
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors"
|
||||
dest: "wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 T2V Low Noise 14B - Text-to-video low noise expert (FP8)
|
||||
size_gb: 14
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp8_scaled
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: Dual-expert T2V low noise refinement, FP8 quantized for 24GB GPU
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors"
|
||||
dest: "wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors"
|
||||
|
||||
# Wan2.2 14B I2V - Image-to-video with content consistency
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 I2V High Noise 14B - Image-to-video high noise expert (FP16)
|
||||
size_gb: 28
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp16
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: Dual-expert I2V high noise denoising with content consistency
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp16.safetensors"
|
||||
dest: "wan2.2_i2v_high_noise_14B_fp16.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 I2V Low Noise 14B - Image-to-video low noise expert (FP16)
|
||||
size_gb: 28
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp16
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: Dual-expert I2V low noise refinement with content consistency
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp16.safetensors"
|
||||
dest: "wan2.2_i2v_low_noise_14B_fp16.safetensors"
|
||||
|
||||
# Wan2.2 14B Animate - Video-to-video character animation
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 Animate 14B - Video-to-video character animation (BF16)
|
||||
size_gb: 28
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: bf16
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: multiples of 16
|
||||
notes: V2V animation with Mix/Move modes, requires CLIP Vision H for reference image
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_animate_14B_bf16.safetensors"
|
||||
dest: "wan2.2_animate_14B_bf16.safetensors"
|
||||
|
||||
# Wan2.2 14B S2V - Sound-to-video synchronization
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 S2V 14B - Sound-to-video with audio sync (FP8)
|
||||
size_gb: 14
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp8_scaled
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: Transforms static images + audio into synchronized videos, uses Wav2Vec2 audio encoder
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_s2v_14B_fp8_scaled.safetensors"
|
||||
dest: "wan2.2_s2v_14B_fp8_scaled.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 S2V 14B - Sound-to-video with audio sync (BF16 quality)
|
||||
size_gb: 28
|
||||
essential: false
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: bf16
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: Higher quality BF16 version of S2V for better output quality
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_s2v_14B_bf16.safetensors"
|
||||
dest: "wan2.2_s2v_14B_bf16.safetensors"
|
||||
|
||||
# Wan2.2 14B Fun Inpaint - Start-end frame controlled generation
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 Fun Inpaint High Noise 14B - Start-end frame transition (FP8)
|
||||
size_gb: 14
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp8_scaled
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: Generates transition between start and end frames with high noise denoising
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_fun_inpaint_high_noise_14B_fp8_scaled.safetensors"
|
||||
dest: "wan2.2_fun_inpaint_high_noise_14B_fp8_scaled.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 Fun Inpaint Low Noise 14B - Start-end frame transition (FP8)
|
||||
size_gb: 14
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp8_scaled
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: Generates transition between start and end frames with low noise refinement
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_fun_inpaint_low_noise_14B_fp8_scaled.safetensors"
|
||||
dest: "wan2.2_fun_inpaint_low_noise_14B_fp8_scaled.safetensors"
|
||||
|
||||
# Wan2.2 14B Fun Control - ControlNet-style conditioning
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 Fun Control High Noise 14B - Control conditions (Canny/Depth/Pose/MLSD/trajectory)
|
||||
size_gb: 14
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp8_scaled
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: I2V with control conditions (Canny, Depth, OpenPose, MLSD, trajectory), requires controlnet_aux
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_fun_control_high_noise_14B_fp8_scaled.safetensors"
|
||||
dest: "wan2.2_fun_control_high_noise_14B_fp8_scaled.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 Fun Control Low Noise 14B - Control conditions (Canny/Depth/Pose/MLSD/trajectory)
|
||||
size_gb: 14
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp8_scaled
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: I2V with control conditions low noise refinement
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_fun_control_low_noise_14B_fp8_scaled.safetensors"
|
||||
dest: "wan2.2_fun_control_low_noise_14B_fp8_scaled.safetensors"
|
||||
|
||||
# Wan2.2 14B Fun Camera - Camera motion control
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 Fun Camera High Noise 14B - Camera motion control (pan/zoom/static)
|
||||
size_gb: 14
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp8_scaled
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: I2V with camera motion control (pan, zoom, static), 108s with LoRA / 536s without
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_fun_camera_high_noise_14B_fp8_scaled.safetensors"
|
||||
dest: "wan2.2_fun_camera_high_noise_14B_fp8_scaled.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 Fun Camera Low Noise 14B - Camera motion control (pan/zoom/static)
|
||||
size_gb: 14
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp8_scaled
|
||||
vram_gb: 24
|
||||
frames: 81
|
||||
resolution: 640x640
|
||||
notes: I2V with camera motion control low noise refinement
|
||||
files:
|
||||
- source: "split_files/diffusion_models/wan2.2_fun_camera_low_noise_14B_fp8_scaled.safetensors"
|
||||
dest: "wan2.2_fun_camera_low_noise_14B_fp8_scaled.safetensors"
|
||||
|
||||
# ==========================================================================
|
||||
# AUDIO GENERATION MODELS
|
||||
# ==========================================================================
|
||||
@@ -383,6 +678,205 @@ model_categories:
|
||||
- source: "text_encoders/clip_g.safetensors"
|
||||
dest: "clip_g.safetensors"
|
||||
|
||||
# HunyuanVideo Support Models
|
||||
- repo_id: Comfy-Org/HunyuanVideo_repackaged
|
||||
description: HunyuanVideo VAE - 3D VAE for video encoding/decoding (BF16)
|
||||
size_gb: 1
|
||||
essential: true
|
||||
category: support
|
||||
type: vae
|
||||
format: bf16
|
||||
vram_gb: 2
|
||||
notes: 3D VAE autoencoder for HunyuanVideo models
|
||||
files:
|
||||
- source: "split_files/vae/hunyuan_video_vae_bf16.safetensors"
|
||||
dest: "hunyuan_video_vae_bf16.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/HunyuanVideo_repackaged
|
||||
description: LLaVA LLaMA3 FP8 - Multimodal text encoder for HunyuanVideo
|
||||
size_gb: 8
|
||||
essential: true
|
||||
category: support
|
||||
type: text_encoders
|
||||
format: fp8_scaled
|
||||
vram_gb: 4
|
||||
notes: LLaVA LLaMA3-based text encoder with FP8 quantization
|
||||
files:
|
||||
- source: "split_files/text_encoders/llava_llama3_fp8_scaled.safetensors"
|
||||
dest: "llava_llama3_fp8_scaled.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/HunyuanVideo_repackaged
|
||||
description: LLaVA LLaMA3 Vision - Vision encoder for HunyuanVideo I2V
|
||||
size_gb: 2
|
||||
essential: true
|
||||
category: support
|
||||
type: clip_vision
|
||||
format: safetensors
|
||||
vram_gb: 2
|
||||
notes: Vision encoder for image-to-video conditioning
|
||||
files:
|
||||
- source: "split_files/clip_vision/llava_llama3_vision.safetensors"
|
||||
dest: "llava_llama3_vision.safetensors"
|
||||
|
||||
# HunyuanVideo 1.5 Support Models
|
||||
- repo_id: Comfy-Org/HunyuanVideo_1.5_repackaged
|
||||
description: HunyuanVideo 1.5 VAE - VAE for v1.5 models (FP16)
|
||||
size_gb: 1
|
||||
essential: true
|
||||
category: support
|
||||
type: vae
|
||||
format: fp16
|
||||
vram_gb: 2
|
||||
notes: VAE autoencoder for HunyuanVideo 1.5
|
||||
files:
|
||||
- source: "hunyuanvideo15_vae_fp16.safetensors"
|
||||
dest: "hunyuanvideo15_vae_fp16.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/HunyuanVideo_1.5_repackaged
|
||||
description: Qwen 2.5 VL 7B FP8 - Vision-language encoder for HunyuanVideo 1.5
|
||||
size_gb: 14
|
||||
essential: true
|
||||
category: support
|
||||
type: text_encoders
|
||||
format: fp8_scaled
|
||||
vram_gb: 8
|
||||
notes: Qwen 2.5 VL 7B text encoder with FP8 quantization
|
||||
files:
|
||||
- source: "qwen_2.5_vl_7b_fp8_scaled.safetensors"
|
||||
dest: "qwen_2.5_vl_7b_fp8_scaled.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/HunyuanVideo_1.5_repackaged
|
||||
description: ByT5 Small GlyphXL FP16 - Glyph-aware text encoder for HunyuanVideo 1.5
|
||||
size_gb: 0.5
|
||||
essential: true
|
||||
category: support
|
||||
type: text_encoders
|
||||
format: fp16
|
||||
vram_gb: 1
|
||||
notes: ByT5 small text encoder with glyph awareness
|
||||
files:
|
||||
- source: "byt5_small_glyphxl_fp16.safetensors"
|
||||
dest: "byt5_small_glyphxl_fp16.safetensors"
|
||||
|
||||
# Wan2.2 Support Models
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan2.2 VAE - VAE for Wan2.2 5B models
|
||||
size_gb: 0.5
|
||||
essential: true
|
||||
category: support
|
||||
type: vae
|
||||
format: safetensors
|
||||
vram_gb: 1
|
||||
notes: VAE autoencoder for Wan2.2 5B TI2V model
|
||||
files:
|
||||
- source: "wan2.2_vae.safetensors"
|
||||
dest: "wan2.2_vae.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wan 2.1 VAE - VAE for Wan2.2 14B models
|
||||
size_gb: 0.5
|
||||
essential: true
|
||||
category: support
|
||||
type: vae
|
||||
format: safetensors
|
||||
vram_gb: 1
|
||||
notes: VAE autoencoder for all Wan2.2 14B models (T2V, I2V, S2V, Animate, etc.)
|
||||
files:
|
||||
- source: "wan_2.1_vae.safetensors"
|
||||
dest: "wan_2.1_vae.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.1_ComfyUI_repackaged
|
||||
description: UMT5-XXL FP8 - Text encoder for all Wan2.2 models
|
||||
size_gb: 10
|
||||
essential: true
|
||||
category: support
|
||||
type: text_encoders
|
||||
format: fp8_scaled
|
||||
vram_gb: 5
|
||||
notes: Shared text encoder for all Wan2.2 models (5B and 14B), FP8 quantized
|
||||
files:
|
||||
- source: "umt5_xxl_fp8_e4m3fn_scaled.safetensors"
|
||||
dest: "umt5_xxl_fp8_e4m3fn_scaled.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: CLIP Vision H - Vision encoder for Wan2.2 Animate mode
|
||||
size_gb: 4
|
||||
essential: true
|
||||
category: support
|
||||
type: clip_vision
|
||||
format: safetensors
|
||||
vram_gb: 2
|
||||
notes: CLIP Vision H for reference image in Wan2.2 Animate video-to-video
|
||||
files:
|
||||
- source: "clip_vision_h.safetensors"
|
||||
dest: "clip_vision_h.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Wav2Vec2 Large English FP16 - Audio encoder for Wan2.2 S2V
|
||||
size_gb: 1
|
||||
essential: true
|
||||
category: support
|
||||
type: audio_models
|
||||
format: fp16
|
||||
vram_gb: 2
|
||||
notes: Audio encoder for sound-to-video synchronization
|
||||
files:
|
||||
- source: "wav2vec2_large_english_fp16.safetensors"
|
||||
dest: "wav2vec2_large_english_fp16.safetensors"
|
||||
|
||||
# Wan2.2 LoRA Accelerators (4-step distillation)
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Lightx2v I2V Animate LoRA - 4-step acceleration for Wan2.2 Animate
|
||||
size_gb: 0.5
|
||||
essential: true
|
||||
category: support
|
||||
type: loras
|
||||
format: bf16
|
||||
vram_gb: 1
|
||||
notes: 4-step LoRA for Wan2.2 Animate (480p, cfg distilled), 5x speedup
|
||||
files:
|
||||
- source: "lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors"
|
||||
dest: "lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Lightx2v T2V High Noise LoRA - 4-step acceleration for Wan2.2 T2V high noise
|
||||
size_gb: 0.5
|
||||
essential: true
|
||||
category: support
|
||||
type: loras
|
||||
format: safetensors
|
||||
vram_gb: 1
|
||||
notes: 4-step LoRA for T2V high noise expert, v1.1
|
||||
files:
|
||||
- source: "wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors"
|
||||
dest: "wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Lightx2v I2V High Noise LoRA - 4-step acceleration for Wan2.2 I2V high noise
|
||||
size_gb: 0.5
|
||||
essential: true
|
||||
category: support
|
||||
type: loras
|
||||
format: safetensors
|
||||
vram_gb: 1
|
||||
notes: 4-step LoRA for I2V/Fun Inpaint/Fun Control/Fun Camera high noise expert
|
||||
files:
|
||||
- source: "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors"
|
||||
dest: "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors"
|
||||
|
||||
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||
description: Lightx2v I2V Low Noise LoRA - 4-step acceleration for Wan2.2 I2V low noise
|
||||
size_gb: 0.5
|
||||
essential: true
|
||||
category: support
|
||||
type: loras
|
||||
format: safetensors
|
||||
vram_gb: 1
|
||||
notes: 4-step LoRA for I2V/Fun Inpaint/Fun Control/Fun Camera low noise expert
|
||||
files:
|
||||
- source: "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors"
|
||||
dest: "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors"
|
||||
|
||||
# ==========================================================================
|
||||
# ANIMATEDIFF MODELS
|
||||
# ==========================================================================
|
||||
|
||||