feat: add CogVideoX-I2V and text encoders to model configuration
- Add THUDM/CogVideoX-5b-I2V model entry (20GB, I2V-specific model)
- Add T5-XXL FP16 text encoder for CogVideoX (9GB)
- Add CLIP-L text encoder for CogVideoX and SD3 (1GB)
- Add CLIP-G text encoder for SD3 (3GB)

Note: CogVideoX models are auto-downloaded by the DownloadAndLoadCogVideoModel node.
Text encoders are already linked manually to /workspace/ComfyUI/models/text_encoders/
This commit is contained in:
@@ -119,11 +119,26 @@ model_categories:
|
|||||||
vram_gb: 20
|
vram_gb: 20
|
||||||
frames: 49
|
frames: 49
|
||||||
resolution: 720p
|
resolution: 720p
|
||||||
notes: State-of-the-art text-to-video generation
|
notes: State-of-the-art text-to-video generation, auto-downloaded by DownloadAndLoadCogVideoModel node
|
||||||
files:
|
files:
|
||||||
- source: "transformer/diffusion_pytorch_model.safetensors"
|
- source: "transformer/diffusion_pytorch_model.safetensors"
|
||||||
dest: "cogvideox-5b-transformer.safetensors"
|
dest: "cogvideox-5b-transformer.safetensors"
|
||||||
|
|
||||||
|
- repo_id: THUDM/CogVideoX-5b-I2V
|
||||||
|
description: CogVideoX-5B-I2V - Image-to-video generation
|
||||||
|
size_gb: 20
|
||||||
|
essential: true
|
||||||
|
category: video
|
||||||
|
type: diffusion_models
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 20
|
||||||
|
frames: 49
|
||||||
|
resolution: 720p
|
||||||
|
notes: Image-to-video model, auto-downloaded by DownloadAndLoadCogVideoModel node
|
||||||
|
files:
|
||||||
|
- source: "transformer/diffusion_pytorch_model.safetensors"
|
||||||
|
dest: "cogvideox-5b-i2v-transformer.safetensors"
|
||||||
|
|
||||||
- repo_id: stabilityai/stable-video-diffusion-img2vid
|
- repo_id: stabilityai/stable-video-diffusion-img2vid
|
||||||
description: SVD - 14 frame image-to-video
|
description: SVD - 14 frame image-to-video
|
||||||
size_gb: 8
|
size_gb: 8
|
||||||
@@ -247,6 +262,45 @@ model_categories:
|
|||||||
- source: "model.safetensors"
|
- source: "model.safetensors"
|
||||||
dest: "siglip-so400m-patch14-384.safetensors"
|
dest: "siglip-so400m-patch14-384.safetensors"
|
||||||
|
|
||||||
|
- repo_id: stabilityai/stable-diffusion-3.5-large
|
||||||
|
description: T5-XXL FP16 - For CogVideoX text encoding
|
||||||
|
size_gb: 9
|
||||||
|
essential: true
|
||||||
|
category: support
|
||||||
|
type: text_encoders
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 4
|
||||||
|
notes: T5 text encoder required for CogVideoX models
|
||||||
|
files:
|
||||||
|
- source: "text_encoders/t5xxl_fp16.safetensors"
|
||||||
|
dest: "t5xxl_fp16.safetensors"
|
||||||
|
|
||||||
|
- repo_id: stabilityai/stable-diffusion-3.5-large
|
||||||
|
description: CLIP-L - For CogVideoX and SD3
|
||||||
|
size_gb: 1
|
||||||
|
essential: true
|
||||||
|
category: support
|
||||||
|
type: text_encoders
|
||||||
|
format: fp32
|
||||||
|
vram_gb: 1
|
||||||
|
notes: CLIP-L text encoder for CogVideoX and SD3 models
|
||||||
|
files:
|
||||||
|
- source: "text_encoders/clip_l.safetensors"
|
||||||
|
dest: "clip_l.safetensors"
|
||||||
|
|
||||||
|
- repo_id: stabilityai/stable-diffusion-3.5-large
|
||||||
|
description: CLIP-G - For SD3 models
|
||||||
|
size_gb: 3
|
||||||
|
essential: false
|
||||||
|
category: support
|
||||||
|
type: text_encoders
|
||||||
|
format: fp32
|
||||||
|
vram_gb: 2
|
||||||
|
notes: CLIP-G text encoder for SD3 models
|
||||||
|
files:
|
||||||
|
- source: "text_encoders/clip_g.safetensors"
|
||||||
|
dest: "clip_g.safetensors"
|
||||||
|
|
||||||
# ==========================================================================
|
# ==========================================================================
|
||||||
# ANIMATEDIFF MODELS
|
# ANIMATEDIFF MODELS
|
||||||
# ==========================================================================
|
# ==========================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user