From 80a81aa12f61c36c718f6fe85065574063fc3e24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?=
Date: Sun, 23 Nov 2025 09:36:15 +0100
Subject: [PATCH] feat: add CogVideoX-I2V and text encoders to model configuration

- Add THUDM/CogVideoX-5b-I2V model entry (20GB, I2V-specific model)
- Add T5-XXL FP16 text encoder for CogVideoX (9GB)
- Add CLIP-L text encoder for CogVideoX and SD3 (1GB)
- Add CLIP-G text encoder for SD3 (3GB)

Note: CogVideoX models are auto-downloaded by the DownloadAndLoadCogVideoModel node.
Text encoders are already linked manually to /workspace/ComfyUI/models/text_encoders/
---
 comfyui_models.yaml | 56 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/comfyui_models.yaml b/comfyui_models.yaml
index 1003065..78ab3b2 100644
--- a/comfyui_models.yaml
+++ b/comfyui_models.yaml
@@ -119,11 +119,26 @@ model_categories:
       vram_gb: 20
       frames: 49
       resolution: 720p
-      notes: State-of-the-art text-to-video generation
+      notes: State-of-the-art text-to-video generation, auto-downloaded by DownloadAndLoadCogVideoModel node
       files:
         - source: "transformer/diffusion_pytorch_model.safetensors"
           dest: "cogvideox-5b-transformer.safetensors"
 
+    - repo_id: THUDM/CogVideoX-5b-I2V
+      description: CogVideoX-5B-I2V - Image-to-video generation
+      size_gb: 20
+      essential: true
+      category: video
+      type: diffusion_models
+      format: fp16
+      vram_gb: 20
+      frames: 49
+      resolution: 720p
+      notes: Image-to-video model, auto-downloaded by DownloadAndLoadCogVideoModel node
+      files:
+        - source: "transformer/diffusion_pytorch_model.safetensors"
+          dest: "cogvideox-5b-i2v-transformer.safetensors"
+
     - repo_id: stabilityai/stable-video-diffusion-img2vid
       description: SVD - 14 frame image-to-video
       size_gb: 8
@@ -247,6 +262,45 @@ model_categories:
         - source: "model.safetensors"
           dest: "siglip-so400m-patch14-384.safetensors"
 
+    - repo_id: stabilityai/stable-diffusion-3.5-large
+      description: T5-XXL FP16 - For CogVideoX text encoding
+      size_gb: 9
+      essential: true
+      category: support
+      type: text_encoders
+      format: fp16
+      vram_gb: 4
+      notes: T5 text encoder required for CogVideoX models
+      files:
+        - source: "text_encoders/t5xxl_fp16.safetensors"
+          dest: "t5xxl_fp16.safetensors"
+
+    - repo_id: stabilityai/stable-diffusion-3.5-large
+      description: CLIP-L - For CogVideoX and SD3
+      size_gb: 1
+      essential: true
+      category: support
+      type: text_encoders
+      format: fp32
+      vram_gb: 1
+      notes: CLIP-L text encoder for CogVideoX and SD3 models
+      files:
+        - source: "text_encoders/clip_l.safetensors"
+          dest: "clip_l.safetensors"
+
+    - repo_id: stabilityai/stable-diffusion-3.5-large
+      description: CLIP-G - For SD3 models
+      size_gb: 3
+      essential: false
+      category: support
+      type: text_encoders
+      format: fp32
+      vram_gb: 2
+      notes: CLIP-G text encoder for SD3 models
+      files:
+        - source: "text_encoders/clip_g.safetensors"
+          dest: "clip_g.safetensors"
+
   # ==========================================================================
   # ANIMATEDIFF MODELS
   # ==========================================================================
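
For reference, a minimal sketch of how an entry with the repo_id/type/files fields added above could be fetched by hand and placed under the /workspace/ComfyUI/models/<type>/ layout mentioned in the commit message. The fetch_entry helper and the models root path are illustrative assumptions, not this project's actual download tooling; the CogVideoX transformers themselves are pulled automatically by the DownloadAndLoadCogVideoModel node, so this mainly applies to the text encoders.

import shutil
from pathlib import Path

from huggingface_hub import hf_hub_download

# Assumed ComfyUI model root, taken from the commit message note.
MODELS_ROOT = Path("/workspace/ComfyUI/models")


def fetch_entry(entry: dict) -> None:
    # Hypothetical helper: download each file listed in a comfyui_models.yaml
    # entry and copy it to models/<type>/<dest>.
    target_dir = MODELS_ROOT / entry["type"]
    target_dir.mkdir(parents=True, exist_ok=True)
    for f in entry["files"]:
        # Gated repos such as stabilityai/stable-diffusion-3.5-large may
        # require an authenticated Hugging Face token.
        cached = hf_hub_download(repo_id=entry["repo_id"], filename=f["source"])
        shutil.copy2(cached, target_dir / f["dest"])


if __name__ == "__main__":
    # Example: the T5-XXL text encoder entry added in this patch.
    fetch_entry({
        "repo_id": "stabilityai/stable-diffusion-3.5-large",
        "type": "text_encoders",
        "files": [{"source": "text_encoders/t5xxl_fp16.safetensors",
                   "dest": "t5xxl_fp16.safetensors"}],
    })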