feat: add dedicated CivitAI NSFW model downloader
- Add models_civitai.yaml with 6 NSFW SDXL checkpoints
- Create artifact_civitai_download.sh with beautiful purple/magenta CLI
- Update .env.example with CIVITAI_API_KEY documentation
- Update CLAUDE.md with CivitAI usage instructions
- Rename comfyui_models.yaml to models_huggingface.yaml for clarity

Features:
- Dedicated config and downloader for CivitAI models
- Same elegant architecture as HuggingFace downloader
- Retry logic, rate limiting, progress bars
- Models: LUSTIFY, Pony Diffusion V6, RealVisXL, etc.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
579
models_huggingface.yaml
Normal file
579
models_huggingface.yaml
Normal file
@@ -0,0 +1,579 @@
|
||||
# ============================================================================
|
||||
# ComfyUI Model Configuration
|
||||
# ============================================================================
|
||||
#
|
||||
# This configuration file defines all available ComfyUI models for download.
|
||||
# Models are organized by category: image, video, audio, support,
# AnimateDiff, ControlNet, and IP-Adapter models.
|
||||
#
|
||||
# Each model entry contains:
|
||||
# - repo_id: HuggingFace repository identifier
|
||||
# - description: Human-readable description
|
||||
# - size_gb: Approximate size in gigabytes
|
||||
# - essential: Whether this is an essential model (true/false)
|
||||
# - category: Model category (image/video/audio/support)
|
||||
#
|
||||
# ============================================================================
|
||||
|
||||
# Global settings
|
||||
settings:
|
||||
cache_dir: /workspace/huggingface_cache
|
||||
parallel_downloads: 1
|
||||
retry_attempts: 3
|
||||
timeout_seconds: 3600
|
||||
|
||||
# Model categories
|
||||
model_categories:
|
||||
# ==========================================================================
|
||||
# IMAGE GENERATION MODELS
|
||||
# ==========================================================================
|
||||
image_models:
|
||||
- repo_id: black-forest-labs/FLUX.1-schnell
|
||||
description: FLUX.1 Schnell - Fast 4-step inference
|
||||
size_gb: 23
|
||||
essential: true
|
||||
category: image
|
||||
type: unet
|
||||
format: fp16
|
||||
vram_gb: 23
|
||||
notes: Industry-leading image generation quality
|
||||
files:
|
||||
- source: "flux1-schnell.safetensors"
|
||||
dest: "flux1-schnell.safetensors"
|
||||
|
||||
- repo_id: black-forest-labs/FLUX.1-dev
|
||||
description: FLUX.1 Dev - Balanced quality/speed
|
||||
size_gb: 23
|
||||
essential: false
|
||||
category: image
|
||||
type: unet
|
||||
format: fp16
|
||||
vram_gb: 23
|
||||
notes: Development version with enhanced features
|
||||
files:
|
||||
- source: "flux1-dev.safetensors"
|
||||
dest: "flux1-dev.safetensors"
|
||||
|
||||
- repo_id: runwayml/stable-diffusion-v1-5
|
||||
description: SD 1.5 - For AnimateDiff
|
||||
size_gb: 4
|
||||
essential: true
|
||||
category: image
|
||||
type: checkpoints
|
||||
format: fp16
|
||||
vram_gb: 8
|
||||
notes: Stable Diffusion 1.5 required for AnimateDiff motion modules
|
||||
files:
|
||||
- source: "v1-5-pruned-emaonly.safetensors"
|
||||
dest: "v1-5-pruned-emaonly.safetensors"
|
||||
|
||||
- repo_id: stabilityai/stable-diffusion-xl-base-1.0
|
||||
description: SDXL Base 1.0 - Industry standard
|
||||
size_gb: 7
|
||||
essential: true
|
||||
category: image
|
||||
type: checkpoints
|
||||
format: fp16
|
||||
vram_gb: 12
|
||||
notes: Most widely used Stable Diffusion model
|
||||
files:
|
||||
- source: "sd_xl_base_1.0.safetensors"
|
||||
dest: "sd_xl_base_1.0.safetensors"
|
||||
|
||||
- repo_id: stabilityai/stable-diffusion-xl-refiner-1.0
|
||||
description: SDXL Refiner 1.0 - Enhances base output
|
||||
size_gb: 6
|
||||
essential: false
|
||||
category: image
|
||||
type: checkpoints
|
||||
format: fp16
|
||||
vram_gb: 12
|
||||
notes: Use after SDXL base for improved details
|
||||
files:
|
||||
- source: "sd_xl_refiner_1.0.safetensors"
|
||||
dest: "sd_xl_refiner_1.0.safetensors"
|
||||
|
||||
- repo_id: stabilityai/stable-diffusion-3.5-large
|
||||
description: SD 3.5 Large - Latest Stability AI
|
||||
size_gb: 18
|
||||
essential: false
|
||||
category: image
|
||||
type: checkpoints
|
||||
format: fp16
|
||||
vram_gb: 20
|
||||
notes: Newest generation Stable Diffusion
|
||||
files:
|
||||
- source: "sd3.5_large.safetensors"
|
||||
dest: "sd3.5_large.safetensors"
|
||||
|
||||
# ==========================================================================
|
||||
# VIDEO GENERATION MODELS
|
||||
# ==========================================================================
|
||||
video_models:
|
||||
- repo_id: THUDM/CogVideoX-5b
|
||||
description: CogVideoX-5B - Professional text-to-video
|
||||
size_gb: 20
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp16
|
||||
vram_gb: 20
|
||||
frames: 49
|
||||
resolution: 720p
|
||||
notes: State-of-the-art text-to-video generation, auto-downloaded by DownloadAndLoadCogVideoModel node
|
||||
files:
|
||||
- source: "transformer/diffusion_pytorch_model.safetensors"
|
||||
dest: "cogvideox-5b-transformer.safetensors"
|
||||
|
||||
- repo_id: THUDM/CogVideoX-5b-I2V
|
||||
description: CogVideoX-5B-I2V - Image-to-video generation
|
||||
size_gb: 20
|
||||
essential: true
|
||||
category: video
|
||||
type: diffusion_models
|
||||
format: fp16
|
||||
vram_gb: 20
|
||||
frames: 49
|
||||
resolution: 720p
|
||||
notes: Image-to-video model, auto-downloaded by DownloadAndLoadCogVideoModel node
|
||||
files:
|
||||
- source: "transformer/diffusion_pytorch_model.safetensors"
|
||||
dest: "cogvideox-5b-i2v-transformer.safetensors"
|
||||
|
||||
- repo_id: stabilityai/stable-video-diffusion-img2vid
|
||||
description: SVD - 14 frame image-to-video
|
||||
size_gb: 8
|
||||
essential: true
|
||||
category: video
|
||||
type: checkpoints
|
||||
format: fp16
|
||||
vram_gb: 20
|
||||
frames: 14
|
||||
resolution: 576x1024
|
||||
notes: Convert images to short video clips
|
||||
files:
|
||||
- source: "svd.safetensors"
|
||||
dest: "svd.safetensors"
|
||||
|
||||
- repo_id: stabilityai/stable-video-diffusion-img2vid-xt
|
||||
description: SVD-XT - 25 frame image-to-video
|
||||
size_gb: 8
|
||||
essential: false
|
||||
category: video
|
||||
type: checkpoints
|
||||
format: fp16
|
||||
vram_gb: 20
|
||||
frames: 25
|
||||
resolution: 576x1024
|
||||
notes: Extended frame count version
|
||||
files:
|
||||
- source: "svd_xt.safetensors"
|
||||
dest: "svd_xt.safetensors"
|
||||
|
||||
# ==========================================================================
|
||||
# AUDIO GENERATION MODELS
|
||||
# ==========================================================================
|
||||
audio_models:
|
||||
- repo_id: facebook/musicgen-small
|
||||
description: MusicGen Small - Fast generation
|
||||
size_gb: 3
|
||||
essential: false
|
||||
category: audio
|
||||
type: musicgen
|
||||
format: fp32
|
||||
vram_gb: 4
|
||||
duration_seconds: 30
|
||||
notes: Fastest music generation, lower quality
|
||||
files:
|
||||
- source: "pytorch_model.bin"
|
||||
dest: "musicgen-small-pytorch_model.bin"
|
||||
|
||||
- repo_id: facebook/musicgen-medium
|
||||
description: MusicGen Medium - Balanced quality
|
||||
size_gb: 11
|
||||
essential: true
|
||||
category: audio
|
||||
type: musicgen
|
||||
format: fp32
|
||||
vram_gb: 8
|
||||
duration_seconds: 30
|
||||
notes: Best balance of speed and quality
|
||||
files:
|
||||
- source: "pytorch_model.bin"
|
||||
dest: "musicgen-medium-pytorch_model.bin"
|
||||
|
||||
- repo_id: facebook/musicgen-large
|
||||
description: MusicGen Large - Highest quality
|
||||
size_gb: 22
|
||||
essential: false
|
||||
category: audio
|
||||
type: musicgen
|
||||
format: fp32
|
||||
vram_gb: 16
|
||||
duration_seconds: 30
|
||||
notes: Best quality, slower generation
|
||||
files:
|
||||
- source: "pytorch_model-00001-of-00002.bin"
|
||||
dest: "musicgen-large-pytorch_model-00001-of-00002.bin"
|
||||
- source: "pytorch_model-00002-of-00002.bin"
|
||||
dest: "musicgen-large-pytorch_model-00002-of-00002.bin"
|
||||
- source: "pytorch_model.bin.index.json"
|
||||
dest: "musicgen-large-pytorch_model.bin.index.json"
|
||||
|
||||
# ==========================================================================
|
||||
# SUPPORT MODELS (CLIP, IP-Adapter, etc.)
|
||||
# ==========================================================================
|
||||
support_models:
|
||||
- repo_id: openai/clip-vit-large-patch14
|
||||
description: CLIP H - For SD 1.5 IP-Adapter
|
||||
size_gb: 2
|
||||
essential: true
|
||||
category: support
|
||||
type: clip_vision
|
||||
format: fp32
|
||||
vram_gb: 2
|
||||
notes: Text-image understanding model for IP-Adapter  # NOTE(review): repo_id is openai ViT-L/14, but dest claims CLIP-ViT-H-14-laion2B — likely wrong repo; verify against laion/CLIP-ViT-H-14-laion2B-s32B-b79K
|
||||
files:
|
||||
- source: "model.safetensors"
|
||||
dest: "CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors"
|
||||
|
||||
- repo_id: laion/CLIP-ViT-bigG-14-laion2B-39B-b160k
|
||||
description: CLIP G - For SDXL IP-Adapter
|
||||
size_gb: 7
|
||||
essential: true
|
||||
category: support
|
||||
type: clip_vision
|
||||
format: fp32
|
||||
vram_gb: 4
|
||||
notes: Larger CLIP model for SDXL IP-Adapter
|
||||
files:
|
||||
- source: "open_clip_model.safetensors"
|
||||
dest: "CLIP-ViT-bigG-14-laion2B-39B-b160k.safetensors"
|
||||
|
||||
- repo_id: google/siglip-so400m-patch14-384
|
||||
description: SigLIP - For FLUX models
|
||||
size_gb: 2
|
||||
essential: true
|
||||
category: support
|
||||
type: clip_vision
|
||||
format: fp32
|
||||
vram_gb: 2
|
||||
notes: Advanced image-text alignment
|
||||
files:
|
||||
- source: "model.safetensors"
|
||||
dest: "siglip-so400m-patch14-384.safetensors"
|
||||
|
||||
- repo_id: stabilityai/stable-diffusion-3.5-large
|
||||
description: CLIP-L and T5-XXL - For FLUX text encoding
|
||||
size_gb: 10
|
||||
essential: true
|
||||
category: support
|
||||
type: clip
|
||||
format: fp16
|
||||
vram_gb: 4
|
||||
notes: CLIP text encoders required for FLUX models
|
||||
files:
|
||||
- source: "text_encoders/clip_l.safetensors"
|
||||
dest: "clip_l.safetensors"
|
||||
- source: "text_encoders/t5xxl_fp16.safetensors"
|
||||
dest: "t5xxl_fp16.safetensors"
|
||||
|
||||
- repo_id: black-forest-labs/FLUX.1-schnell
|
||||
description: FLUX VAE - Autoencoder for FLUX models
|
||||
size_gb: 0.5
|
||||
essential: true
|
||||
category: support
|
||||
type: vae
|
||||
format: safetensors
|
||||
vram_gb: 1
|
||||
notes: VAE autoencoder required for FLUX image decoding
|
||||
files:
|
||||
- source: "ae.safetensors"
|
||||
dest: "ae.safetensors"
|
||||
|
||||
- repo_id: ai-forever/Real-ESRGAN
|
||||
description: RealESRGAN x2 - 2x upscaling model
|
||||
size_gb: 0.06
|
||||
essential: true
|
||||
category: support
|
||||
type: upscale_models
|
||||
format: pth
|
||||
vram_gb: 2
|
||||
notes: Fast 2x upscaling model for general purpose enhancement
|
||||
files:
|
||||
- source: "RealESRGAN_x2.pth"
|
||||
dest: "RealESRGAN_x2.pth"
|
||||
|
||||
- repo_id: ai-forever/Real-ESRGAN
|
||||
description: RealESRGAN x4 - 4x upscaling model
|
||||
size_gb: 0.06
|
||||
essential: true
|
||||
category: support
|
||||
type: upscale_models
|
||||
format: pth
|
||||
vram_gb: 4
|
||||
notes: High-quality 4x upscaling model for detail enhancement
|
||||
files:
|
||||
- source: "RealESRGAN_x4.pth"
|
||||
dest: "RealESRGAN_x4.pth"
|
||||
|
||||
- repo_id: stabilityai/stable-diffusion-3.5-large
|
||||
description: T5-XXL FP16 - For CogVideoX text encoding
|
||||
size_gb: 9
|
||||
essential: true
|
||||
category: support
|
||||
type: text_encoders
|
||||
format: fp16
|
||||
vram_gb: 4
|
||||
notes: T5 text encoder required for CogVideoX models
|
||||
files:
|
||||
- source: "text_encoders/t5xxl_fp16.safetensors"
|
||||
dest: "t5xxl_fp16.safetensors"  # NOTE(review): duplicate of the FLUX text-encoder entry above (same source/dest) — consider deduplicating
|
||||
|
||||
- repo_id: stabilityai/stable-diffusion-3.5-large
|
||||
description: CLIP-L - For CogVideoX and SD3
|
||||
size_gb: 1
|
||||
essential: true
|
||||
category: support
|
||||
type: text_encoders
|
||||
format: fp32
|
||||
vram_gb: 1
|
||||
notes: CLIP-L text encoder for CogVideoX and SD3 models
|
||||
files:
|
||||
- source: "text_encoders/clip_l.safetensors"
|
||||
dest: "clip_l.safetensors"  # NOTE(review): duplicate of the FLUX text-encoder entry above (same source/dest) — consider deduplicating
|
||||
|
||||
- repo_id: stabilityai/stable-diffusion-3.5-large
|
||||
description: CLIP-G - For SD3 models
|
||||
size_gb: 3
|
||||
essential: false
|
||||
category: support
|
||||
type: text_encoders
|
||||
format: fp32
|
||||
vram_gb: 2
|
||||
notes: CLIP-G text encoder for SD3 models
|
||||
files:
|
||||
- source: "text_encoders/clip_g.safetensors"
|
||||
dest: "clip_g.safetensors"
|
||||
|
||||
# ==========================================================================
|
||||
# ANIMATEDIFF MODELS
|
||||
# ==========================================================================
|
||||
animatediff_models:
|
||||
- repo_id: guoyww/animatediff
|
||||
description: AnimateDiff Motion Modules
|
||||
size_gb: 2
|
||||
essential: true
|
||||
category: animatediff
|
||||
type: animatediff_models
|
||||
filename: mm_sd_v15  # NOTE(review): actual downloaded file is mm_sd_v15_v2.ckpt — confirm whether this should be mm_sd_v15_v2
|
||||
format: safetensors
|
||||
vram_gb: 4
|
||||
notes: Motion modules for AnimateDiff text-to-video
|
||||
files:
|
||||
- source: "mm_sd_v15_v2.ckpt"
|
||||
dest: "mm_sd_v15_v2.ckpt"
|
||||
|
||||
# ==========================================================================
|
||||
# CONTROLNET MODELS
|
||||
# ==========================================================================
|
||||
controlnet_models:
|
||||
- repo_id: lllyasviel/control_v11p_sd15_canny
|
||||
description: ControlNet Canny - Edge detection control for SD 1.5
|
||||
size_gb: 1.5
|
||||
essential: false
|
||||
category: controlnet
|
||||
type: controlnet
|
||||
format: safetensors
|
||||
vram_gb: 2
|
||||
notes: Precise edge-based composition control
|
||||
files:
|
||||
- source: "diffusion_pytorch_model.safetensors"
|
||||
dest: "control_v11p_sd15_canny.safetensors"
|
||||
|
||||
- repo_id: lllyasviel/control_v11f1p_sd15_depth
|
||||
description: ControlNet Depth - Depth map control for SD 1.5
|
||||
size_gb: 1.5
|
||||
essential: false
|
||||
category: controlnet
|
||||
type: controlnet
|
||||
format: safetensors
|
||||
vram_gb: 2
|
||||
notes: Depth-based spatial control
|
||||
files:
|
||||
- source: "diffusion_pytorch_model.safetensors"
|
||||
dest: "control_v11p_sd15_depth.safetensors"  # NOTE(review): repo is control_v11f1p_sd15_depth — dest name drops the f1 revision; confirm intended filename
|
||||
|
||||
- repo_id: diffusers/controlnet-canny-sdxl-1.0
|
||||
description: ControlNet Canny SDXL - Edge detection for SDXL
|
||||
size_gb: 2.5
|
||||
essential: false
|
||||
category: controlnet
|
||||
type: controlnet
|
||||
format: safetensors
|
||||
vram_gb: 3
|
||||
notes: Canny edge control for SDXL models
|
||||
files:
|
||||
- source: "diffusion_pytorch_model.safetensors"
|
||||
dest: "controlnet-canny-sdxl-1.0.safetensors"
|
||||
|
||||
- repo_id: diffusers/controlnet-depth-sdxl-1.0
|
||||
description: ControlNet Depth SDXL - Depth map for SDXL
|
||||
size_gb: 2.5
|
||||
essential: false
|
||||
category: controlnet
|
||||
type: controlnet
|
||||
format: safetensors
|
||||
vram_gb: 3
|
||||
notes: Depth control for SDXL models
|
||||
files:
|
||||
- source: "diffusion_pytorch_model.safetensors"
|
||||
dest: "controlnet-depth-sdxl-1.0.safetensors"
|
||||
|
||||
# ==========================================================================
|
||||
# IP-ADAPTER MODELS
|
||||
# ==========================================================================
|
||||
ipadapter_models:
|
||||
- repo_id: h94/IP-Adapter
|
||||
description: IP-Adapter SDXL Base - Style & Composition
|
||||
size_gb: 1.3
|
||||
essential: true
|
||||
category: ipadapter
|
||||
type: ipadapter
|
||||
format: safetensors
|
||||
vram_gb: 4
|
||||
notes: Basic IP-Adapter for SDXL
|
||||
files:
|
||||
- source: "sdxl_models/ip-adapter_sdxl.safetensors"
|
||||
dest: "ip-adapter_sdxl.safetensors"
|
||||
|
||||
- repo_id: h94/IP-Adapter
|
||||
description: IP-Adapter SDXL VIT-H - For CLIP-ViT-H
|
||||
size_gb: 0.9
|
||||
essential: true
|
||||
category: ipadapter
|
||||
type: ipadapter
|
||||
format: safetensors
|
||||
vram_gb: 4
|
||||
notes: IP-Adapter for SDXL with VIT-H CLIP vision model
|
||||
files:
|
||||
- source: "sdxl_models/ip-adapter_sdxl_vit-h.safetensors"
|
||||
dest: "ip-adapter_sdxl_vit-h.safetensors"
|
||||
|
||||
- repo_id: h94/IP-Adapter
|
||||
description: IP-Adapter SDXL Plus - High Strength Composition
|
||||
size_gb: 0.9
|
||||
essential: false
|
||||
category: ipadapter
|
||||
type: ipadapter
|
||||
format: safetensors
|
||||
vram_gb: 4
|
||||
notes: Enhanced composition control with higher strength
|
||||
files:
|
||||
- source: "sdxl_models/ip-adapter-plus_sdxl_vit-h.safetensors"
|
||||
dest: "ip-adapter-plus_sdxl_vit-h.safetensors"
|
||||
|
||||
- repo_id: h94/IP-Adapter
|
||||
description: IP-Adapter SDXL Plus Face - Face-focused generation
|
||||
size_gb: 0.5
|
||||
essential: false
|
||||
category: ipadapter
|
||||
type: ipadapter
|
||||
format: safetensors
|
||||
vram_gb: 4
|
||||
notes: Specialized for face transfer and portrait generation
|
||||
files:
|
||||
- source: "sdxl_models/ip-adapter-plus-face_sdxl_vit-h.safetensors"
|
||||
dest: "ip-adapter-plus-face_sdxl_vit-h.safetensors"
|
||||
|
||||
# ============================================================================
|
||||
# STORAGE & VRAM SUMMARIES
|
||||
# ============================================================================
|
||||
|
||||
storage_requirements:
|
||||
essential_only:
|
||||
image: 30 # FLUX Schnell + SDXL Base
|
||||
video: 28 # CogVideoX + SVD
|
||||
audio: 11 # MusicGen Medium
|
||||
support: 11 # All 3 CLIP models
|
||||
total: 80 # Total essential storage
|
||||
|
||||
all_models:
|
||||
image: 54 # All image models — NOTE(review): listed image entries sum to 81 GB (23+23+4+7+6+18); confirm which models this total counts
|
||||
video: 36 # All video models — NOTE(review): listed video entries sum to 56 GB (20+20+8+8); confirm which models this total counts
|
||||
audio: 36 # All audio models
|
||||
support: 11 # All support models — NOTE(review): support_models lists ~10 entries summing ~31.6 GB, not 11; verify
|
||||
total: 137 # Total with optional models
|
||||
|
||||
vram_requirements:
|
||||
# For 24GB GPU (RTX 4090)
|
||||
simultaneous_loadable:
|
||||
- name: Image Focus - FLUX FP16
|
||||
models: [FLUX.1 Schnell]
|
||||
vram_used: 23
|
||||
remaining: 1
|
||||
|
||||
- name: Image Focus - FLUX FP8 + SDXL
|
||||
models: [FLUX.1 Schnell FP8, SDXL Base]
|
||||
vram_used: 24
|
||||
remaining: 0
|
||||
|
||||
- name: Video Generation
|
||||
models: [CogVideoX-5B optimized, SDXL]
|
||||
vram_used: 24
|
||||
remaining: 0
|
||||
|
||||
- name: Multi-Modal
|
||||
models: [SDXL, MusicGen Medium]
|
||||
vram_used: 20
|
||||
remaining: 4
|
||||
|
||||
# ============================================================================
|
||||
# INSTALLATION PROFILES
|
||||
# ============================================================================
|
||||
|
||||
installation_profiles:
|
||||
minimal:
|
||||
description: Minimal setup for testing
|
||||
categories: [support_models]
|
||||
storage_gb: 11
|
||||
estimated_time: 5-10 minutes
|
||||
|
||||
essential:
|
||||
description: Essential models only (~80GB)
|
||||
categories: [image_models, video_models, audio_models, support_models]
|
||||
essential_only: true
|
||||
storage_gb: 80
|
||||
estimated_time: 1-2 hours
|
||||
|
||||
image_focused:
|
||||
description: All image generation models
|
||||
categories: [image_models, support_models]
|
||||
storage_gb: 65
|
||||
estimated_time: 45-90 minutes
|
||||
|
||||
video_focused:
|
||||
description: All video generation models
|
||||
categories: [video_models, image_models, support_models]
|
||||
essential_only: true
|
||||
storage_gb: 69
|
||||
estimated_time: 1-2 hours
|
||||
|
||||
complete:
|
||||
description: All models (including optional)
|
||||
categories: [image_models, video_models, audio_models, support_models]
|
||||
storage_gb: 137
|
||||
estimated_time: 2-4 hours
|
||||
|
||||
# ============================================================================
|
||||
# METADATA
|
||||
# ============================================================================
|
||||
|
||||
metadata:
|
||||
version: "1.0.0"
|
||||
last_updated: "2025-11-21"  # quoted so YAML loaders keep a string instead of a date object
|
||||
compatible_with:
|
||||
- ComfyUI >= 0.1.0
|
||||
- Python >= 3.10
|
||||
- HuggingFace Hub >= 0.20.0
|
||||
maintainer: Valknar
|
||||
repository: https://github.com/yourusername/runpod  # TODO: replace placeholder username with the real repository owner
|
||||
Reference in New Issue
Block a user