From c9b01eef68d23f11767d51b570e57f2c58c413ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= <valknar@pivoine.art>
Date: Sat, 22 Nov 2025 00:31:26 +0100
Subject: [PATCH] refactor: consolidate model management into Ansible playbook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove flux/musicgen standalone implementations in favor of ComfyUI:
- Delete models/flux/ and models/musicgen/ directories
- Remove redundant scripts (install.sh, download-models.sh, prepare-template.sh)
- Update README.md to reference Ansible playbook commands
- Update playbook.yml to remove flux/musicgen service definitions
- Add COMFYUI_MODELS.md with comprehensive model installation guide
- Update stop-all.sh to only manage orchestrator and vLLM services

All model downloads and dependency management now handled via
Ansible playbook tags (base, python, vllm, comfyui, comfyui-essential).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 COMFYUI_MODELS.md                | 320 +++++++++++++++++++++
 README.md                        |  46 ++--
 models/flux/requirements.txt     |  21 --
 models/flux/server.py            | 193 -------------
 models/musicgen/requirements.txt |  11 -
 models/musicgen/server.py        | 172 ------------
 playbook.yml                     | 459 +++++++++++++++++++++++++++++--
 scripts/download-models.sh       |  36 ---
 scripts/install.sh               |  50 ----
 scripts/prepare-template.sh      | 314 ---------------------
 scripts/stop-all.sh              |   2 -
 systemd/ai-orchestrator.service  |  24 --
 12 files changed, 774 insertions(+), 874 deletions(-)
 create mode 100644 COMFYUI_MODELS.md
 delete mode 100644 models/flux/requirements.txt
 delete mode 100644 models/flux/server.py
 delete mode 100644 models/musicgen/requirements.txt
 delete mode 100644 models/musicgen/server.py
 delete mode 100644 scripts/download-models.sh
 delete mode 100644 scripts/install.sh
 delete mode 100644 scripts/prepare-template.sh
 delete mode 100644 systemd/ai-orchestrator.service

diff --git a/COMFYUI_MODELS.md b/COMFYUI_MODELS.md
new file mode 100644
index 0000000..52a2ee2
--- /dev/null
+++ b/COMFYUI_MODELS.md
@@ -0,0 +1,320 @@
+# ComfyUI Models & Nodes - Usage Guide
+
+This document explains how to use the extended playbook.yml for installing ComfyUI models and custom nodes.
+
+## What's Been Added
+
+### Model Categories
+
+**Image Generation Models:**
+- FLUX.1 Schnell (23GB, essential) - Fast 4-step inference
+- FLUX.1 Dev (23GB, optional) - Balanced quality/speed
+- SDXL Base 1.0 (7GB, essential) - Industry standard
+- SDXL Refiner 1.0 (6GB, optional) - Enhances base output
+- SD 3.5 Large (18GB, optional) - Latest Stability AI
+
+**Video Generation Models:**
+- CogVideoX-5B (20GB, essential) - Professional text-to-video
+- SVD (8GB, essential) - 14 frame image-to-video
+- SVD-XT (8GB, optional) - 25 frame image-to-video
+
+**Audio Generation Models:**
+- MusicGen Small (3GB, optional) - Fast generation
+- MusicGen Medium (11GB, essential) - Balanced quality
+- MusicGen Large (22GB, optional) - Highest quality
+
+**Support Models:**
+- CLIP H (2GB, essential) - For SD 1.5 IP-Adapter
+- CLIP G (7GB, essential) - For SDXL IP-Adapter
+- SigLIP (2GB, essential) - For FLUX models
+
+### Custom Nodes
+
+**Essential Nodes (installed by default):**
+- ComfyUI-Manager - Install/manage custom nodes and models
+- ComfyUI-VideoHelperSuite - Video operations
+- ComfyUI-AnimateDiff-Evolved - Video generation
+- ComfyUI_IPAdapter_plus - Style transfer
+- ComfyUI-Impact-Pack - Auto face enhancement
+- comfyui-sound-lab - Audio generation
+
+**Optional Nodes:**
+- ComfyUI-CogVideoXWrapper - CogVideoX integration
+- Comfyui-Inspire-Pack - Additional tools
+- ComfyUI-Advanced-ControlNet - Advanced control
+- ComfyUI-3D-Pack - 3D asset generation
+
+## Usage Examples
+
+### Quick Setup (Essential Only)
+
+Install ComfyUI with only essential models and nodes:
+
+```bash
+ansible-playbook playbook.yml --tags comfyui-essential
+```
+
+This downloads:
+- Image: FLUX Schnell (23GB) + SDXL Base (7GB)
+- Video: CogVideoX-5B (20GB) + SVD (8GB)
+- Audio: MusicGen Medium (11GB)
+- Support: All 3 CLIP models (11GB)
+- **Total: ~80GB**
+
+### Selective Installation
+
+#### Install ComfyUI Base Only
+
+```bash
+ansible-playbook playbook.yml --tags comfyui
+```
+
+#### Install All Image Models
+
+```bash
+ansible-playbook playbook.yml --tags comfyui-models-image
+```
+
+#### Install All Video Models
+
+```bash
+ansible-playbook playbook.yml --tags comfyui-models-video
+```
+
+#### Install All Audio Models
+
+```bash
+ansible-playbook playbook.yml --tags comfyui-models-audio
+```
+
+#### Install Support Models (CLIP, IP-Adapter)
+
+```bash
+ansible-playbook playbook.yml --tags comfyui-models-support
+```
+
+#### Install Custom Nodes
+
+```bash
+ansible-playbook playbook.yml --tags comfyui-nodes
+```
+
+### Combined Installation
+
+#### Full ComfyUI Setup
+
+```bash
+ansible-playbook playbook.yml --tags comfyui,comfyui-models-all,comfyui-nodes
+```
+
+#### ComfyUI + Image + Video Only
+
+```bash
+ansible-playbook playbook.yml --tags comfyui,comfyui-models-image,comfyui-models-video,comfyui-nodes
+```
+
+## Adding New Models
+
+### Add Image Model
+
+Edit `playbook.yml` and append to `comfyui_image_models`:
+
+```yaml
+- name: "author/model-name"
+  type: "checkpoint"
+  category: "image"
+  size_gb: 15
+  vram_gb: 18
+  format: "fp16"
+  description: "Model description"
+  essential: false  # Set to true for default installs
+```
+
+### Add Custom Node
+
+Append to `comfyui_custom_nodes`:
+
+```yaml
+- name: "NodeName"
+  repo: "https://github.com/author/repo.git"
+  category: "image"  # or video, audio, control, etc.
+  description: "What it does"
+  essential: false
+```
+
+## VRAM Requirements Summary
+
+For your **24GB GPU**, you can run:
+
+### Simultaneously Loadable Combinations
+
+**Option 1: Image Generation Focus**
+- FLUX Schnell FP16 (23GB) → Full model, leaves 1GB for operations
+- FLUX Schnell FP8 (12GB) + SDXL (12GB) → Both loaded
+- SDXL (12GB) + Refiner (12GB) → Both loaded with room
+
+**Option 2: Video Generation**
+- CogVideoX-5B with optimizations (12GB) + SDXL (12GB)
+- SVD-XT (20GB) → Good frame count
+
+**Option 3: Multi-Modal**
+- SDXL (12GB) + MusicGen Medium (8GB)
+- CogVideoX-5B (12GB) + MusicGen Small (4GB)
+
+## Storage Requirements
+
+### Essential Models Only
+- Image: 30GB
+- Video: 28GB
+- Audio: 11GB
+- Support: 11GB
+- **Total: ~80GB**
+
+### All Models (including optional)
+- Image: 54GB
+- Video: 36GB
+- Audio: 36GB
+- Support: 11GB
+- **Total: ~137GB**
+
+## Directory Structure Created
+
+```
+/workspace/ComfyUI/
+├── models/
+│   ├── checkpoints/        # FLUX, SDXL, SD3 models
+│   ├── unet/               # Diffusion U-Nets
+│   ├── vae/                # VAE models
+│   ├── loras/              # LoRA adapters
+│   ├── clip/               # CLIP text encoders
+│   ├── clip_vision/        # CLIP vision models
+│   ├── controlnet/         # ControlNet models
+│   ├── ipadapter/          # IP-Adapter models
+│   ├── embeddings/         # Text embeddings
+│   ├── upscale_models/     # Upscalers
+│   ├── video_models/       # Video generation models
+│   ├── animatediff_models/ # AnimateDiff models
+│   ├── audio_models/       # Audio generation models
+│   └── configs/            # Model configs
+└── custom_nodes/           # Extension nodes
+```
+
+## Extending the Playbook
+
+### Add a New Model Category
+
+1. Add variable list in `vars` section:
+
+```yaml
+comfyui_new_category_models:
+  - name: "model-repo-id"
+    type: "new_type"
+    category: "new_category"
+    size_gb: 10
+    description: "Description"
+    essential: true
+```
+
+2. Create download task section:
+
+```yaml
+- name: Download New Category Models
+  tags: [comfyui-models-newcat, comfyui-models-all]
+  block:
+    # Copy pattern from existing model download tasks
+```
+
+3. Update tags documentation at top of file
+
+## Model Download Notes
+
+- **Async Downloads**: Large models download asynchronously with 1-hour timeouts
+- **Smart Caching**: Models are only downloaded if not already cached
+- **HuggingFace Token**: Set `HF_TOKEN` environment variable for gated models
+- **Error Handling**: Download failures are ignored to allow partial installations
+
+## Performance Tips
+
+### For 24GB GPU:
+
+1. **Use FP8 Quantized FLUX** when you need multiple models loaded
+2. **Use Full FP16 FLUX** when you want maximum quality
+3. **Enable CogVideoX Optimizations** to fit in 12GB (edit model config)
+4. **Use GGUF variants** for FLUX to save VRAM (manually download)
+
+### Recommended First Install:
+
+```bash
+# Essential setup (~80GB, ~1 hour)
+ansible-playbook playbook.yml --tags comfyui-essential
+```
+
+Then add specific models as needed:
+
+```bash
+# Add video models later
+ansible-playbook playbook.yml --tags comfyui-models-video
+```
+
+## Troubleshooting
+
+### Downloads Failing
+
+Check HuggingFace token:
+```bash
+export HF_TOKEN="your_token_here"
+ansible-playbook playbook.yml --tags comfyui-models-image
+```
+
+### Out of Space
+
+Check available storage:
+```bash
+df -h /workspace
+du -sh /workspace/huggingface_cache
+```
+
+Remove optional models:
+```bash
+# Keep only essential models in playbook vars
+```
+
+### Custom Node Dependencies
+
+Some nodes require manual intervention:
+```bash
+cd /workspace/ComfyUI/custom_nodes/NodeName
+pip3 install -r requirements.txt
+```
+
+## Next Steps
+
+After installation:
+
+1. **Start ComfyUI:**
+   ```bash
+   cd /workspace/ComfyUI
+   python3 main.py --listen 0.0.0.0 --port 8188
+   ```
+
+2. **Access via Browser:**
+   ```
+   http://your-runpod-ip:8188
+   ```
+
+3. **Install Additional Nodes via Manager:**
+   - ComfyUI Manager provides GUI for installing more nodes
+   - Browse and install from the manager interface
+
+4. **Download Model Variants:**
+   - Use ComfyUI Manager to download LoRAs
+   - Download ControlNet models as needed
+   - Get additional checkpoints from Civitai
+
+## Support
+
+For issues or questions:
+- Check Ansible logs for download status
+- Verify HuggingFace token is valid
+- Ensure sufficient storage space
+- Check VRAM requirements for your GPU
diff --git a/README.md b/README.md
index ad92044..9d36f45 100644
--- a/README.md
+++ b/README.md
@@ -85,9 +85,9 @@ Unlike the Docker-based version (`orchestrator.py`), the subprocess implementati
    # - PORT=9000 (orchestrator port)
    ```
 
-3. **Run setup (installs dependencies, downloads models):**
+3. **Run setup (installs dependencies via Ansible):**
    ```bash
-   bash scripts/install.sh
+   ansible-playbook playbook.yml
    ```
 
 4. **Start all services:**
@@ -105,11 +105,14 @@ Unlike the Docker-based version (`orchestrator.py`), the subprocess implementati
 If you prefer step-by-step setup:
 
 ```bash
-# Install system dependencies via Ansible
-ansible-playbook playbook.yml --tags base,python,dependencies
-
-# Download model weights
-bash scripts/download-models.sh
+# Install system dependencies and download models via Ansible
+# Use specific tags for selective installation:
+# - base: Base system packages
+# - python: Python runtime via pyenv
+# - vllm: vLLM dependencies and Qwen model
+# - comfyui: ComfyUI installation
+# - comfyui-essential: Essential ComfyUI models only
+ansible-playbook playbook.yml --tags base,python,vllm
 
 # Start orchestrator
 python3 model-orchestrator/orchestrator_subprocess.py
@@ -250,10 +253,7 @@ qwen-2.5-7b:
    run_server(model=model, port=port)
    ```
 
-3. **Download model weights:**
-   ```bash
-   bash scripts/download-models.sh
-   ```
+3. **Models are downloaded automatically by the playbook** when using the appropriate tags (vllm, comfyui, etc.)
 
 4. **Restart orchestrator:**
    ```bash
@@ -363,7 +363,7 @@ sudo journalctl -u ai-orchestrator -n 50
 
 **Common issues:**
 - Out of GPU memory: Check VRAM usage with `nvidia-smi`
-- Missing model weights: Run `bash scripts/download-models.sh`
+- Missing model weights: Run `ansible-playbook playbook.yml --tags vllm` or `--tags comfyui-essential`
 - Port conflicts: Check if port is already in use with `lsof -i :9000`
 
 ### Orchestrator Not Responding
@@ -389,11 +389,11 @@ curl -X POST http://localhost:8000/v1/chat/completions \
 # Check HF_TOKEN is set
 echo $HF_TOKEN
 
-# Set token manually
-export HF_TOKEN=your_token_here
+# Set token in .env file
+echo "HF_TOKEN=your_token_here" >> .env
 
-# Re-run download
-bash scripts/download-models.sh
+# Re-run Ansible playbook to download models
+ansible-playbook playbook.yml --tags vllm
 ```
 
 ## Project Structure
@@ -408,22 +408,18 @@ runpod/
 │   ├── vllm/
 │   │   ├── server.py               # vLLM text generation service
 │   │   └── requirements.txt
-│   ├── flux/
-│   │   ├── server.py               # Flux image generation service
-│   │   └── requirements.txt
-│   └── musicgen/
-│       ├── server.py               # MusicGen audio generation service
-│       └── requirements.txt
+│   └── comfyui/                    # ComfyUI for image/video/audio generation
+│       ├── start.sh                # ComfyUI startup script
+│       └── models/                 # ComfyUI models directory
 ├── scripts/
-│   ├── install.sh                  # Setup script (Ansible + downloads)
 │   ├── start-all.sh                # Start orchestrator + models
-│   ├── stop-all.sh                 # Stop all services
-│   └── download-models.sh          # Download model weights from HuggingFace
+│   └── stop-all.sh                 # Stop all services
 ├── systemd/
 │   └── ai-orchestrator.service     # systemd service file (for VPS)
 ├── playbook.yml                    # Ansible playbook for system setup
 ├── inventory.yml                   # Ansible inventory
 ├── .env.example                    # Environment variables template
+├── COMFYUI_MODELS.md               # ComfyUI models usage guide
 └── README.md                       # This file
 ```
 
diff --git a/models/flux/requirements.txt b/models/flux/requirements.txt
deleted file mode 100644
index 7becd1b..0000000
--- a/models/flux/requirements.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# Flux.1 Image Generation Service Dependencies
-
-# Diffusers library (for Flux.1 pipeline)
-diffusers==0.30.0
-
-# PyTorch (required by diffusers)
-torch==2.1.0
-torchvision==0.16.0
-
-# Transformers (for model components)
-transformers==4.36.0
-
-# Image processing
-Pillow==10.1.0
-
-# Accelerate (for optimizations)
-accelerate==0.25.0
-
-# Additional dependencies for Flux
-sentencepiece==0.1.99
-protobuf==4.25.1
diff --git a/models/flux/server.py b/models/flux/server.py
deleted file mode 100644
index 796599f..0000000
--- a/models/flux/server.py
+++ /dev/null
@@ -1,193 +0,0 @@
-#!/usr/bin/env python3
-"""
-Flux.1 Image Generation Service
-
-OpenAI-compatible image generation using Flux.1 Schnell model.
-Provides /v1/images/generations endpoint.
-"""
-
-import base64
-import io
-import os
-from typing import Optional
-
-import torch
-from diffusers import FluxPipeline
-from fastapi import HTTPException
-from PIL import Image
-from pydantic import BaseModel, Field
-
-# Import base service class
-import sys
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..'))
-from core.base_service import GPUService
-
-
-class ImageGenerationRequest(BaseModel):
-    """Image generation request (OpenAI-compatible)"""
-    model: str = Field(default="flux-schnell", description="Model name")
-    prompt: str = Field(..., description="Text description of the image to generate")
-    n: int = Field(default=1, ge=1, le=4, description="Number of images to generate")
-    size: str = Field(default="1024x1024", description="Image size (e.g., 512x512, 1024x1024)")
-    response_format: str = Field(default="b64_json", description="Response format: url or b64_json")
-    quality: str = Field(default="standard", description="Image quality: standard or hd")
-    style: str = Field(default="natural", description="Image style: natural or vivid")
-
-
-class ImageGenerationResponse(BaseModel):
-    """Image generation response (OpenAI-compatible)"""
-    created: int = Field(..., description="Unix timestamp")
-    data: list = Field(..., description="List of generated images")
-
-
-class FluxService(GPUService):
-    """Flux.1 Schnell image generation service"""
-
-    def __init__(self):
-        # Get port from environment or use default
-        port = int(os.getenv("PORT", "8002"))
-        super().__init__(name="flux-schnell", port=port)
-
-        # Service-specific attributes
-        self.pipeline: Optional[FluxPipeline] = None
-        self.model_name = os.getenv("MODEL_NAME", "black-forest-labs/FLUX.1-schnell")
-
-    async def initialize(self):
-        """Initialize Flux.1 pipeline"""
-        await super().initialize()
-
-        self.logger.info(f"Loading Flux.1 pipeline: {self.model_name}")
-
-        # Load pipeline
-        self.pipeline = FluxPipeline.from_pretrained(
-            self.model_name,
-            torch_dtype=torch.bfloat16,
-            cache_dir=os.getenv("HF_CACHE_DIR", "/workspace/huggingface_cache")
-        )
-
-        # Move to GPU
-        if torch.cuda.is_available():
-            self.pipeline = self.pipeline.to("cuda")
-            self.logger.info("Flux.1 pipeline loaded on GPU")
-        else:
-            self.logger.warning("GPU not available, running on CPU (very slow)")
-
-        # Enable memory optimizations
-        if hasattr(self.pipeline, 'enable_model_cpu_offload'):
-            # This moves models to GPU only when needed, saving VRAM
-            self.pipeline.enable_model_cpu_offload()
-
-        self.logger.info("Flux.1 pipeline initialized successfully")
-
-    async def cleanup(self):
-        """Cleanup resources"""
-        await super().cleanup()
-        if self.pipeline:
-            self.logger.info("Flux.1 pipeline cleanup")
-            self.pipeline = None
-
-    def parse_size(self, size_str: str) -> tuple[int, int]:
-        """Parse size string like '1024x1024' into (width, height)"""
-        try:
-            parts = size_str.lower().split('x')
-            if len(parts) != 2:
-                return (1024, 1024)
-            width = int(parts[0])
-            height = int(parts[1])
-            return (width, height)
-        except:
-            return (1024, 1024)
-
-    def image_to_base64(self, image: Image.Image) -> str:
-        """Convert PIL Image to base64 string"""
-        buffered = io.BytesIO()
-        image.save(buffered, format="PNG")
-        img_bytes = buffered.getvalue()
-        return base64.b64encode(img_bytes).decode('utf-8')
-
-    def create_app(self):
-        """Create FastAPI routes"""
-
-        @self.app.get("/")
-        async def root():
-            """Root endpoint"""
-            return {
-                "service": "Flux.1 Schnell Image Generation",
-                "model": self.model_name,
-                "max_images": 4
-            }
-
-        @self.app.get("/v1/models")
-        async def list_models():
-            """List available models (OpenAI-compatible)"""
-            return {
-                "object": "list",
-                "data": [
-                    {
-                        "id": "flux-schnell",
-                        "object": "model",
-                        "created": 1234567890,
-                        "owned_by": "black-forest-labs",
-                        "permission": [],
-                        "root": self.model_name,
-                        "parent": None,
-                    }
-                ]
-            }
-
-        @self.app.post("/v1/images/generations")
-        async def generate_image(request: ImageGenerationRequest) -> ImageGenerationResponse:
-            """Generate images from text prompt (OpenAI-compatible)"""
-            if not self.pipeline:
-                raise HTTPException(status_code=503, detail="Model not initialized")
-
-            self.logger.info(f"Generating {request.n} image(s): {request.prompt[:100]}...")
-
-            try:
-                # Parse image size
-                width, height = self.parse_size(request.size)
-                self.logger.info(f"Size: {width}x{height}")
-
-                # Generate images
-                images = []
-                for i in range(request.n):
-                    self.logger.info(f"Generating image {i+1}/{request.n}")
-
-                    # Flux.1 Schnell uses 4 inference steps for speed
-                    image = self.pipeline(
-                        prompt=request.prompt,
-                        width=width,
-                        height=height,
-                        num_inference_steps=4,  # Schnell is optimized for 4 steps
-                        guidance_scale=0.0,  # Schnell doesn't use guidance
-                    ).images[0]
-
-                    # Convert to base64
-                    if request.response_format == "b64_json":
-                        image_data = {
-                            "b64_json": self.image_to_base64(image)
-                        }
-                    else:
-                        # For URL format, we'd need to save and serve the file
-                        # For now, we'll return base64 anyway
-                        image_data = {
-                            "b64_json": self.image_to_base64(image)
-                        }
-
-                    images.append(image_data)
-
-                self.logger.info(f"Generated {request.n} image(s) successfully")
-
-                return ImageGenerationResponse(
-                    created=1234567890,
-                    data=images
-                )
-
-            except Exception as e:
-                self.logger.error(f"Error generating image: {e}", exc_info=True)
-                raise HTTPException(status_code=500, detail=str(e))
-
-
-if __name__ == "__main__":
-    service = FluxService()
-    service.run()
diff --git a/models/musicgen/requirements.txt b/models/musicgen/requirements.txt
deleted file mode 100644
index a55f831..0000000
--- a/models/musicgen/requirements.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# MusicGen Music Generation Service Dependencies
-
-# AudioCraft (contains MusicGen)
-audiocraft==1.3.0
-
-# PyTorch (required by AudioCraft)
-torch==2.1.0
-torchaudio==2.1.0
-
-# Additional dependencies
-transformers==4.36.0
diff --git a/models/musicgen/server.py b/models/musicgen/server.py
deleted file mode 100644
index 1b4a4fe..0000000
--- a/models/musicgen/server.py
+++ /dev/null
@@ -1,172 +0,0 @@
-#!/usr/bin/env python3
-"""
-MusicGen Music Generation Service
-
-OpenAI-compatible music generation using Meta's MusicGen Medium model.
-Provides /v1/audio/generations endpoint.
-"""
-
-import base64
-import io
-import os
-import tempfile
-from typing import Optional
-
-import torch
-import torchaudio
-from audiocraft.models import MusicGen
-from fastapi import HTTPException
-from pydantic import BaseModel, Field
-
-# Import base service class
-import sys
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..'))
-from core.base_service import GPUService
-
-
-class AudioGenerationRequest(BaseModel):
-    """Music generation request"""
-    model: str = Field(default="musicgen-medium", description="Model name")
-    prompt: str = Field(..., description="Text description of the music to generate")
-    duration: float = Field(default=30.0, ge=1.0, le=30.0, description="Duration in seconds")
-    temperature: float = Field(default=1.0, ge=0.1, le=2.0, description="Sampling temperature")
-    top_k: int = Field(default=250, ge=0, le=500, description="Top-k sampling")
-    top_p: float = Field(default=0.0, ge=0.0, le=1.0, description="Top-p (nucleus) sampling")
-    cfg_coef: float = Field(default=3.0, ge=1.0, le=15.0, description="Classifier-free guidance coefficient")
-    response_format: str = Field(default="wav", description="Audio format (wav or mp3)")
-
-
-class AudioGenerationResponse(BaseModel):
-    """Music generation response"""
-    audio: str = Field(..., description="Base64-encoded audio data")
-    format: str = Field(..., description="Audio format (wav or mp3)")
-    duration: float = Field(..., description="Duration in seconds")
-    sample_rate: int = Field(..., description="Sample rate in Hz")
-
-
-class MusicGenService(GPUService):
-    """MusicGen music generation service"""
-
-    def __init__(self):
-        # Get port from environment or use default
-        port = int(os.getenv("PORT", "8003"))
-        super().__init__(name="musicgen-medium", port=port)
-
-        # Service-specific attributes
-        self.model: Optional[MusicGen] = None
-        self.model_name = os.getenv("MODEL_NAME", "facebook/musicgen-medium")
-
-    async def initialize(self):
-        """Initialize MusicGen model"""
-        await super().initialize()
-
-        self.logger.info(f"Loading MusicGen model: {self.model_name}")
-
-        # Load model
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.model = MusicGen.get_pretrained(self.model_name, device=device)
-
-        self.logger.info(f"MusicGen model loaded successfully")
-        self.logger.info(f"Max duration: 30 seconds at {self.model.sample_rate}Hz")
-
-    async def cleanup(self):
-        """Cleanup resources"""
-        await super().cleanup()
-        if self.model:
-            self.logger.info("MusicGen model cleanup")
-            self.model = None
-
-    def create_app(self):
-        """Create FastAPI routes"""
-
-        @self.app.get("/")
-        async def root():
-            """Root endpoint"""
-            return {
-                "service": "MusicGen API Server",
-                "model": self.model_name,
-                "max_duration": 30.0,
-                "sample_rate": self.model.sample_rate if self.model else 32000
-            }
-
-        @self.app.get("/v1/models")
-        async def list_models():
-            """List available models (OpenAI-compatible)"""
-            return {
-                "object": "list",
-                "data": [
-                    {
-                        "id": "musicgen-medium",
-                        "object": "model",
-                        "created": 1234567890,
-                        "owned_by": "meta",
-                        "permission": [],
-                        "root": self.model_name,
-                        "parent": None,
-                    }
-                ]
-            }
-
-        @self.app.post("/v1/audio/generations")
-        async def generate_audio(request: AudioGenerationRequest) -> AudioGenerationResponse:
-            """Generate music from text prompt"""
-            if not self.model:
-                raise HTTPException(status_code=503, detail="Model not initialized")
-
-            self.logger.info(f"Generating music: {request.prompt[:100]}...")
-            self.logger.info(f"Duration: {request.duration}s, Temperature: {request.temperature}")
-
-            try:
-                # Set generation parameters
-                self.model.set_generation_params(
-                    duration=request.duration,
-                    temperature=request.temperature,
-                    top_k=request.top_k,
-                    top_p=request.top_p,
-                    cfg_coef=request.cfg_coef,
-                )
-
-                # Generate audio
-                descriptions = [request.prompt]
-                with torch.no_grad():
-                    wav = self.model.generate(descriptions)
-
-                # wav shape: [batch_size, channels, samples]
-                # Extract first batch item
-                audio_data = wav[0].cpu()  # [channels, samples]
-
-                # Get sample rate
-                sample_rate = self.model.sample_rate
-
-                # Save to temporary file
-                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-                    temp_path = temp_file.name
-                    torchaudio.save(temp_path, audio_data, sample_rate)
-
-                # Read audio file and encode to base64
-                with open(temp_path, 'rb') as f:
-                    audio_bytes = f.read()
-
-                # Clean up temporary file
-                os.unlink(temp_path)
-
-                # Encode to base64
-                audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
-
-                self.logger.info(f"Generated {request.duration}s of audio")
-
-                return AudioGenerationResponse(
-                    audio=audio_base64,
-                    format="wav",
-                    duration=request.duration,
-                    sample_rate=sample_rate
-                )
-
-            except Exception as e:
-                self.logger.error(f"Error generating audio: {e}")
-                raise HTTPException(status_code=500, detail=str(e))
-
-
-if __name__ == "__main__":
-    service = MusicGenService()
-    service.run()
diff --git a/playbook.yml b/playbook.yml
index ae70e7f..64c2704 100644
--- a/playbook.yml
+++ b/playbook.yml
@@ -13,14 +13,21 @@
 #   ansible-playbook playbook.yml --tags validate    # Validate installation
 #
 # Tags:
-#   base        - System packages and dependencies
-#   python      - Python environment setup
-#   dependencies- Install Python packages
-#   models      - Download AI models
-#   comfyui     - Install and configure ComfyUI
-#   tailscale   - Install and configure Tailscale
-#   systemd     - Configure systemd services
-#   validate    - Health checks and validation
+#   base                   - System packages and dependencies
+#   python                 - Python environment setup
+#   dependencies           - Install Python packages
+#   models                 - Download AI models (vLLM, Flux, MusicGen)
+#   comfyui                - Install and configure ComfyUI base
+#   comfyui-models-image   - Download ComfyUI image generation models
+#   comfyui-models-video   - Download ComfyUI video generation models
+#   comfyui-models-audio   - Download ComfyUI audio generation models
+#   comfyui-models-support - Download CLIP, IP-Adapter, ControlNet models
+#   comfyui-models-all     - Download all ComfyUI models
+#   comfyui-nodes          - Install essential custom nodes
+#   comfyui-essential      - Quick setup (ComfyUI + essential models only)
+#   tailscale              - Install and configure Tailscale
+#   systemd                - Configure systemd services
+#   validate               - Health checks and validation
 #
 
 - name: Provision RunPod GPU Instance for AI Services
@@ -50,6 +57,208 @@
         name: "facebook/musicgen-medium"
         size_gb: 11
 
+    # ========================================================================
+    # ComfyUI Models - Comprehensive List for 24GB GPU
+    # ========================================================================
+
+    # ComfyUI Image Generation Models
+    comfyui_image_models:
+      # FLUX Models (Black Forest Labs) - State of the art 2025
+      - name: "black-forest-labs/FLUX.1-schnell"
+        type: "checkpoint"
+        category: "image"
+        size_gb: 23
+        vram_gb: 23
+        format: "fp16"
+        description: "FLUX.1 Schnell - Fast 4-step inference"
+        essential: true
+
+      - name: "black-forest-labs/FLUX.1-dev"
+        type: "checkpoint"
+        category: "image"
+        size_gb: 23
+        vram_gb: 23
+        format: "fp16"
+        description: "FLUX.1 Dev - Balanced quality/speed"
+        essential: false
+
+      # SDXL Models - Industry standard
+      - name: "stabilityai/stable-diffusion-xl-base-1.0"
+        type: "checkpoint"
+        category: "image"
+        size_gb: 7
+        vram_gb: 12
+        format: "fp16"
+        description: "SDXL 1.0 Base - 1024x1024 native resolution"
+        essential: true
+
+      - name: "stabilityai/stable-diffusion-xl-refiner-1.0"
+        type: "checkpoint"
+        category: "image"
+        size_gb: 6
+        vram_gb: 12
+        format: "fp16"
+        description: "SDXL Refiner - Enhances base output"
+        essential: false
+
+      # SD 3.5 Models - Latest Stability AI
+      - name: "stabilityai/stable-diffusion-3.5-large"
+        type: "checkpoint"
+        category: "image"
+        size_gb: 18
+        vram_gb: 20
+        format: "fp16"
+        description: "SD 3.5 Large - MMDiT architecture"
+        essential: false
+
+    # ComfyUI Video Generation Models
+    comfyui_video_models:
+      # CogVideoX - Text-to-video
+      - name: "THUDM/CogVideoX-5b"
+        type: "video"
+        category: "video"
+        size_gb: 20
+        vram_gb: 12  # with optimizations
+        description: "CogVideoX 5B - Professional text-to-video"
+        essential: true
+
+      # Stable Video Diffusion
+      - name: "stabilityai/stable-video-diffusion-img2vid"
+        type: "video"
+        category: "video"
+        size_gb: 8
+        vram_gb: 16
+        description: "SVD - 14 frame image-to-video"
+        essential: true
+
+      - name: "stabilityai/stable-video-diffusion-img2vid-xt"
+        type: "video"
+        category: "video"
+        size_gb: 8
+        vram_gb: 20
+        description: "SVD-XT - 25 frame image-to-video"
+        essential: false
+
+    # ComfyUI Audio Generation Models
+    comfyui_audio_models:
+      - name: "facebook/musicgen-small"
+        type: "audio"
+        category: "audio"
+        size_gb: 3
+        vram_gb: 4
+        description: "MusicGen Small - Fast music generation"
+        essential: false
+
+      - name: "facebook/musicgen-medium"
+        type: "audio"
+        category: "audio"
+        size_gb: 11
+        vram_gb: 8
+        description: "MusicGen Medium - Balanced quality"
+        essential: true
+
+      - name: "facebook/musicgen-large"
+        type: "audio"
+        category: "audio"
+        size_gb: 22
+        vram_gb: 16
+        description: "MusicGen Large - Highest quality"
+        essential: false
+
+    # ComfyUI Supporting Models (CLIP, IP-Adapter, ControlNet)
+    comfyui_support_models:
+      # CLIP Vision Models
+      - name: "openai/clip-vit-large-patch14"
+        type: "clip_vision"
+        category: "support"
+        size_gb: 2
+        description: "CLIP H - For SD 1.5 IP-Adapter"
+        essential: true
+        target_dir: "clip_vision"
+
+      - name: "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
+        type: "clip_vision"
+        category: "support"
+        size_gb: 7
+        description: "CLIP G - For SDXL IP-Adapter"
+        essential: true
+        target_dir: "clip_vision"
+
+      - name: "google/siglip-so400m-patch14-384"
+        type: "clip_vision"
+        category: "support"
+        size_gb: 2
+        description: "SigLIP - For FLUX models"
+        essential: true
+        target_dir: "clip_vision"
+
+    # ComfyUI Custom Nodes - Essential Extensions
+    comfyui_custom_nodes:
+      # ComfyUI Manager - Must have
+      - name: "ComfyUI-Manager"
+        repo: "https://github.com/ltdrdata/ComfyUI-Manager.git"
+        category: "manager"
+        description: "Install/manage custom nodes and models"
+        essential: true
+
+      # Video Generation Nodes
+      - name: "ComfyUI-VideoHelperSuite"
+        repo: "https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git"
+        category: "video"
+        description: "Video operations and processing"
+        essential: true
+
+      - name: "ComfyUI-AnimateDiff-Evolved"
+        repo: "https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved.git"
+        category: "video"
+        description: "AnimateDiff for video generation"
+        essential: true
+
+      - name: "ComfyUI-CogVideoXWrapper"
+        repo: "https://github.com/kijai/ComfyUI-CogVideoXWrapper.git"
+        category: "video"
+        description: "CogVideoX integration"
+        essential: false
+
+      # Image Enhancement Nodes
+      - name: "ComfyUI_IPAdapter_plus"
+        repo: "https://github.com/cubiq/ComfyUI_IPAdapter_plus.git"
+        category: "image"
+        description: "IP-Adapter for style transfer"
+        essential: true
+
+      - name: "ComfyUI-Impact-Pack"
+        repo: "https://github.com/ltdrdata/ComfyUI-Impact-Pack.git"
+        category: "image"
+        description: "Auto face enhancement, detailer"
+        essential: true
+
+      - name: "Comfyui-Inspire-Pack"
+        repo: "https://github.com/ltdrdata/ComfyUI-Inspire-Pack.git"
+        category: "image"
+        description: "Additional inspiration tools"
+        essential: false
+
+      # Audio Generation Nodes
+      - name: "comfyui-sound-lab"
+        repo: "https://github.com/eigenpunk/comfyui-sound-lab.git"
+        category: "audio"
+        description: "MusicGen and Stable Audio integration"
+        essential: true
+
+      # Utility Nodes
+      - name: "ComfyUI-Advanced-ControlNet"
+        repo: "https://github.com/Kosinkadink/ComfyUI-Advanced-ControlNet.git"
+        category: "control"
+        description: "Advanced ControlNet features"
+        essential: false
+
+      - name: "ComfyUI-3D-Pack"
+        repo: "https://github.com/MrForExample/ComfyUI-3D-Pack.git"
+        category: "3d"
+        description: "3D asset generation"
+        essential: false
+
     # Service configuration
     services:
       - name: orchestrator
@@ -58,12 +267,6 @@
       - name: vllm
         port: 8001
         script: models/vllm/server.py
-      - name: flux
-        port: 8002
-        script: models/flux/server.py
-      - name: musicgen
-        port: 8003
-        script: models/musicgen/server.py
       - name: comfyui
         port: 8188
         script: models/comfyui/start.sh
@@ -146,18 +349,6 @@
             executable: pip3
           become: true
 
-        - name: Install Flux dependencies
-          pip:
-            requirements: "{{ ai_dir }}/models/flux/requirements.txt"
-            executable: pip3
-          become: true
-
-        - name: Install MusicGen dependencies
-          pip:
-            requirements: "{{ ai_dir }}/models/musicgen/requirements.txt"
-            executable: pip3
-          become: true
-
     #
     # ComfyUI Installation
     #
@@ -195,12 +386,26 @@
             state: directory
             mode: '0755'
           loop:
+            # Image Model Directories
             - checkpoints
             - unet
             - vae
             - loras
             - clip
+            - clip_vision
             - controlnet
+            - ipadapter
+            - embeddings
+            - upscale_models
+            # Video Model Directories
+            - video_models
+            - animatediff_models
+            - animatediff_motion_lora
+            # Audio Model Directories
+            - audio_models
+            # Utility Directories
+            - configs
+            - custom_nodes
 
         - name: Create symlink for Flux model in ComfyUI
           file:
@@ -231,6 +436,208 @@
 
               Access: http://localhost:8188
 
+    #
+    # ComfyUI Custom Nodes Installation
+    #
+    - name: Install ComfyUI Custom Nodes
+      tags: [comfyui-nodes, comfyui-essential]
+      block:
+        - name: Install essential ComfyUI custom nodes
+          git:
+            repo: "{{ item.repo }}"
+            dest: "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}"
+            version: main
+            update: yes
+          loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}"
+          loop_control:
+            label: "{{ item.name }}"
+          ignore_errors: yes
+
+        - name: Install custom node dependencies
+          shell: |
+            if [ -f "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt" ]; then
+              pip3 install -r "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt"
+            fi
+          loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}"
+          loop_control:
+            label: "{{ item.name }}"
+          become: true
+          ignore_errors: yes
+
+        - name: Display custom nodes installation summary
+          debug:
+            msg: |
+              ✓ Custom nodes installed successfully!
+
+              Essential nodes:
+              {% for node in comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list %}
+              - {{ node.name }}: {{ node.description }}
+              {% endfor %}
+
+              To install ALL nodes (including optional):
+              ansible-playbook playbook.yml --tags comfyui-nodes-all
+
+    #
+    # ComfyUI Image Models Download
+    #
+    - name: Download ComfyUI Image Generation Models
+      tags: [comfyui-models-image, comfyui-models-all, comfyui-essential]
+      block:
+        - name: Download essential image generation models
+          shell: |
+            python3 -c "
+            from huggingface_hub import snapshot_download
+            import os
+            os.environ['HF_HOME'] = '{{ cache_dir }}'
+            print('Downloading {{ item.name }}...')
+            snapshot_download(
+                repo_id='{{ item.name }}',
+                cache_dir='{{ cache_dir }}',
+                token=os.environ.get('HF_TOKEN')
+            )
+            print('Completed {{ item.name }}')
+            "
+          environment:
+            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
+            HF_HOME: "{{ cache_dir }}"
+          loop: "{{ comfyui_image_models | selectattr('essential', 'equalto', true) | list }}"
+          loop_control:
+            label: "{{ item.name }} ({{ item.size_gb }}GB)"
+          async: 3600
+          poll: 30
+          ignore_errors: yes
+
+        - name: Display image models summary
+          debug:
+            msg: |
+              Image generation models downloaded:
+              {% for model in comfyui_image_models | selectattr('essential', 'equalto', true) | list %}
+              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB, {{ model.vram_gb }}GB VRAM)
+              {% endfor %}
+
+              Total size: ~{{ (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) }}GB
+
+    #
+    # ComfyUI Video Models Download
+    #
+    - name: Download ComfyUI Video Generation Models
+      tags: [comfyui-models-video, comfyui-models-all]
+      block:
+        - name: Download essential video generation models
+          shell: |
+            python3 -c "
+            from huggingface_hub import snapshot_download
+            import os
+            os.environ['HF_HOME'] = '{{ cache_dir }}'
+            print('Downloading {{ item.name }}...')
+            snapshot_download(
+                repo_id='{{ item.name }}',
+                cache_dir='{{ cache_dir }}',
+                token=os.environ.get('HF_TOKEN')
+            )
+            print('Completed {{ item.name }}')
+            "
+          environment:
+            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
+            HF_HOME: "{{ cache_dir }}"
+          loop: "{{ comfyui_video_models | selectattr('essential', 'equalto', true) | list }}"
+          loop_control:
+            label: "{{ item.name }} ({{ item.size_gb }}GB)"
+          async: 3600
+          poll: 30
+          ignore_errors: yes
+
+        - name: Display video models summary
+          debug:
+            msg: |
+              Video generation models downloaded:
+              {% for model in comfyui_video_models | selectattr('essential', 'equalto', true) | list %}
+              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB, {{ model.vram_gb }}GB VRAM)
+              {% endfor %}
+
+    #
+    # ComfyUI Audio Models Download
+    #
+    - name: Download ComfyUI Audio Generation Models
+      tags: [comfyui-models-audio, comfyui-models-all]
+      block:
+        - name: Download essential audio generation models
+          shell: |
+            python3 -c "
+            from huggingface_hub import snapshot_download
+            import os
+            os.environ['HF_HOME'] = '{{ cache_dir }}'
+            print('Downloading {{ item.name }}...')
+            snapshot_download(
+                repo_id='{{ item.name }}',
+                cache_dir='{{ cache_dir }}',
+                token=os.environ.get('HF_TOKEN')
+            )
+            print('Completed {{ item.name }}')
+            "
+          environment:
+            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
+            HF_HOME: "{{ cache_dir }}"
+          loop: "{{ comfyui_audio_models | selectattr('essential', 'equalto', true) | list }}"
+          loop_control:
+            label: "{{ item.name }} ({{ item.size_gb }}GB)"
+          async: 3600
+          poll: 30
+          ignore_errors: yes
+
+        - name: Display audio models summary
+          debug:
+            msg: |
+              Audio generation models downloaded:
+              {% for model in comfyui_audio_models | selectattr('essential', 'equalto', true) | list %}
+              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB)
+              {% endfor %}
+
+    #
+    # ComfyUI Support Models Download (CLIP, IP-Adapter, ControlNet)
+    #
+    - name: Download ComfyUI Support Models
+      tags: [comfyui-models-support, comfyui-models-all, comfyui-essential]
+      block:
+        - name: Download essential support models (CLIP, IP-Adapter)
+          shell: |
+            python3 -c "
+            from huggingface_hub import snapshot_download
+            import os
+            os.environ['HF_HOME'] = '{{ cache_dir }}'
+            print('Downloading {{ item.name }}...')
+            snapshot_download(
+                repo_id='{{ item.name }}',
+                cache_dir='{{ cache_dir }}',
+                token=os.environ.get('HF_TOKEN')
+            )
+            print('Completed {{ item.name }}')
+            "
+          environment:
+            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
+            HF_HOME: "{{ cache_dir }}"
+          loop: "{{ comfyui_support_models | selectattr('essential', 'equalto', true) | list }}"
+          loop_control:
+            label: "{{ item.name }} ({{ item.size_gb }}GB)"
+          async: 1800
+          poll: 30
+          ignore_errors: yes
+
+        - name: Display support models summary
+          debug:
+            msg: |
+              Support models downloaded:
+              {% for model in comfyui_support_models | selectattr('essential', 'equalto', true) | list %}
+              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB)
+              {% endfor %}
+
+              Total ComfyUI models cache: ~{{
+                (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
+                (comfyui_video_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
+                (comfyui_audio_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
+                (comfyui_support_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb'))
+              }}GB
+
     #
     # Download AI Models
     #
diff --git a/scripts/download-models.sh b/scripts/download-models.sh
deleted file mode 100644
index e093f66..0000000
--- a/scripts/download-models.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-#
-# Download AI Models
-# Wrapper for Ansible models tag
-#
-
-set -e
-
-cd "$(dirname "$0")/.."
-
-echo "========================================="
-echo "  Downloading AI Models (~37GB)"
-echo "========================================="
-echo ""
-
-# Source .env if it exists
-if [ -f .env ]; then
-    set -a
-    source .env
-    set +a
-fi
-
-# Check HF_TOKEN
-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: HF_TOKEN not set"
-    echo "Add HF_TOKEN to .env file"
-    exit 1
-fi
-
-# Run Ansible with models tag
-ansible-playbook playbook.yml --tags models
-
-echo ""
-echo "========================================="
-echo "  Model download complete!"
-echo "========================================="
diff --git a/scripts/install.sh b/scripts/install.sh
deleted file mode 100644
index 7a24314..0000000
--- a/scripts/install.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-#
-# Install AI Infrastructure
-# Wrapper script for Ansible playbook
-#
-# Usage:
-#   ./install.sh              # Full installation
-#   ./install.sh --tags base  # Install specific components
-#
-
-set -e
-
-cd "$(dirname "$0")/.."
-
-echo "========================================="
-echo "  RunPod AI Infrastructure Installation"
-echo "========================================="
-echo ""
-
-# Check if Ansible is installed
-if ! command -v ansible-playbook &> /dev/null; then
-    echo "Ansible not found. Installing..."
-    sudo apt update
-    sudo apt install -y ansible
-fi
-
-# Check for .env file
-if [ ! -f .env ]; then
-    echo "Warning: .env file not found"
-    echo "Copy .env.example to .env and add your HF_TOKEN"
-    echo ""
-fi
-
-# Source .env if it exists
-if [ -f .env ]; then
-    set -a
-    source .env
-    set +a
-fi
-
-# Run Ansible playbook
-echo "Running Ansible playbook..."
-echo ""
-
-ansible-playbook playbook.yml "$@"
-
-echo ""
-echo "========================================="
-echo "  Installation complete!"
-echo "========================================="
diff --git a/scripts/prepare-template.sh b/scripts/prepare-template.sh
deleted file mode 100644
index 0ffd15a..0000000
--- a/scripts/prepare-template.sh
+++ /dev/null
@@ -1,314 +0,0 @@
-#!/bin/bash
-#
-# RunPod Template Preparation Script
-# Prepares a RunPod instance for template creation
-#
-# This script:
-# 1. Installs Docker & Docker Compose
-# 2. Installs Tailscale
-# 3. Builds all Docker images
-# 4. Pre-downloads all models
-# 5. Validates everything works
-# 6. Cleans up for template creation
-#
-# Usage: ./prepare-template.sh
-# Run this on the RunPod instance you want to save as a template
-#
-
-set -e  # Exit on error
-
-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m' # No Color
-
-# Logging functions
-log_info() {
-    echo -e "${BLUE}[INFO]${NC} $1"
-}
-
-log_success() {
-    echo -e "${GREEN}[SUCCESS]${NC} $1"
-}
-
-log_warn() {
-    echo -e "${YELLOW}[WARN]${NC} $1"
-}
-
-log_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-}
-
-# Check if running on RunPod
-check_environment() {
-    log_info "Checking environment..."
-
-    if ! nvidia-smi &> /dev/null; then
-        log_error "NVIDIA GPU not detected. Are you running on a GPU instance?"
-        exit 1
-    fi
-
-    if [ ! -d "/workspace" ]; then
-        log_warn "/workspace directory not found. Creating it..."
-        mkdir -p /workspace
-    fi
-
-    log_success "Environment check passed"
-}
-
-# Install Docker
-install_docker() {
-    if command -v docker &> /dev/null; then
-        log_info "Docker already installed: $(docker --version)"
-        return
-    fi
-
-    log_info "Installing Docker..."
-    curl -fsSL https://get.docker.com -o get-docker.sh
-    sh get-docker.sh
-    rm get-docker.sh
-
-    # Start Docker daemon (RunPod requires --iptables=false --bridge=none)
-    log_info "Starting Docker daemon..."
-    pkill dockerd 2>/dev/null || true
-    sleep 2
-    dockerd --iptables=false --bridge=none > /var/log/dockerd.log 2>&1 &
-    sleep 10
-
-    # Verify Docker is running
-    if docker ps &> /dev/null; then
-        log_success "Docker installed and running: $(docker --version)"
-    else
-        log_error "Docker failed to start. Check /var/log/dockerd.log"
-        exit 1
-    fi
-}
-
-# Install Docker Compose
-install_docker_compose() {
-    if docker compose version &> /dev/null; then
-        log_info "Docker Compose already installed: $(docker compose version)"
-        return
-    fi
-
-    log_info "Installing Docker Compose..."
-
-    # Docker Compose is usually bundled with Docker now
-    # If not, install it separately
-    if ! docker compose version &> /dev/null; then
-        DOCKER_COMPOSE_VERSION="v2.23.0"
-        curl -L "https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
-        chmod +x /usr/local/bin/docker-compose
-    fi
-
-    log_success "Docker Compose installed: $(docker compose version)"
-}
-
-# Install Tailscale
-install_tailscale() {
-    if command -v tailscale &> /dev/null; then
-        log_info "Tailscale already installed: $(tailscale version)"
-        return
-    fi
-
-    log_info "Installing Tailscale..."
-    curl -fsSL https://tailscale.com/install.sh | sh
-
-    log_success "Tailscale installed: $(tailscale version)"
-}
-
-# Build Docker images
-build_docker_images() {
-    log_info "Building Docker images..."
-
-    cd /workspace/ai
-
-    # Use legacy builder (buildkit has permission issues in RunPod)
-    export DOCKER_BUILDKIT=0
-
-    # Build orchestrator
-    log_info "Building orchestrator..."
-    docker compose -f compose.yaml build orchestrator
-
-    # Build vLLM
-    log_info "Building vLLM..."
-    docker compose -f compose.yaml build vllm-qwen
-
-    # Build MusicGen
-    log_info "Building MusicGen..."
-    docker compose -f compose.yaml build musicgen
-
-    # Pull Flux image (pre-built)
-    log_info "Pulling Flux.1 image..."
-    docker pull ghcr.io/matatonic/openedai-images-flux:latest
-
-    log_success "All Docker images built"
-}
-
-# Pre-download models
-download_models() {
-    log_info "Pre-downloading AI models (this will take 30-45 minutes)..."
-
-    cd /workspace/ai
-
-    # Create model cache directories
-    mkdir -p /workspace/huggingface_cache
-    mkdir -p /workspace/flux/models
-    mkdir -p /workspace/musicgen/models
-
-    # Download Qwen 2.5 7B
-    log_info "Downloading Qwen 2.5 7B (14GB)..."
-    docker compose --profile text up -d vllm-qwen
-
-    # Wait for model to download
-    log_info "Waiting for Qwen model to download..."
-    while ! docker logs ai_vllm-qwen_1 2>&1 | grep -q "Model loaded successfully\|AsyncLLMEngine initialized"; do
-        echo -n "."
-        sleep 10
-    done
-    echo ""
-    log_success "Qwen 2.5 7B downloaded"
-
-    docker compose stop vllm-qwen
-
-    # Download Flux.1 Schnell
-    log_info "Downloading Flux.1 Schnell (12GB)..."
-    docker compose --profile image up -d flux
-
-    log_info "Waiting for Flux model to download..."
-    sleep 180  # Flux takes about 3 minutes to download and initialize
-    log_success "Flux.1 Schnell downloaded"
-
-    docker compose stop flux
-
-    # Download MusicGen Medium
-    log_info "Downloading MusicGen Medium (11GB)..."
-    docker compose --profile audio up -d musicgen
-
-    log_info "Waiting for MusicGen model to download..."
-    while ! docker logs ai_musicgen_1 2>&1 | grep -q "Model loaded successfully\|initialized successfully"; do
-        echo -n "."
-        sleep 10
-    done
-    echo ""
-    log_success "MusicGen Medium downloaded"
-
-    docker compose stop musicgen
-
-    log_success "All models downloaded and cached"
-}
-
-# Validate installation
-validate_installation() {
-    log_info "Validating installation..."
-
-    cd /workspace/ai
-
-    # Start orchestrator
-    log_info "Starting orchestrator for validation..."
-    docker compose -f compose.yaml up -d orchestrator
-
-    sleep 10
-
-    # Check orchestrator health
-    if curl -s http://localhost:9000/health | grep -q "healthy\|ok"; then
-        log_success "Orchestrator is healthy"
-    else
-        log_error "Orchestrator health check failed"
-        docker logs ai_orchestrator
-        exit 1
-    fi
-
-    # Check models are cached
-    if [ -d "/workspace/huggingface_cache" ] && [ "$(ls -A /workspace/huggingface_cache)" ]; then
-        log_success "Hugging Face cache populated"
-    else
-        log_warn "Hugging Face cache may be empty"
-    fi
-
-    # Stop orchestrator
-    docker compose -f compose.yaml down
-
-    log_success "Validation passed"
-}
-
-# Clean up for template creation
-cleanup_for_template() {
-    log_info "Cleaning up for template creation..."
-
-    # Remove sensitive data
-    log_info "Removing sensitive files..."
-    rm -f /workspace/ai/.env
-    rm -f /root/.ssh/known_hosts
-    rm -f /root/.bash_history
-    rm -f /root/.python_history
-
-    # Clear logs
-    log_info "Clearing logs..."
-    find /var/log -type f -name "*.log" -delete 2>/dev/null || true
-    journalctl --vacuum-time=1s 2>/dev/null || true
-
-    # Logout from Tailscale
-    log_info "Logging out from Tailscale..."
-    tailscale logout 2>/dev/null || true
-
-    # Clean Docker (but keep images)
-    log_info "Cleaning Docker cache..."
-    docker system prune -af --volumes || true
-
-    # Create template marker
-    log_info "Creating template version marker..."
-    cat > /workspace/TEMPLATE_VERSION <<EOF
-RunPod Multi-Modal AI Template
-Version: 1.0
-Created: $(date)
-Components:
-- Docker $(docker --version | cut -d' ' -f3)
-- Docker Compose $(docker compose version --short)
-- Tailscale $(tailscale version --short 2>/dev/null || echo "installed")
-- Orchestrator (ai_orchestrator)
-- Text Generation (vLLM + Qwen 2.5 7B)
-- Image Generation (Flux.1 Schnell)
-- Music Generation (MusicGen Medium)
-Models Cached: ~37GB
-EOF
-
-    log_success "Cleanup complete"
-}
-
-# Main execution
-main() {
-    log_info "======================================"
-    log_info "RunPod Template Preparation Script"
-    log_info "======================================"
-    log_info ""
-
-    check_environment
-    install_docker
-    install_docker_compose
-    install_tailscale
-    build_docker_images
-    download_models
-    validate_installation
-    cleanup_for_template
-
-    log_info ""
-    log_success "======================================"
-    log_success "Template Preparation Complete!"
-    log_success "======================================"
-    log_info ""
-    log_info "Next steps:"
-    log_info "1. Review /workspace/TEMPLATE_VERSION"
-    log_info "2. Go to RunPod Dashboard → My Pods"
-    log_info "3. Select this pod → ⋮ → Save as Template"
-    log_info "4. Name: multi-modal-ai-v1.0"
-    log_info "5. Test deployment from template"
-    log_info ""
-    log_info "Template will enable 2-3 minute deployments instead of 60-90 minutes!"
-    log_info ""
-}
-
-# Run main function
-main "$@"
diff --git a/scripts/stop-all.sh b/scripts/stop-all.sh
index df06e6c..d585a67 100644
--- a/scripts/stop-all.sh
+++ b/scripts/stop-all.sh
@@ -17,8 +17,6 @@ pkill -f "orchestrator_subprocess.py" || echo "Orchestrator not running"
 
 echo "Stopping model services..."
 pkill -f "models/vllm/server.py" || echo "vLLM not running"
-pkill -f "models/flux/server.py" || echo "Flux not running"
-pkill -f "models/musicgen/server.py" || echo "MusicGen not running"
 
 echo ""
 echo "All services stopped"
diff --git a/systemd/ai-orchestrator.service b/systemd/ai-orchestrator.service
deleted file mode 100644
index 8610ce9..0000000
--- a/systemd/ai-orchestrator.service
+++ /dev/null
@@ -1,24 +0,0 @@
-[Unit]
-Description=AI Model Orchestrator for RunPod
-After=network.target
-StartLimitIntervalSec=0
-
-[Service]
-Type=simple
-Restart=always
-RestartSec=10
-User=root
-WorkingDirectory=/workspace/ai
-EnvironmentFile=/workspace/ai/.env
-ExecStart=/usr/bin/python3 /workspace/ai/model-orchestrator/orchestrator_subprocess.py
-StandardOutput=journal
-StandardError=journal
-SyslogIdentifier=ai-orchestrator
-
-# Process management
-KillMode=process
-KillSignal=SIGTERM
-TimeoutStopSec=30
-
-[Install]
-WantedBy=multi-user.target