From c9b01eef68d23f11767d51b570e57f2c58c413ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= Date: Sat, 22 Nov 2025 00:31:26 +0100 Subject: [PATCH] refactor: consolidate model management into Ansible playbook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove flux/musicgen standalone implementations in favor of ComfyUI: - Delete models/flux/ and models/musicgen/ directories - Remove redundant scripts (install.sh, download-models.sh, prepare-template.sh) - Update README.md to reference Ansible playbook commands - Update playbook.yml to remove flux/musicgen service definitions - Add COMFYUI_MODELS.md with comprehensive model installation guide - Update stop-all.sh to only manage orchestrator and vLLM services All model downloads and dependency management now handled via Ansible playbook tags (base, python, vllm, comfyui, comfyui-essential). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- COMFYUI_MODELS.md | 320 +++++++++++++++++++++ README.md | 46 ++-- models/flux/requirements.txt | 21 -- models/flux/server.py | 193 ------------- models/musicgen/requirements.txt | 11 - models/musicgen/server.py | 172 ------------ playbook.yml | 459 +++++++++++++++++++++++++++++-- scripts/download-models.sh | 36 --- scripts/install.sh | 50 ---- scripts/prepare-template.sh | 314 --------------------- scripts/stop-all.sh | 2 - systemd/ai-orchestrator.service | 24 -- 12 files changed, 774 insertions(+), 874 deletions(-) create mode 100644 COMFYUI_MODELS.md delete mode 100644 models/flux/requirements.txt delete mode 100644 models/flux/server.py delete mode 100644 models/musicgen/requirements.txt delete mode 100644 models/musicgen/server.py delete mode 100644 scripts/download-models.sh delete mode 100644 scripts/install.sh delete mode 100644 scripts/prepare-template.sh delete mode 100644 systemd/ai-orchestrator.service diff --git a/COMFYUI_MODELS.md b/COMFYUI_MODELS.md new file mode 
100644 index 0000000..52a2ee2 --- /dev/null +++ b/COMFYUI_MODELS.md @@ -0,0 +1,320 @@ +# ComfyUI Models & Nodes - Usage Guide + +This document explains how to use the extended playbook.yml for installing ComfyUI models and custom nodes. + +## What's Been Added + +### Model Categories + +**Image Generation Models:** +- FLUX.1 Schnell (23GB, essential) - Fast 4-step inference +- FLUX.1 Dev (23GB, optional) - Balanced quality/speed +- SDXL Base 1.0 (7GB, essential) - Industry standard +- SDXL Refiner 1.0 (6GB, optional) - Enhances base output +- SD 3.5 Large (18GB, optional) - Latest Stability AI + +**Video Generation Models:** +- CogVideoX-5B (20GB, essential) - Professional text-to-video +- SVD (8GB, essential) - 14 frame image-to-video +- SVD-XT (8GB, optional) - 25 frame image-to-video + +**Audio Generation Models:** +- MusicGen Small (3GB, optional) - Fast generation +- MusicGen Medium (11GB, essential) - Balanced quality +- MusicGen Large (22GB, optional) - Highest quality + +**Support Models:** +- CLIP H (2GB, essential) - For SD 1.5 IP-Adapter +- CLIP G (7GB, essential) - For SDXL IP-Adapter +- SigLIP (2GB, essential) - For FLUX models + +### Custom Nodes + +**Essential Nodes (installed by default):** +- ComfyUI-Manager - Install/manage custom nodes and models +- ComfyUI-VideoHelperSuite - Video operations +- ComfyUI-AnimateDiff-Evolved - Video generation +- ComfyUI_IPAdapter_plus - Style transfer +- ComfyUI-Impact-Pack - Auto face enhancement +- comfyui-sound-lab - Audio generation + +**Optional Nodes:** +- ComfyUI-CogVideoXWrapper - CogVideoX integration +- Comfyui-Inspire-Pack - Additional tools +- ComfyUI-Advanced-ControlNet - Advanced control +- ComfyUI-3D-Pack - 3D asset generation + +## Usage Examples + +### Quick Setup (Essential Only) + +Install ComfyUI with only essential models and nodes: + +```bash +ansible-playbook playbook.yml --tags comfyui-essential +``` + +This downloads: +- Image: FLUX Schnell (23GB) + SDXL Base (7GB) +- Video: CogVideoX-5B 
(20GB) + SVD (8GB) +- Audio: MusicGen Medium (11GB) +- Support: All 3 CLIP models (11GB) +- **Total: ~80GB** + +### Selective Installation + +#### Install ComfyUI Base Only + +```bash +ansible-playbook playbook.yml --tags comfyui +``` + +#### Install All Image Models + +```bash +ansible-playbook playbook.yml --tags comfyui-models-image +``` + +#### Install All Video Models + +```bash +ansible-playbook playbook.yml --tags comfyui-models-video +``` + +#### Install All Audio Models + +```bash +ansible-playbook playbook.yml --tags comfyui-models-audio +``` + +#### Install Support Models (CLIP, IP-Adapter) + +```bash +ansible-playbook playbook.yml --tags comfyui-models-support +``` + +#### Install Custom Nodes + +```bash +ansible-playbook playbook.yml --tags comfyui-nodes +``` + +### Combined Installation + +#### Full ComfyUI Setup + +```bash +ansible-playbook playbook.yml --tags comfyui,comfyui-models-all,comfyui-nodes +``` + +#### ComfyUI + Image + Video Only + +```bash +ansible-playbook playbook.yml --tags comfyui,comfyui-models-image,comfyui-models-video,comfyui-nodes +``` + +## Adding New Models + +### Add Image Model + +Edit `playbook.yml` and append to `comfyui_image_models`: + +```yaml +- name: "author/model-name" + type: "checkpoint" + category: "image" + size_gb: 15 + vram_gb: 18 + format: "fp16" + description: "Model description" + essential: false # Set to true for default installs +``` + +### Add Custom Node + +Append to `comfyui_custom_nodes`: + +```yaml +- name: "NodeName" + repo: "https://github.com/author/repo.git" + category: "image" # or video, audio, control, etc. 
+ description: "What it does" + essential: false +``` + +## VRAM Requirements Summary + +For your **24GB GPU**, you can run: + +### Simultaneously Loadable Combinations + +**Option 1: Image Generation Focus** +- FLUX Schnell FP16 (23GB) → Full model, leaves 1GB for operations +- FLUX Schnell FP8 (12GB) + SDXL (12GB) → Both loaded +- SDXL (12GB) + Refiner (12GB) → Both loaded, no headroom + +**Option 2: Video Generation** +- CogVideoX-5B with optimizations (12GB) + SDXL (12GB) +- SVD-XT (20GB) → Good frame count + +**Option 3: Multi-Modal** +- SDXL (12GB) + MusicGen Medium (8GB) +- CogVideoX-5B (12GB) + MusicGen Small (4GB) + +## Storage Requirements + +### Essential Models Only +- Image: 30GB +- Video: 28GB +- Audio: 11GB +- Support: 11GB +- **Total: ~80GB** + +### All Models (including optional) +- Image: 77GB +- Video: 36GB +- Audio: 36GB +- Support: 11GB +- **Total: ~160GB** + +## Directory Structure Created + +``` +/workspace/ComfyUI/ +├── models/ +│ ├── checkpoints/ # FLUX, SDXL, SD3 models +│ ├── unet/ # Diffusion U-Nets +│ ├── vae/ # VAE models +│ ├── loras/ # LoRA adapters +│ ├── clip/ # CLIP text encoders +│ ├── clip_vision/ # CLIP vision models +│ ├── controlnet/ # ControlNet models +│ ├── ipadapter/ # IP-Adapter models +│ ├── embeddings/ # Text embeddings +│ ├── upscale_models/ # Upscalers +│ ├── video_models/ # Video generation models +│ ├── animatediff_models/ # AnimateDiff models +│ ├── audio_models/ # Audio generation models +│ └── configs/ # Model configs +└── custom_nodes/ # Extension nodes +``` + +## Extending the Playbook + +### Add a New Model Category + +1. Add variable list in `vars` section: + +```yaml +comfyui_new_category_models: + - name: "model-repo-id" + type: "new_type" + category: "new_category" + size_gb: 10 + description: "Description" + essential: true +``` + +2.
Create download task section: + +```yaml +- name: Download New Category Models + tags: [comfyui-models-newcat, comfyui-models-all] + block: + # Copy pattern from existing model download tasks +``` + +3. Update tags documentation at top of file + +## Model Download Notes + +- **Async Downloads**: Large models download asynchronously with 1-hour timeouts +- **Smart Caching**: Models are only downloaded if not already cached +- **HuggingFace Token**: Set `HF_TOKEN` environment variable for gated models +- **Error Handling**: Download failures are ignored to allow partial installations + +## Performance Tips + +### For 24GB GPU: + +1. **Use FP8 Quantized FLUX** when you need multiple models loaded +2. **Use Full FP16 FLUX** when you want maximum quality +3. **Enable CogVideoX Optimizations** to fit in 12GB (edit model config) +4. **Use GGUF variants** for FLUX to save VRAM (manually download) + +### Recommended First Install: + +```bash +# Essential setup (~80GB, ~1 hour) +ansible-playbook playbook.yml --tags comfyui-essential +``` + +Then add specific models as needed: + +```bash +# Add video models later +ansible-playbook playbook.yml --tags comfyui-models-video +``` + +## Troubleshooting + +### Downloads Failing + +Check HuggingFace token: +```bash +export HF_TOKEN="your_token_here" +ansible-playbook playbook.yml --tags comfyui-models-image +``` + +### Out of Space + +Check available storage: +```bash +df -h /workspace +du -sh /workspace/huggingface_cache +``` + +Remove optional models: +```bash +# Keep only essential models in playbook vars +``` + +### Custom Node Dependencies + +Some nodes require manual intervention: +```bash +cd /workspace/ComfyUI/custom_nodes/NodeName +pip3 install -r requirements.txt +``` + +## Next Steps + +After installation: + +1. **Start ComfyUI:** + ```bash + cd /workspace/ComfyUI + python3 main.py --listen 0.0.0.0 --port 8188 + ``` + +2. **Access via Browser:** + ``` + http://your-runpod-ip:8188 + ``` + +3. 
**Install Additional Nodes via Manager:** + - ComfyUI Manager provides GUI for installing more nodes + - Browse and install from the manager interface + +4. **Download Model Variants:** + - Use ComfyUI Manager to download LoRAs + - Download ControlNet models as needed + - Get additional checkpoints from Civitai + +## Support + +For issues or questions: +- Check Ansible logs for download status +- Verify HuggingFace token is valid +- Ensure sufficient storage space +- Check VRAM requirements for your GPU diff --git a/README.md b/README.md index ad92044..9d36f45 100644 --- a/README.md +++ b/README.md @@ -85,9 +85,9 @@ Unlike the Docker-based version (`orchestrator.py`), the subprocess implementati # - PORT=9000 (orchestrator port) ``` -3. **Run setup (installs dependencies, downloads models):** +3. **Run setup (installs dependencies via Ansible):** ```bash - bash scripts/install.sh + ansible-playbook playbook.yml ``` 4. **Start all services:** @@ -105,11 +105,14 @@ Unlike the Docker-based version (`orchestrator.py`), the subprocess implementati If you prefer step-by-step setup: ```bash -# Install system dependencies via Ansible -ansible-playbook playbook.yml --tags base,python,dependencies - -# Download model weights -bash scripts/download-models.sh +# Install system dependencies and download models via Ansible +# Use specific tags for selective installation: +# - base: Base system packages +# - python: Python runtime via pyenv +# - vllm: vLLM dependencies and Qwen model +# - comfyui: ComfyUI installation +# - comfyui-essential: Essential ComfyUI models only +ansible-playbook playbook.yml --tags base,python,vllm # Start orchestrator python3 model-orchestrator/orchestrator_subprocess.py @@ -250,10 +253,7 @@ qwen-2.5-7b: run_server(model=model, port=port) ``` -3. **Download model weights:** - ```bash - bash scripts/download-models.sh - ``` +3. **Models are downloaded automatically by the playbook** when using the appropriate tags (vllm, comfyui-essential, comfyui-models-*, etc.) 4.
**Restart orchestrator:** ```bash @@ -363,7 +363,7 @@ sudo journalctl -u ai-orchestrator -n 50 **Common issues:** - Out of GPU memory: Check VRAM usage with `nvidia-smi` -- Missing model weights: Run `bash scripts/download-models.sh` +- Missing model weights: Run `ansible-playbook playbook.yml --tags vllm` or `--tags comfyui-essential` - Port conflicts: Check if port is already in use with `lsof -i :9000` ### Orchestrator Not Responding @@ -389,11 +389,11 @@ curl -X POST http://localhost:8000/v1/chat/completions \ # Check HF_TOKEN is set echo $HF_TOKEN -# Set token manually -export HF_TOKEN=your_token_here +# Set token in .env file +echo "HF_TOKEN=your_token_here" >> .env -# Re-run download -bash scripts/download-models.sh +# Re-run Ansible playbook to download models +ansible-playbook playbook.yml --tags vllm ``` ## Project Structure @@ -408,22 +408,18 @@ runpod/ │ ├── vllm/ │ │ ├── server.py # vLLM text generation service │ │ └── requirements.txt -│ ├── flux/ -│ │ ├── server.py # Flux image generation service -│ │ └── requirements.txt -│ └── musicgen/ -│ ├── server.py # MusicGen audio generation service -│ └── requirements.txt +│ └── comfyui/ # ComfyUI for image/video/audio generation +│ ├── start.sh # ComfyUI startup script +│ └── models/ # ComfyUI models directory ├── scripts/ -│ ├── install.sh # Setup script (Ansible + downloads) │ ├── start-all.sh # Start orchestrator + models -│ ├── stop-all.sh # Stop all services -│ └── download-models.sh # Download model weights from HuggingFace +│ └── stop-all.sh # Stop all services ├── systemd/ │ └── ai-orchestrator.service # systemd service file (for VPS) ├── playbook.yml # Ansible playbook for system setup ├── inventory.yml # Ansible inventory ├── .env.example # Environment variables template +├── COMFYUI_MODELS.md # ComfyUI models usage guide └── README.md # This file ``` diff --git a/models/flux/requirements.txt b/models/flux/requirements.txt deleted file mode 100644 index 7becd1b..0000000 --- 
a/models/flux/requirements.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Flux.1 Image Generation Service Dependencies - -# Diffusers library (for Flux.1 pipeline) -diffusers==0.30.0 - -# PyTorch (required by diffusers) -torch==2.1.0 -torchvision==0.16.0 - -# Transformers (for model components) -transformers==4.36.0 - -# Image processing -Pillow==10.1.0 - -# Accelerate (for optimizations) -accelerate==0.25.0 - -# Additional dependencies for Flux -sentencepiece==0.1.99 -protobuf==4.25.1 diff --git a/models/flux/server.py b/models/flux/server.py deleted file mode 100644 index 796599f..0000000 --- a/models/flux/server.py +++ /dev/null @@ -1,193 +0,0 @@ -#!/usr/bin/env python3 -""" -Flux.1 Image Generation Service - -OpenAI-compatible image generation using Flux.1 Schnell model. -Provides /v1/images/generations endpoint. -""" - -import base64 -import io -import os -from typing import Optional - -import torch -from diffusers import FluxPipeline -from fastapi import HTTPException -from PIL import Image -from pydantic import BaseModel, Field - -# Import base service class -import sys -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..')) -from core.base_service import GPUService - - -class ImageGenerationRequest(BaseModel): - """Image generation request (OpenAI-compatible)""" - model: str = Field(default="flux-schnell", description="Model name") - prompt: str = Field(..., description="Text description of the image to generate") - n: int = Field(default=1, ge=1, le=4, description="Number of images to generate") - size: str = Field(default="1024x1024", description="Image size (e.g., 512x512, 1024x1024)") - response_format: str = Field(default="b64_json", description="Response format: url or b64_json") - quality: str = Field(default="standard", description="Image quality: standard or hd") - style: str = Field(default="natural", description="Image style: natural or vivid") - - -class ImageGenerationResponse(BaseModel): - """Image generation response 
(OpenAI-compatible)""" - created: int = Field(..., description="Unix timestamp") - data: list = Field(..., description="List of generated images") - - -class FluxService(GPUService): - """Flux.1 Schnell image generation service""" - - def __init__(self): - # Get port from environment or use default - port = int(os.getenv("PORT", "8002")) - super().__init__(name="flux-schnell", port=port) - - # Service-specific attributes - self.pipeline: Optional[FluxPipeline] = None - self.model_name = os.getenv("MODEL_NAME", "black-forest-labs/FLUX.1-schnell") - - async def initialize(self): - """Initialize Flux.1 pipeline""" - await super().initialize() - - self.logger.info(f"Loading Flux.1 pipeline: {self.model_name}") - - # Load pipeline - self.pipeline = FluxPipeline.from_pretrained( - self.model_name, - torch_dtype=torch.bfloat16, - cache_dir=os.getenv("HF_CACHE_DIR", "/workspace/huggingface_cache") - ) - - # Move to GPU - if torch.cuda.is_available(): - self.pipeline = self.pipeline.to("cuda") - self.logger.info("Flux.1 pipeline loaded on GPU") - else: - self.logger.warning("GPU not available, running on CPU (very slow)") - - # Enable memory optimizations - if hasattr(self.pipeline, 'enable_model_cpu_offload'): - # This moves models to GPU only when needed, saving VRAM - self.pipeline.enable_model_cpu_offload() - - self.logger.info("Flux.1 pipeline initialized successfully") - - async def cleanup(self): - """Cleanup resources""" - await super().cleanup() - if self.pipeline: - self.logger.info("Flux.1 pipeline cleanup") - self.pipeline = None - - def parse_size(self, size_str: str) -> tuple[int, int]: - """Parse size string like '1024x1024' into (width, height)""" - try: - parts = size_str.lower().split('x') - if len(parts) != 2: - return (1024, 1024) - width = int(parts[0]) - height = int(parts[1]) - return (width, height) - except: - return (1024, 1024) - - def image_to_base64(self, image: Image.Image) -> str: - """Convert PIL Image to base64 string""" - buffered = 
io.BytesIO() - image.save(buffered, format="PNG") - img_bytes = buffered.getvalue() - return base64.b64encode(img_bytes).decode('utf-8') - - def create_app(self): - """Create FastAPI routes""" - - @self.app.get("/") - async def root(): - """Root endpoint""" - return { - "service": "Flux.1 Schnell Image Generation", - "model": self.model_name, - "max_images": 4 - } - - @self.app.get("/v1/models") - async def list_models(): - """List available models (OpenAI-compatible)""" - return { - "object": "list", - "data": [ - { - "id": "flux-schnell", - "object": "model", - "created": 1234567890, - "owned_by": "black-forest-labs", - "permission": [], - "root": self.model_name, - "parent": None, - } - ] - } - - @self.app.post("/v1/images/generations") - async def generate_image(request: ImageGenerationRequest) -> ImageGenerationResponse: - """Generate images from text prompt (OpenAI-compatible)""" - if not self.pipeline: - raise HTTPException(status_code=503, detail="Model not initialized") - - self.logger.info(f"Generating {request.n} image(s): {request.prompt[:100]}...") - - try: - # Parse image size - width, height = self.parse_size(request.size) - self.logger.info(f"Size: {width}x{height}") - - # Generate images - images = [] - for i in range(request.n): - self.logger.info(f"Generating image {i+1}/{request.n}") - - # Flux.1 Schnell uses 4 inference steps for speed - image = self.pipeline( - prompt=request.prompt, - width=width, - height=height, - num_inference_steps=4, # Schnell is optimized for 4 steps - guidance_scale=0.0, # Schnell doesn't use guidance - ).images[0] - - # Convert to base64 - if request.response_format == "b64_json": - image_data = { - "b64_json": self.image_to_base64(image) - } - else: - # For URL format, we'd need to save and serve the file - # For now, we'll return base64 anyway - image_data = { - "b64_json": self.image_to_base64(image) - } - - images.append(image_data) - - self.logger.info(f"Generated {request.n} image(s) successfully") - - return 
ImageGenerationResponse( - created=1234567890, - data=images - ) - - except Exception as e: - self.logger.error(f"Error generating image: {e}", exc_info=True) - raise HTTPException(status_code=500, detail=str(e)) - - -if __name__ == "__main__": - service = FluxService() - service.run() diff --git a/models/musicgen/requirements.txt b/models/musicgen/requirements.txt deleted file mode 100644 index a55f831..0000000 --- a/models/musicgen/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -# MusicGen Music Generation Service Dependencies - -# AudioCraft (contains MusicGen) -audiocraft==1.3.0 - -# PyTorch (required by AudioCraft) -torch==2.1.0 -torchaudio==2.1.0 - -# Additional dependencies -transformers==4.36.0 diff --git a/models/musicgen/server.py b/models/musicgen/server.py deleted file mode 100644 index 1b4a4fe..0000000 --- a/models/musicgen/server.py +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env python3 -""" -MusicGen Music Generation Service - -OpenAI-compatible music generation using Meta's MusicGen Medium model. -Provides /v1/audio/generations endpoint. 
-""" - -import base64 -import io -import os -import tempfile -from typing import Optional - -import torch -import torchaudio -from audiocraft.models import MusicGen -from fastapi import HTTPException -from pydantic import BaseModel, Field - -# Import base service class -import sys -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../..')) -from core.base_service import GPUService - - -class AudioGenerationRequest(BaseModel): - """Music generation request""" - model: str = Field(default="musicgen-medium", description="Model name") - prompt: str = Field(..., description="Text description of the music to generate") - duration: float = Field(default=30.0, ge=1.0, le=30.0, description="Duration in seconds") - temperature: float = Field(default=1.0, ge=0.1, le=2.0, description="Sampling temperature") - top_k: int = Field(default=250, ge=0, le=500, description="Top-k sampling") - top_p: float = Field(default=0.0, ge=0.0, le=1.0, description="Top-p (nucleus) sampling") - cfg_coef: float = Field(default=3.0, ge=1.0, le=15.0, description="Classifier-free guidance coefficient") - response_format: str = Field(default="wav", description="Audio format (wav or mp3)") - - -class AudioGenerationResponse(BaseModel): - """Music generation response""" - audio: str = Field(..., description="Base64-encoded audio data") - format: str = Field(..., description="Audio format (wav or mp3)") - duration: float = Field(..., description="Duration in seconds") - sample_rate: int = Field(..., description="Sample rate in Hz") - - -class MusicGenService(GPUService): - """MusicGen music generation service""" - - def __init__(self): - # Get port from environment or use default - port = int(os.getenv("PORT", "8003")) - super().__init__(name="musicgen-medium", port=port) - - # Service-specific attributes - self.model: Optional[MusicGen] = None - self.model_name = os.getenv("MODEL_NAME", "facebook/musicgen-medium") - - async def initialize(self): - """Initialize MusicGen model""" - await 
super().initialize() - - self.logger.info(f"Loading MusicGen model: {self.model_name}") - - # Load model - device = "cuda" if torch.cuda.is_available() else "cpu" - self.model = MusicGen.get_pretrained(self.model_name, device=device) - - self.logger.info(f"MusicGen model loaded successfully") - self.logger.info(f"Max duration: 30 seconds at {self.model.sample_rate}Hz") - - async def cleanup(self): - """Cleanup resources""" - await super().cleanup() - if self.model: - self.logger.info("MusicGen model cleanup") - self.model = None - - def create_app(self): - """Create FastAPI routes""" - - @self.app.get("/") - async def root(): - """Root endpoint""" - return { - "service": "MusicGen API Server", - "model": self.model_name, - "max_duration": 30.0, - "sample_rate": self.model.sample_rate if self.model else 32000 - } - - @self.app.get("/v1/models") - async def list_models(): - """List available models (OpenAI-compatible)""" - return { - "object": "list", - "data": [ - { - "id": "musicgen-medium", - "object": "model", - "created": 1234567890, - "owned_by": "meta", - "permission": [], - "root": self.model_name, - "parent": None, - } - ] - } - - @self.app.post("/v1/audio/generations") - async def generate_audio(request: AudioGenerationRequest) -> AudioGenerationResponse: - """Generate music from text prompt""" - if not self.model: - raise HTTPException(status_code=503, detail="Model not initialized") - - self.logger.info(f"Generating music: {request.prompt[:100]}...") - self.logger.info(f"Duration: {request.duration}s, Temperature: {request.temperature}") - - try: - # Set generation parameters - self.model.set_generation_params( - duration=request.duration, - temperature=request.temperature, - top_k=request.top_k, - top_p=request.top_p, - cfg_coef=request.cfg_coef, - ) - - # Generate audio - descriptions = [request.prompt] - with torch.no_grad(): - wav = self.model.generate(descriptions) - - # wav shape: [batch_size, channels, samples] - # Extract first batch item - 
audio_data = wav[0].cpu() # [channels, samples] - - # Get sample rate - sample_rate = self.model.sample_rate - - # Save to temporary file - with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: - temp_path = temp_file.name - torchaudio.save(temp_path, audio_data, sample_rate) - - # Read audio file and encode to base64 - with open(temp_path, 'rb') as f: - audio_bytes = f.read() - - # Clean up temporary file - os.unlink(temp_path) - - # Encode to base64 - audio_base64 = base64.b64encode(audio_bytes).decode('utf-8') - - self.logger.info(f"Generated {request.duration}s of audio") - - return AudioGenerationResponse( - audio=audio_base64, - format="wav", - duration=request.duration, - sample_rate=sample_rate - ) - - except Exception as e: - self.logger.error(f"Error generating audio: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -if __name__ == "__main__": - service = MusicGenService() - service.run() diff --git a/playbook.yml b/playbook.yml index ae70e7f..64c2704 100644 --- a/playbook.yml +++ b/playbook.yml @@ -13,14 +13,21 @@ # ansible-playbook playbook.yml --tags validate # Validate installation # # Tags: -# base - System packages and dependencies -# python - Python environment setup -# dependencies- Install Python packages -# models - Download AI models -# comfyui - Install and configure ComfyUI -# tailscale - Install and configure Tailscale -# systemd - Configure systemd services -# validate - Health checks and validation +# base - System packages and dependencies +# python - Python environment setup +# dependencies - Install Python packages +# models - Download AI models (vLLM, Flux, MusicGen) +# comfyui - Install and configure ComfyUI base +# comfyui-models-image - Download ComfyUI image generation models +# comfyui-models-video - Download ComfyUI video generation models +# comfyui-models-audio - Download ComfyUI audio generation models +# comfyui-models-support - Download CLIP, IP-Adapter, ControlNet models +# 
comfyui-models-all - Download all ComfyUI models +# comfyui-nodes - Install essential custom nodes +# comfyui-essential - Quick setup (ComfyUI + essential models only) +# tailscale - Install and configure Tailscale +# systemd - Configure systemd services +# validate - Health checks and validation # - name: Provision RunPod GPU Instance for AI Services @@ -50,6 +57,208 @@ name: "facebook/musicgen-medium" size_gb: 11 + # ======================================================================== + # ComfyUI Models - Comprehensive List for 24GB GPU + # ======================================================================== + + # ComfyUI Image Generation Models + comfyui_image_models: + # FLUX Models (Black Forest Labs) - State of the art 2025 + - name: "black-forest-labs/FLUX.1-schnell" + type: "checkpoint" + category: "image" + size_gb: 23 + vram_gb: 23 + format: "fp16" + description: "FLUX.1 Schnell - Fast 4-step inference" + essential: true + + - name: "black-forest-labs/FLUX.1-dev" + type: "checkpoint" + category: "image" + size_gb: 23 + vram_gb: 23 + format: "fp16" + description: "FLUX.1 Dev - Balanced quality/speed" + essential: false + + # SDXL Models - Industry standard + - name: "stabilityai/stable-diffusion-xl-base-1.0" + type: "checkpoint" + category: "image" + size_gb: 7 + vram_gb: 12 + format: "fp16" + description: "SDXL 1.0 Base - 1024x1024 native resolution" + essential: true + + - name: "stabilityai/stable-diffusion-xl-refiner-1.0" + type: "checkpoint" + category: "image" + size_gb: 6 + vram_gb: 12 + format: "fp16" + description: "SDXL Refiner - Enhances base output" + essential: false + + # SD 3.5 Models - Latest Stability AI + - name: "stabilityai/stable-diffusion-3.5-large" + type: "checkpoint" + category: "image" + size_gb: 18 + vram_gb: 20 + format: "fp16" + description: "SD 3.5 Large - MMDiT architecture" + essential: false + + # ComfyUI Video Generation Models + comfyui_video_models: + # CogVideoX - Text-to-video + - name: "THUDM/CogVideoX-5b" + 
type: "video" + category: "video" + size_gb: 20 + vram_gb: 12 # with optimizations + description: "CogVideoX 5B - Professional text-to-video" + essential: true + + # Stable Video Diffusion + - name: "stabilityai/stable-video-diffusion-img2vid" + type: "video" + category: "video" + size_gb: 8 + vram_gb: 16 + description: "SVD - 14 frame image-to-video" + essential: true + + - name: "stabilityai/stable-video-diffusion-img2vid-xt" + type: "video" + category: "video" + size_gb: 8 + vram_gb: 20 + description: "SVD-XT - 25 frame image-to-video" + essential: false + + # ComfyUI Audio Generation Models + comfyui_audio_models: + - name: "facebook/musicgen-small" + type: "audio" + category: "audio" + size_gb: 3 + vram_gb: 4 + description: "MusicGen Small - Fast music generation" + essential: false + + - name: "facebook/musicgen-medium" + type: "audio" + category: "audio" + size_gb: 11 + vram_gb: 8 + description: "MusicGen Medium - Balanced quality" + essential: true + + - name: "facebook/musicgen-large" + type: "audio" + category: "audio" + size_gb: 22 + vram_gb: 16 + description: "MusicGen Large - Highest quality" + essential: false + + # ComfyUI Supporting Models (CLIP, IP-Adapter, ControlNet) + comfyui_support_models: + # CLIP Vision Models + - name: "openai/clip-vit-large-patch14" + type: "clip_vision" + category: "support" + size_gb: 2 + description: "CLIP H - For SD 1.5 IP-Adapter" + essential: true + target_dir: "clip_vision" + + - name: "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k" + type: "clip_vision" + category: "support" + size_gb: 7 + description: "CLIP G - For SDXL IP-Adapter" + essential: true + target_dir: "clip_vision" + + - name: "google/siglip-so400m-patch14-384" + type: "clip_vision" + category: "support" + size_gb: 2 + description: "SigLIP - For FLUX models" + essential: true + target_dir: "clip_vision" + + # ComfyUI Custom Nodes - Essential Extensions + comfyui_custom_nodes: + # ComfyUI Manager - Must have + - name: "ComfyUI-Manager" + repo: 
"https://github.com/ltdrdata/ComfyUI-Manager.git" + category: "manager" + description: "Install/manage custom nodes and models" + essential: true + + # Video Generation Nodes + - name: "ComfyUI-VideoHelperSuite" + repo: "https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git" + category: "video" + description: "Video operations and processing" + essential: true + + - name: "ComfyUI-AnimateDiff-Evolved" + repo: "https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved.git" + category: "video" + description: "AnimateDiff for video generation" + essential: true + + - name: "ComfyUI-CogVideoXWrapper" + repo: "https://github.com/kijai/ComfyUI-CogVideoXWrapper.git" + category: "video" + description: "CogVideoX integration" + essential: false + + # Image Enhancement Nodes + - name: "ComfyUI_IPAdapter_plus" + repo: "https://github.com/cubiq/ComfyUI_IPAdapter_plus.git" + category: "image" + description: "IP-Adapter for style transfer" + essential: true + + - name: "ComfyUI-Impact-Pack" + repo: "https://github.com/ltdrdata/ComfyUI-Impact-Pack.git" + category: "image" + description: "Auto face enhancement, detailer" + essential: true + + - name: "Comfyui-Inspire-Pack" + repo: "https://github.com/ltdrdata/ComfyUI-Inspire-Pack.git" + category: "image" + description: "Additional inspiration tools" + essential: false + + # Audio Generation Nodes + - name: "comfyui-sound-lab" + repo: "https://github.com/eigenpunk/comfyui-sound-lab.git" + category: "audio" + description: "MusicGen and Stable Audio integration" + essential: true + + # Utility Nodes + - name: "ComfyUI-Advanced-ControlNet" + repo: "https://github.com/Kosinkadink/ComfyUI-Advanced-ControlNet.git" + category: "control" + description: "Advanced ControlNet features" + essential: false + + - name: "ComfyUI-3D-Pack" + repo: "https://github.com/MrForExample/ComfyUI-3D-Pack.git" + category: "3d" + description: "3D asset generation" + essential: false + # Service configuration services: - name: orchestrator @@ -58,12 
+267,6 @@ - name: vllm port: 8001 script: models/vllm/server.py - - name: flux - port: 8002 - script: models/flux/server.py - - name: musicgen - port: 8003 - script: models/musicgen/server.py - name: comfyui port: 8188 script: models/comfyui/start.sh @@ -146,18 +349,6 @@ executable: pip3 become: true - - name: Install Flux dependencies - pip: - requirements: "{{ ai_dir }}/models/flux/requirements.txt" - executable: pip3 - become: true - - - name: Install MusicGen dependencies - pip: - requirements: "{{ ai_dir }}/models/musicgen/requirements.txt" - executable: pip3 - become: true - # # ComfyUI Installation # @@ -195,12 +386,26 @@ state: directory mode: '0755' loop: + # Image Model Directories - checkpoints - unet - vae - loras - clip + - clip_vision - controlnet + - ipadapter + - embeddings + - upscale_models + # Video Model Directories + - video_models + - animatediff_models + - animatediff_motion_lora + # Audio Model Directories + - audio_models + # Utility Directories (NOTE(review): nodes are cloned to ComfyUI/custom_nodes below - confirm a custom_nodes dir is also wanted under this loop's base path) + - configs + - custom_nodes - name: Create symlink for Flux model in ComfyUI file: @@ -231,6 +436,208 @@ Access: http://localhost:8188 + # + # ComfyUI Custom Nodes Installation (clones every essential entry of comfyui_custom_nodes, then pip-installs its requirements.txt when the file exists) + # + - name: Install ComfyUI Custom Nodes + tags: [comfyui-nodes, comfyui-essential] + block: + - name: Install essential ComfyUI custom nodes + git: + repo: "{{ item.repo }}" + dest: "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}" + version: HEAD # remote default branch; a hard-coded "main" fails for repos whose default branch has another name, and ignore_errors would hide it + update: yes + loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}" + loop_control: + label: "{{ item.name }}" + ignore_errors: yes + + - name: Install custom node dependencies + shell: | + if [ -f "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt" ]; then + pip3 install -r "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt" + fi + loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}" + loop_control: + label: "{{ item.name }}" + become: true + ignore_errors:
yes + + - name: Display custom nodes installation summary # NOTE(review): printed even if clones failed (install tasks use ignore_errors); no task in this patch carries the comfyui-nodes-all tag advertised below - confirm + debug: + msg: | + ✓ Custom nodes installed successfully! + + Essential nodes: + {% for node in comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list %} + - {{ node.name }}: {{ node.description }} + {% endfor %} + + To install ALL nodes (including optional): + ansible-playbook playbook.yml --tags comfyui-nodes-all + + # + # ComfyUI Image Models Download (snapshot_download into cache_dir for every essential entry of comfyui_image_models; async allows 3600 s per item) + # + - name: Download ComfyUI Image Generation Models + tags: [comfyui-models-image, comfyui-models-all, comfyui-essential] + block: + - name: Download essential image generation models + shell: | + python3 -c " + from huggingface_hub import snapshot_download + import os + os.environ['HF_HOME'] = '{{ cache_dir }}' + print('Downloading {{ item.name }}...') + snapshot_download( + repo_id='{{ item.name }}', + cache_dir='{{ cache_dir }}', + token=os.environ.get('HF_TOKEN') or None + ) + print('Completed {{ item.name }}') + " + environment: + HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}" # the env lookup yields '' when HF_TOKEN is unset; the script maps '' to None so no empty token is sent + HF_HOME: "{{ cache_dir }}" + loop: "{{ comfyui_image_models | selectattr('essential', 'equalto', true) | list }}" + loop_control: + label: "{{ item.name }} ({{ item.size_gb }}GB)" + async: 3600 + poll: 30 + ignore_errors: yes + + - name: Display image models summary + debug: + msg: | + Image generation models downloaded: + {% for model in comfyui_image_models | selectattr('essential', 'equalto', true) | list %} + - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB, {{ model.vram_gb }}GB VRAM) + {% endfor %} + + Total size: ~{{ (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) }}GB + + # + # ComfyUI Video Models Download + # + - name: Download ComfyUI Video Generation Models + tags: [comfyui-models-video, comfyui-models-all] + block: + - name: Download essential video generation models + shell: | + python3 -c " + from huggingface_hub import snapshot_download + import os + os.environ['HF_HOME'] = '{{
cache_dir }}' + print('Downloading {{ item.name }}...') + snapshot_download( + repo_id='{{ item.name }}', + cache_dir='{{ cache_dir }}', + token=os.environ.get('HF_TOKEN') or None + ) + print('Completed {{ item.name }}') + " + environment: + HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}" # the env lookup yields '' when HF_TOKEN is unset; the script maps '' to None so no empty token is sent + HF_HOME: "{{ cache_dir }}" + loop: "{{ comfyui_video_models | selectattr('essential', 'equalto', true) | list }}" + loop_control: + label: "{{ item.name }} ({{ item.size_gb }}GB)" + async: 3600 + poll: 30 + ignore_errors: yes + + - name: Display video models summary + debug: + msg: | + Video generation models downloaded: + {% for model in comfyui_video_models | selectattr('essential', 'equalto', true) | list %} + - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB, {{ model.vram_gb }}GB VRAM) + {% endfor %} + + # + # ComfyUI Audio Models Download (snapshot_download into cache_dir for every essential entry of comfyui_audio_models; async allows 3600 s per item) + # + - name: Download ComfyUI Audio Generation Models + tags: [comfyui-models-audio, comfyui-models-all] + block: + - name: Download essential audio generation models + shell: | + python3 -c " + from huggingface_hub import snapshot_download + import os + os.environ['HF_HOME'] = '{{ cache_dir }}' + print('Downloading {{ item.name }}...') + snapshot_download( + repo_id='{{ item.name }}', + cache_dir='{{ cache_dir }}', + token=os.environ.get('HF_TOKEN') or None + ) + print('Completed {{ item.name }}') + " + environment: + HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}" # the env lookup yields '' when HF_TOKEN is unset; the script maps '' to None so no empty token is sent + HF_HOME: "{{ cache_dir }}" + loop: "{{ comfyui_audio_models | selectattr('essential', 'equalto', true) | list }}" + loop_control: + label: "{{ item.name }} ({{ item.size_gb }}GB)" + async: 3600 + poll: 30 + ignore_errors: yes + + - name: Display audio models summary + debug: + msg: | + Audio generation models downloaded: + {% for model in comfyui_audio_models | selectattr('essential', 'equalto', true) | list %} + - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB) + {% endfor %} + + # + # ComfyUI Support Models Download (CLIP, IP-Adapter, ControlNet) + # + - name:
Download ComfyUI Support Models + tags: [comfyui-models-support, comfyui-models-all, comfyui-essential] + block: + - name: Download essential support models (CLIP, IP-Adapter) + shell: | + python3 -c " + from huggingface_hub import snapshot_download + import os + os.environ['HF_HOME'] = '{{ cache_dir }}' + print('Downloading {{ item.name }}...') + snapshot_download( + repo_id='{{ item.name }}', + cache_dir='{{ cache_dir }}', + token=os.environ.get('HF_TOKEN') or None + ) + print('Completed {{ item.name }}') + " + environment: + HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}" # the env lookup yields '' when HF_TOKEN is unset; the script maps '' to None so no empty token is sent + HF_HOME: "{{ cache_dir }}" + loop: "{{ comfyui_support_models | selectattr('essential', 'equalto', true) | list }}" + loop_control: + label: "{{ item.name }} ({{ item.size_gb }}GB)" + async: 1800 # support models are smaller, so each item gets 1800 s instead of the 3600 s used for image/video/audio + poll: 30 + ignore_errors: yes + + - name: Display support models summary + debug: + msg: | + Support models downloaded: + {% for model in comfyui_support_models | selectattr('essential', 'equalto', true) | list %} + - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB) + {% endfor %} + + Total ComfyUI models cache: ~{{ + (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) + + (comfyui_video_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) + + (comfyui_audio_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) + + (comfyui_support_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) + }}GB + # # Download AI Models # diff --git a/scripts/download-models.sh b/scripts/download-models.sh deleted file mode 100644 index e093f66..0000000 --- a/scripts/download-models.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash -# -# Download AI Models -# Wrapper for Ansible models tag -# - -set -e - -cd "$(dirname "$0")/.."
- -echo "=========================================" -echo " Downloading AI Models (~37GB)" -echo "=========================================" -echo "" - -# Source .env if it exists -if [ -f .env ]; then - set -a - source .env - set +a -fi - -# Check HF_TOKEN -if [ -z "$HF_TOKEN" ]; then - echo "Error: HF_TOKEN not set" - echo "Add HF_TOKEN to .env file" - exit 1 -fi - -# Run Ansible with models tag -ansible-playbook playbook.yml --tags models - -echo "" -echo "=========================================" -echo " Model download complete!" -echo "=========================================" diff --git a/scripts/install.sh b/scripts/install.sh deleted file mode 100644 index 7a24314..0000000 --- a/scripts/install.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -# -# Install AI Infrastructure -# Wrapper script for Ansible playbook -# -# Usage: -# ./install.sh # Full installation -# ./install.sh --tags base # Install specific components -# - -set -e - -cd "$(dirname "$0")/.." - -echo "=========================================" -echo " RunPod AI Infrastructure Installation" -echo "=========================================" -echo "" - -# Check if Ansible is installed -if ! command -v ansible-playbook &> /dev/null; then - echo "Ansible not found. Installing..." - sudo apt update - sudo apt install -y ansible -fi - -# Check for .env file -if [ ! -f .env ]; then - echo "Warning: .env file not found" - echo "Copy .env.example to .env and add your HF_TOKEN" - echo "" -fi - -# Source .env if it exists -if [ -f .env ]; then - set -a - source .env - set +a -fi - -# Run Ansible playbook -echo "Running Ansible playbook..." -echo "" - -ansible-playbook playbook.yml "$@" - -echo "" -echo "=========================================" -echo " Installation complete!" 
-echo "=========================================" diff --git a/scripts/prepare-template.sh b/scripts/prepare-template.sh deleted file mode 100644 index 0ffd15a..0000000 --- a/scripts/prepare-template.sh +++ /dev/null @@ -1,314 +0,0 @@ -#!/bin/bash -# -# RunPod Template Preparation Script -# Prepares a RunPod instance for template creation -# -# This script: -# 1. Installs Docker & Docker Compose -# 2. Installs Tailscale -# 3. Builds all Docker images -# 4. Pre-downloads all models -# 5. Validates everything works -# 6. Cleans up for template creation -# -# Usage: ./prepare-template.sh -# Run this on the RunPod instance you want to save as a template -# - -set -e # Exit on error - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Logging functions -log_info() { - echo -e "${BLUE}[INFO]${NC} $1" -} - -log_success() { - echo -e "${GREEN}[SUCCESS]${NC} $1" -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $1" -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $1" -} - -# Check if running on RunPod -check_environment() { - log_info "Checking environment..." - - if ! nvidia-smi &> /dev/null; then - log_error "NVIDIA GPU not detected. Are you running on a GPU instance?" - exit 1 - fi - - if [ ! -d "/workspace" ]; then - log_warn "/workspace directory not found. Creating it..." - mkdir -p /workspace - fi - - log_success "Environment check passed" -} - -# Install Docker -install_docker() { - if command -v docker &> /dev/null; then - log_info "Docker already installed: $(docker --version)" - return - fi - - log_info "Installing Docker..." - curl -fsSL https://get.docker.com -o get-docker.sh - sh get-docker.sh - rm get-docker.sh - - # Start Docker daemon (RunPod requires --iptables=false --bridge=none) - log_info "Starting Docker daemon..." 
- pkill dockerd 2>/dev/null || true - sleep 2 - dockerd --iptables=false --bridge=none > /var/log/dockerd.log 2>&1 & - sleep 10 - - # Verify Docker is running - if docker ps &> /dev/null; then - log_success "Docker installed and running: $(docker --version)" - else - log_error "Docker failed to start. Check /var/log/dockerd.log" - exit 1 - fi -} - -# Install Docker Compose -install_docker_compose() { - if docker compose version &> /dev/null; then - log_info "Docker Compose already installed: $(docker compose version)" - return - fi - - log_info "Installing Docker Compose..." - - # Docker Compose is usually bundled with Docker now - # If not, install it separately - if ! docker compose version &> /dev/null; then - DOCKER_COMPOSE_VERSION="v2.23.0" - curl -L "https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose - chmod +x /usr/local/bin/docker-compose - fi - - log_success "Docker Compose installed: $(docker compose version)" -} - -# Install Tailscale -install_tailscale() { - if command -v tailscale &> /dev/null; then - log_info "Tailscale already installed: $(tailscale version)" - return - fi - - log_info "Installing Tailscale..." - curl -fsSL https://tailscale.com/install.sh | sh - - log_success "Tailscale installed: $(tailscale version)" -} - -# Build Docker images -build_docker_images() { - log_info "Building Docker images..." - - cd /workspace/ai - - # Use legacy builder (buildkit has permission issues in RunPod) - export DOCKER_BUILDKIT=0 - - # Build orchestrator - log_info "Building orchestrator..." - docker compose -f compose.yaml build orchestrator - - # Build vLLM - log_info "Building vLLM..." - docker compose -f compose.yaml build vllm-qwen - - # Build MusicGen - log_info "Building MusicGen..." - docker compose -f compose.yaml build musicgen - - # Pull Flux image (pre-built) - log_info "Pulling Flux.1 image..." 
- docker pull ghcr.io/matatonic/openedai-images-flux:latest - - log_success "All Docker images built" -} - -# Pre-download models -download_models() { - log_info "Pre-downloading AI models (this will take 30-45 minutes)..." - - cd /workspace/ai - - # Create model cache directories - mkdir -p /workspace/huggingface_cache - mkdir -p /workspace/flux/models - mkdir -p /workspace/musicgen/models - - # Download Qwen 2.5 7B - log_info "Downloading Qwen 2.5 7B (14GB)..." - docker compose --profile text up -d vllm-qwen - - # Wait for model to download - log_info "Waiting for Qwen model to download..." - while ! docker logs ai_vllm-qwen_1 2>&1 | grep -q "Model loaded successfully\|AsyncLLMEngine initialized"; do - echo -n "." - sleep 10 - done - echo "" - log_success "Qwen 2.5 7B downloaded" - - docker compose stop vllm-qwen - - # Download Flux.1 Schnell - log_info "Downloading Flux.1 Schnell (12GB)..." - docker compose --profile image up -d flux - - log_info "Waiting for Flux model to download..." - sleep 180 # Flux takes about 3 minutes to download and initialize - log_success "Flux.1 Schnell downloaded" - - docker compose stop flux - - # Download MusicGen Medium - log_info "Downloading MusicGen Medium (11GB)..." - docker compose --profile audio up -d musicgen - - log_info "Waiting for MusicGen model to download..." - while ! docker logs ai_musicgen_1 2>&1 | grep -q "Model loaded successfully\|initialized successfully"; do - echo -n "." - sleep 10 - done - echo "" - log_success "MusicGen Medium downloaded" - - docker compose stop musicgen - - log_success "All models downloaded and cached" -} - -# Validate installation -validate_installation() { - log_info "Validating installation..." - - cd /workspace/ai - - # Start orchestrator - log_info "Starting orchestrator for validation..." 
- docker compose -f compose.yaml up -d orchestrator - - sleep 10 - - # Check orchestrator health - if curl -s http://localhost:9000/health | grep -q "healthy\|ok"; then - log_success "Orchestrator is healthy" - else - log_error "Orchestrator health check failed" - docker logs ai_orchestrator - exit 1 - fi - - # Check models are cached - if [ -d "/workspace/huggingface_cache" ] && [ "$(ls -A /workspace/huggingface_cache)" ]; then - log_success "Hugging Face cache populated" - else - log_warn "Hugging Face cache may be empty" - fi - - # Stop orchestrator - docker compose -f compose.yaml down - - log_success "Validation passed" -} - -# Clean up for template creation -cleanup_for_template() { - log_info "Cleaning up for template creation..." - - # Remove sensitive data - log_info "Removing sensitive files..." - rm -f /workspace/ai/.env - rm -f /root/.ssh/known_hosts - rm -f /root/.bash_history - rm -f /root/.python_history - - # Clear logs - log_info "Clearing logs..." - find /var/log -type f -name "*.log" -delete 2>/dev/null || true - journalctl --vacuum-time=1s 2>/dev/null || true - - # Logout from Tailscale - log_info "Logging out from Tailscale..." - tailscale logout 2>/dev/null || true - - # Clean Docker (but keep images) - log_info "Cleaning Docker cache..." - docker system prune -af --volumes || true - - # Create template marker - log_info "Creating template version marker..." 
- cat > /workspace/TEMPLATE_VERSION </dev/null || echo "installed") -- Orchestrator (ai_orchestrator) -- Text Generation (vLLM + Qwen 2.5 7B) -- Image Generation (Flux.1 Schnell) -- Music Generation (MusicGen Medium) -Models Cached: ~37GB -EOF - - log_success "Cleanup complete" -} - -# Main execution -main() { - log_info "======================================" - log_info "RunPod Template Preparation Script" - log_info "======================================" - log_info "" - - check_environment - install_docker - install_docker_compose - install_tailscale - build_docker_images - download_models - validate_installation - cleanup_for_template - - log_info "" - log_success "======================================" - log_success "Template Preparation Complete!" - log_success "======================================" - log_info "" - log_info "Next steps:" - log_info "1. Review /workspace/TEMPLATE_VERSION" - log_info "2. Go to RunPod Dashboard → My Pods" - log_info "3. Select this pod → ⋮ → Save as Template" - log_info "4. Name: multi-modal-ai-v1.0" - log_info "5. Test deployment from template" - log_info "" - log_info "Template will enable 2-3 minute deployments instead of 60-90 minutes!" - log_info "" -} - -# Run main function -main "$@" diff --git a/scripts/stop-all.sh b/scripts/stop-all.sh index df06e6c..d585a67 100644 --- a/scripts/stop-all.sh +++ b/scripts/stop-all.sh @@ -17,8 +17,6 @@ pkill -f "orchestrator_subprocess.py" || echo "Orchestrator not running" echo "Stopping model services..." 
pkill -f "models/vllm/server.py" || echo "vLLM not running" -pkill -f "models/flux/server.py" || echo "Flux not running" -pkill -f "models/musicgen/server.py" || echo "MusicGen not running" echo "" echo "All services stopped" diff --git a/systemd/ai-orchestrator.service b/systemd/ai-orchestrator.service deleted file mode 100644 index 8610ce9..0000000 --- a/systemd/ai-orchestrator.service +++ /dev/null @@ -1,24 +0,0 @@ -[Unit] -Description=AI Model Orchestrator for RunPod -After=network.target -StartLimitIntervalSec=0 - -[Service] -Type=simple -Restart=always -RestartSec=10 -User=root -WorkingDirectory=/workspace/ai -EnvironmentFile=/workspace/ai/.env -ExecStart=/usr/bin/python3 /workspace/ai/model-orchestrator/orchestrator_subprocess.py -StandardOutput=journal -StandardError=journal -SyslogIdentifier=ai-orchestrator - -# Process management -KillMode=process -KillSignal=SIGTERM -TimeoutStopSec=30 - -[Install] -WantedBy=multi-user.target