Initial commit: RunPod multi-modal AI orchestration stack

- Multi-modal AI infrastructure for RunPod RTX 4090
- Automatic model orchestration (text, image, music)
- Text: vLLM + Qwen 2.5 7B Instruct
- Image: Flux.1 Schnell via OpenEDAI
- Music: MusicGen Medium via AudioCraft
- Cost-optimized sequential loading on single GPU
- Template preparation scripts for rapid deployment
- Comprehensive documentation (README, DEPLOYMENT, TEMPLATE)
2025-11-21 14:34:55 +01:00
commit 277f1c95bd
35 changed files with 7654 additions and 0 deletions
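The "cost-optimized sequential loading" works by putting each model service behind a Docker Compose profile, so only one of the three models occupies the GPU at a time. A hypothetical excerpt of what docker-compose.gpu.yaml would contain, using the service and profile names the preparation script below invokes:

    services:
      vllm-qwen:
        profiles: ["text"]    # loaded only for text generation
      flux:
        profiles: ["image"]   # loaded only for image generation
      musicgen:
        profiles: ["audio"]   # loaded only for music generation

With this layout, `docker compose --profile text up -d vllm-qwen` starts the text model alone, and stopping it frees the GPU for the next profile.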

scripts/prepare-template.sh (new file, 302 lines)

@@ -0,0 +1,302 @@
#!/bin/bash
#
# RunPod Template Preparation Script
# Prepares a RunPod instance for template creation
#
# This script:
# 1. Installs Docker & Docker Compose
# 2. Installs Tailscale
# 3. Builds all Docker images
# 4. Pre-downloads all models
# 5. Validates everything works
# 6. Cleans up for template creation
#
# Usage: ./prepare-template.sh
# Run this on the RunPod instance you want to save as a template
#
set -e # Exit on error
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Logging functions
log_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
log_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
log_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# Check if running on RunPod
check_environment() {
log_info "Checking environment..."
if ! nvidia-smi &> /dev/null; then
log_error "NVIDIA GPU not detected. Are you running on a GPU instance?"
exit 1
fi
if [ ! -d "/workspace" ]; then
log_warn "/workspace directory not found. Creating it..."
mkdir -p /workspace
fi
log_success "Environment check passed"
}
# Install Docker
install_docker() {
if command -v docker &> /dev/null; then
log_info "Docker already installed: $(docker --version)"
return
fi
log_info "Installing Docker..."
curl -fsSL https://get.docker.com -o get-docker.sh
sh get-docker.sh
rm get-docker.sh
# Start Docker
systemctl start docker || service docker start
systemctl enable docker || true
log_success "Docker installed: $(docker --version)"
}
# Install Docker Compose
install_docker_compose() {
if docker compose version &> /dev/null; then
log_info "Docker Compose already installed: $(docker compose version)"
return
fi
log_info "Installing Docker Compose..."
# Recent Docker Engine releases bundle the Compose v2 plugin; if it is
# missing, install the plugin binary where the Docker CLI looks for it
# (installing to /usr/local/bin would only provide the legacy
# `docker-compose` command, not `docker compose`)
DOCKER_COMPOSE_VERSION="v2.23.0"
mkdir -p /usr/local/lib/docker/cli-plugins
curl -fsSL "https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/lib/docker/cli-plugins/docker-compose
chmod +x /usr/local/lib/docker/cli-plugins/docker-compose
log_success "Docker Compose installed: $(docker compose version)"
}
# Install Tailscale
install_tailscale() {
if command -v tailscale &> /dev/null; then
log_info "Tailscale already installed: $(tailscale version)"
return
fi
log_info "Installing Tailscale..."
curl -fsSL https://tailscale.com/install.sh | sh
log_success "Tailscale installed: $(tailscale version)"
}
# Build Docker images
build_docker_images() {
log_info "Building Docker images..."
cd /workspace/ai
# Build orchestrator
log_info "Building orchestrator..."
docker compose -f docker-compose.gpu.yaml build orchestrator
# Build vLLM
log_info "Building vLLM..."
docker compose -f docker-compose.gpu.yaml build vllm-qwen
# Build MusicGen
log_info "Building MusicGen..."
docker compose -f docker-compose.gpu.yaml build musicgen
# Pull Flux image (pre-built)
log_info "Pulling Flux.1 image..."
docker pull ghcr.io/matatonic/openedai-images-flux:latest
log_success "All Docker images built"
}
# Pre-download models
download_models() {
log_info "Pre-downloading AI models (this will take 30-45 minutes)..."
cd /workspace/ai
# Create model cache directories
mkdir -p /workspace/huggingface_cache
mkdir -p /workspace/flux/models
mkdir -p /workspace/musicgen/models
# Download Qwen 2.5 7B
log_info "Downloading Qwen 2.5 7B (14GB)..."
docker compose -f docker-compose.gpu.yaml --profile text up -d vllm-qwen
# Wait for model to download (poll the service logs via compose so the
# check works regardless of how the container itself is named)
log_info "Waiting for Qwen model to download..."
while ! docker compose -f docker-compose.gpu.yaml --profile text logs vllm-qwen 2>&1 | grep -q "Model loaded successfully\|AsyncLLMEngine initialized"; do
echo -n "."
sleep 10
done
echo ""
log_success "Qwen 2.5 7B downloaded"
docker compose -f docker-compose.gpu.yaml --profile text stop vllm-qwen
# Download Flux.1 Schnell
log_info "Downloading Flux.1 Schnell (12GB)..."
docker compose -f docker-compose.gpu.yaml --profile image up -d flux
log_info "Waiting for Flux model to download..."
sleep 180 # Flux takes about 3 minutes to download and initialize
log_success "Flux.1 Schnell downloaded"
docker compose -f docker-compose.gpu.yaml --profile image stop flux
# Download MusicGen Medium
log_info "Downloading MusicGen Medium (11GB)..."
docker compose -f docker-compose.gpu.yaml --profile audio up -d musicgen
log_info "Waiting for MusicGen model to download..."
while ! docker compose -f docker-compose.gpu.yaml --profile audio logs musicgen 2>&1 | grep -q "Model loaded successfully\|initialized successfully"; do
echo -n "."
sleep 10
done
echo ""
log_success "MusicGen Medium downloaded"
docker compose -f docker-compose.gpu.yaml --profile audio stop musicgen
log_success "All models downloaded and cached"
}
# Validate installation
validate_installation() {
log_info "Validating installation..."
cd /workspace/ai
# Start orchestrator
log_info "Starting orchestrator for validation..."
docker compose -f docker-compose.gpu.yaml up -d orchestrator
sleep 10
# Check orchestrator health
if curl -s http://localhost:9000/health | grep -q "healthy\|ok"; then
log_success "Orchestrator is healthy"
else
log_error "Orchestrator health check failed"
docker compose -f docker-compose.gpu.yaml logs orchestrator
exit 1
fi
# Check models are cached
if [ -d "/workspace/huggingface_cache" ] && [ "$(ls -A /workspace/huggingface_cache)" ]; then
log_success "Hugging Face cache populated"
else
log_warn "Hugging Face cache may be empty"
fi
# Stop orchestrator
docker compose -f docker-compose.gpu.yaml down
log_success "Validation passed"
}
# Clean up for template creation
cleanup_for_template() {
log_info "Cleaning up for template creation..."
# Remove sensitive data
log_info "Removing sensitive files..."
rm -f /workspace/ai/.env
rm -f /root/.ssh/known_hosts
rm -f /root/.bash_history
rm -f /root/.python_history
# Clear logs
log_info "Clearing logs..."
find /var/log -type f -name "*.log" -delete 2>/dev/null || true
journalctl --vacuum-time=1s 2>/dev/null || true
# Logout from Tailscale
log_info "Logging out from Tailscale..."
tailscale logout 2>/dev/null || true
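# Pods launched from the template must re-authenticate on first boot, e.g.
# with a reusable auth key: tailscale up --authkey tskey-auth-...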
# Clean Docker (but keep images)
log_info "Cleaning Docker cache..."
# `docker system prune -a` would delete the freshly built images (nothing is
# running at this point), so prune only stopped containers, networks,
# dangling images, and build cache
docker system prune -f || true
docker builder prune -af || true
# Create template marker
log_info "Creating template version marker..."
cat > /workspace/TEMPLATE_VERSION <<EOF
RunPod Multi-Modal AI Template
Version: 1.0
Created: $(date)
Components:
- Docker $(docker --version | cut -d' ' -f3 | tr -d ',')
- Docker Compose $(docker compose version --short)
- Tailscale $(tailscale version --short 2>/dev/null || echo "installed")
- Orchestrator (ai_orchestrator)
- Text Generation (vLLM + Qwen 2.5 7B)
- Image Generation (Flux.1 Schnell)
- Music Generation (MusicGen Medium)
Models Cached: ~37GB
EOF
log_success "Cleanup complete"
}
# Main execution
main() {
log_info "======================================"
log_info "RunPod Template Preparation Script"
log_info "======================================"
log_info ""
check_environment
install_docker
install_docker_compose
install_tailscale
build_docker_images
download_models
validate_installation
cleanup_for_template
log_info ""
log_success "======================================"
log_success "Template Preparation Complete!"
log_success "======================================"
log_info ""
log_info "Next steps:"
log_info "1. Review /workspace/TEMPLATE_VERSION"
log_info "2. Go to RunPod Dashboard → My Pods"
log_info "3. Select this pod → ⋮ → Save as Template"
log_info "4. Name: multi-modal-ai-v1.0"
log_info "5. Test deployment from template"
log_info ""
log_info "Template will enable 2-3 minute deployments instead of 60-90 minutes!"
log_info ""
}
# Run main function
main "$@"