Initial commit: RunPod multi-modal AI orchestration stack

- Multi-modal AI infrastructure for RunPod RTX 4090
- Automatic model orchestration (text, image, music)
- Text: vLLM + Qwen 2.5 7B Instruct
- Image: Flux.1 Schnell via OpenEDAI
- Music: MusicGen Medium via AudioCraft
- Cost-optimized sequential loading on single GPU
- Template preparation scripts for rapid deployment
- Comprehensive documentation (README, DEPLOYMENT, TEMPLATE)
2025-11-21 14:34:55 +01:00
commit 277f1c95bd
35 changed files with 7654 additions and 0 deletions
--- a/docker-compose.gpu.yaml
+++ b/docker-compose.gpu.yaml
@@ -0,0 +1,104 @@
+version: '3.8'  # NOTE(review): obsolete under Compose V2 (ignored with a warning)
+
+# Multi-modal AI orchestration for RunPod RTX 4090: an always-on orchestrator
+# plus profile-gated text/image/music services that are loaded sequentially.
+
+services:
+  # ============================================================================
+  # ORCHESTRATOR (Always Running)
+  # ============================================================================
+  orchestrator:
+    build: ./model-orchestrator
+    container_name: ai_orchestrator
+    # Host networking shares the host's network stack, so a `ports:` mapping
+    # is discarded; the API (presumably on 9000) is reachable without one.
+    network_mode: host
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro  # NOTE(review): :ro does not limit Docker API calls
+      - ./model-orchestrator/models.yaml:/app/models.yaml:ro
+    environment:
+      - MODELS_CONFIG=/app/models.yaml
+      - COMPOSE_PROJECT_NAME=ai
+      - GPU_MEMORY_GB=24
+    restart: unless-stopped
+
+  # ============================================================================
+  # TEXT GENERATION (vLLM + Qwen 2.5 7B)
+  # ============================================================================
+  vllm-qwen:
+    container_name: ai_vllm-qwen_1
+    build: ./vllm
+    profiles: ["text"]  # Profile-gated: skipped unless the "text" profile is enabled
+    restart: "no"  # Never auto-restarted; the orchestrator owns the lifecycle
+    ports:
+      - "8001:8000"
+    environment:
+      HF_TOKEN: "${HF_TOKEN}"
+      VLLM_HOST: "0.0.0.0"
+      VLLM_PORT: "8000"
+    volumes:
+      - /workspace/huggingface_cache:/workspace/huggingface_cache
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+
+  # ============================================================================
+  # IMAGE GENERATION (Flux.1 Schnell)
+  # ============================================================================
+  flux:
+    container_name: ai_flux_1
+    image: ghcr.io/matatonic/openedai-images-flux:latest
+    profiles: ["image"]  # Profile-gated: skipped unless the "image" profile is enabled
+    restart: "no"  # Never auto-restarted; the orchestrator owns the lifecycle
+    ports:
+      - "8002:5005"
+    environment:
+      HF_TOKEN: "${HF_TOKEN}"
+      CONFIG_PATH: "/app/config/config.json"
+    volumes:
+      - /workspace/flux/models:/app/models
+      - ./flux/config:/app/config:ro
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+
+  # ============================================================================
+  # MUSIC GENERATION (MusicGen Medium)
+  # ============================================================================
+  musicgen:
+    container_name: ai_musicgen_1
+    build: ./musicgen
+    profiles: ["audio"]  # Profile-gated: skipped unless the "audio" profile is enabled
+    restart: "no"  # Never auto-restarted; the orchestrator owns the lifecycle
+    ports:
+      - "8003:8000"
+    environment:
+      HF_TOKEN: "${HF_TOKEN}"
+      MODEL_NAME: "facebook/musicgen-medium"
+      HOST: "0.0.0.0"
+      PORT: "8000"
+    volumes:
+      - /workspace/musicgen/models:/app/models
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+
+# ============================================================================
+# VOLUMES
+# ============================================================================
+# Model caches are stored on RunPod's /workspace directory (922TB network volume)
+# This persists across pod restarts and reduces model download times
+
+# No named volumes - using host paths on RunPod /workspace