version: '3.8'
# Multi-Modal AI Orchestration for RunPod RTX 4090
# Manages text, image, and music generation with sequential model loading

services:
  # ============================================================================
  # ORCHESTRATOR (Always Running)
  # ============================================================================
  orchestrator:
    build: ./model-orchestrator
    container_name: ai_orchestrator
    # NOTE(review): `ports` is ignored (and rejected by some Compose/engine
    # versions) when combined with `network_mode: host` — the container already
    # binds host port 9000 directly. Confirm which of the two is intended.
    ports:
      - "9000:9000"
    volumes:
      # Read-only Docker socket so the orchestrator can start/stop the model
      # containers below without write access to the daemon config.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./model-orchestrator/models.yaml:/app/models.yaml:ro
    environment:
      - MODELS_CONFIG=/app/models.yaml
      - COMPOSE_PROJECT_NAME=ai
      - GPU_MEMORY_GB=24
    restart: unless-stopped
    network_mode: host

  # ============================================================================
  # TEXT GENERATION (vLLM + Qwen 2.5 7B)
  # ============================================================================
  vllm-qwen:
    build: ./vllm
    container_name: ai_vllm-qwen_1
    ports:
      - "8001:8000"
    volumes:
      - /workspace/huggingface_cache:/workspace/huggingface_cache
    environment:
      - HF_TOKEN=${HF_TOKEN}
      - VLLM_HOST=0.0.0.0
      - VLLM_PORT=8000
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles: ["text"]  # Only start when requested by orchestrator
    restart: "no"  # Orchestrator manages lifecycle ("no" quoted: bare no is a YAML boolean)

  # ============================================================================
  # IMAGE GENERATION (Flux.1 Schnell)
  # ============================================================================
  flux:
    image: ghcr.io/matatonic/openedai-images-flux:latest
    container_name: ai_flux_1
    ports:
      - "8002:5005"
    volumes:
      - /workspace/flux/models:/app/models
      - ./flux/config:/app/config:ro
    environment:
      - HF_TOKEN=${HF_TOKEN}
      - CONFIG_PATH=/app/config/config.json
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles: ["image"]  # Only start when requested by orchestrator
    restart: "no"  # Orchestrator manages lifecycle

  # ============================================================================
  # MUSIC GENERATION (MusicGen Medium)
  # ============================================================================
  musicgen:
    build: ./musicgen
    container_name: ai_musicgen_1
    ports:
      - "8003:8000"
    volumes:
      - /workspace/musicgen/models:/app/models
    environment:
      - HF_TOKEN=${HF_TOKEN}
      - MODEL_NAME=facebook/musicgen-medium
      - HOST=0.0.0.0
      - PORT=8000
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles: ["audio"]  # Only start when requested by orchestrator
    restart: "no"  # Orchestrator manages lifecycle

# ============================================================================
# VOLUMES
# ============================================================================
# Model caches are stored on RunPod's /workspace directory (922TB network volume)
# This persists across pod restarts and reduces model download times
# No named volumes - using host paths on RunPod /workspace