# GPU Server Docker Compose Configuration
# Deploy on RunPod GPU server (10.8.0.2)
# Services accessible from VPS (10.8.0.1) via WireGuard VPN

version: '3.8'

services:
  # =============================================================================
  # vLLM - High-performance LLM Inference Server
  # =============================================================================
  vllm:
    image: vllm/vllm-openai:latest
    container_name: gpu_vllm
    restart: unless-stopped
    runtime: nvidia
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      CUDA_VISIBLE_DEVICES: "0"
      HF_TOKEN: ${HF_TOKEN:-}
    volumes:
      - ${MODELS_PATH:-/workspace/models}:/root/.cache/huggingface
    command:
      - --model
      - meta-llama/Meta-Llama-3.1-8B-Instruct  # Change model here
      - --host
      - "0.0.0.0"
      - --port
      - "8000"
      - --tensor-parallel-size
      - "1"
      - --gpu-memory-utilization
      - "0.85"  # Leave 15% for other tasks
      - --max-model-len
      - "8192"
      - --dtype
      - auto
      - --trust-remote-code
    ports:
      - "8000:8000"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 120s  # Model loading takes time
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    labels:
      - "service=vllm"
      - "stack=gpu-ai"

  # =============================================================================
  # ComfyUI - Advanced Stable Diffusion Interface
  # =============================================================================
  comfyui:
    image: ghcr.io/ai-dock/comfyui:latest
    container_name: gpu_comfyui
    restart: unless-stopped
    runtime: nvidia
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      TZ: ${TIMEZONE:-Europe/Berlin}
      # ComfyUI auto-installs custom nodes on first run
      COMFYUI_FLAGS: "--listen 0.0.0.0 --port 8188"
    volumes:
      - comfyui_data:/data
      - ${MODELS_PATH:-/workspace/models}/comfyui:/opt/ComfyUI/models
      - comfyui_output:/opt/ComfyUI/output
      - comfyui_input:/opt/ComfyUI/input
    ports:
      - "8188:8188"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8188/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    labels:
      - "service=comfyui"
      - "stack=gpu-ai"

  # =============================================================================
  # Axolotl - LLM Fine-tuning Framework
  # =============================================================================
  # Note: This service uses "profiles" - it only starts when explicitly requested.
  # Start with: docker compose --profile training up -d axolotl
  # (see the example run command after this service definition)
  axolotl:
    image: winglian/axolotl:main-py3.11-cu121-2.2.2
    container_name: gpu_training
    runtime: nvidia
    volumes:
      - ./training/configs:/workspace/configs
      - ./training/data:/workspace/data
      - ./training/output:/workspace/output
      - ${MODELS_PATH:-/workspace/models}:/workspace/models
      - training_cache:/root/.cache
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      WANDB_API_KEY: ${WANDB_API_KEY:-}
      HF_TOKEN: ${HF_TOKEN:-}
    working_dir: /workspace
    # Default command - override when running a specific training job
    command: sleep infinity
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles:
      - training
    labels:
      - "service=axolotl"
      - "stack=gpu-ai"
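  # Example (a sketch, not part of the stack): once the training profile is up,
  # a fine-tuning run can be launched inside the container. "lora-llama3.yml" is
  # a hypothetical config you would place in ./training/configs:
  #
  #   docker compose --profile training up -d axolotl
  #   docker compose exec axolotl \
  #     accelerate launch -m axolotl.cli.train /workspace/configs/lora-llama3.yml
  #
  # Checkpoints are written to ./training/output via the bind mount above.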
  # =============================================================================
  # JupyterLab - Interactive Development Environment
  # =============================================================================
  jupyter:
    image: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-devel
    container_name: gpu_jupyter
    restart: unless-stopped
    runtime: nvidia
    volumes:
      - ./notebooks:/workspace/notebooks
      - ${MODELS_PATH:-/workspace/models}:/workspace/models
      - jupyter_cache:/root/.cache
    ports:
      - "8888:8888"
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      JUPYTER_ENABLE_LAB: "yes"
      JUPYTER_TOKEN: ${JUPYTER_TOKEN:-pivoine-ai-2025}
      HF_TOKEN: ${HF_TOKEN:-}
    command: >
      bash -c "pip install --quiet jupyterlab transformers datasets accelerate
      bitsandbytes peft trl sentencepiece protobuf &&
      jupyter lab --ip=0.0.0.0 --port=8888 --allow-root --no-browser
      --NotebookApp.token='${JUPYTER_TOKEN:-pivoine-ai-2025}'"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8888/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    labels:
      - "service=jupyter"
      - "stack=gpu-ai"

  # =============================================================================
  # Netdata - System & GPU Monitoring
  # =============================================================================
  netdata:
    image: netdata/netdata:latest
    container_name: gpu_netdata
    restart: unless-stopped
    runtime: nvidia
    hostname: gpu-runpod
    cap_add:
      - SYS_PTRACE
      - SYS_ADMIN
    security_opt:
      - apparmor:unconfined
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      TZ: ${TIMEZONE:-Europe/Berlin}
    volumes:
      - /sys:/host/sys:ro
      - /proc:/host/proc:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /etc/os-release:/host/etc/os-release:ro
      - netdata_config:/etc/netdata
      - netdata_cache:/var/cache/netdata
      - netdata_lib:/var/lib/netdata
    ports:
      - "19999:19999"
    labels:
      - "service=netdata"
      - "stack=gpu-ai"

# =============================================================================
# Volumes
# =============================================================================
volumes:
  # ComfyUI data
  comfyui_data:
    driver: local
  comfyui_output:
    driver: local
  comfyui_input:
    driver: local
  # Training data
  training_cache:
    driver: local
  # Jupyter data
  jupyter_cache:
    driver: local
  # Netdata data
  netdata_config:
    driver: local
  netdata_cache:
    driver: local
  netdata_lib:
    driver: local

# =============================================================================
# Networks
# =============================================================================
networks:
  default:
    driver: bridge
    ipam:
      config:
        - subnet: 172.25.0.0/24
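# =============================================================================
# Usage examples (sketches - endpoints follow from the port mappings above;
# from the VPS, reach them at 10.8.0.2 over the WireGuard tunnel)
# =============================================================================
#
# Bring up the always-on services, plus the training profile when needed:
#   docker compose up -d
#   docker compose --profile training up -d axolotl
#
# Smoke-test vLLM's OpenAI-compatible API once the health check passes:
#   curl http://10.8.0.2:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
#          "messages": [{"role": "user", "content": "Hello"}]}'
#
# Web UIs over the tunnel:
#   ComfyUI:  http://10.8.0.2:8188
#   Jupyter:  http://10.8.0.2:8888   (token: JUPYTER_TOKEN from .env)
#   Netdata:  http://10.8.0.2:19999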