# runpod/gpu-server-compose.yaml

# GPU Server Docker Compose Configuration
# Deploy on RunPod GPU server (10.8.0.2)
# Services accessible from VPS (10.8.0.1) via WireGuard VPN
version: '3.8'
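# All ${VAR:-default} references below are resolved by Docker Compose from the
# environment or a `.env` file next to this compose file. An illustrative
# `.env` (every value here is a placeholder, not a real secret):
#
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxx       # Hugging Face token for gated models
#   MODELS_PATH=/workspace/models      # Shared model cache on the pod volume
#   TIMEZONE=Europe/Berlin
#   WANDB_API_KEY=                     # Optional, for training telemetry
#   JUPYTER_TOKEN=change-me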
services:
  # =============================================================================
  # vLLM - High-performance LLM Inference Server
  # =============================================================================
  vllm:
    image: vllm/vllm-openai:latest
    container_name: gpu_vllm
    restart: unless-stopped
    runtime: nvidia
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      CUDA_VISIBLE_DEVICES: "0"
      HF_TOKEN: ${HF_TOKEN:-}
    volumes:
      - ${MODELS_PATH:-/workspace/models}:/root/.cache/huggingface
    command:
      - --model
      - meta-llama/Meta-Llama-3.1-8B-Instruct # Change model here
      - --host
      - 0.0.0.0
      - --port
      - "8000" # Quoted so Compose treats it as a string, not an integer
      - --tensor-parallel-size
      - "1"
      - --gpu-memory-utilization
      - "0.85" # Leave 15% of VRAM headroom for other tasks
      - --max-model-len
      - "8192"
      - --dtype
      - auto
      - --trust-remote-code
    ports:
      - "8000:8000"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 120s # Model loading takes time
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    labels:
      - "service=vllm"
      - "stack=gpu-ai"
  # =============================================================================
  # ComfyUI - Advanced Stable Diffusion Interface
  # =============================================================================
  comfyui:
    image: ghcr.io/ai-dock/comfyui:latest
    container_name: gpu_comfyui
    restart: unless-stopped
    runtime: nvidia
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      TZ: ${TIMEZONE:-Europe/Berlin}
      # ComfyUI auto-installs custom nodes on first run
      COMFYUI_FLAGS: "--listen 0.0.0.0 --port 8188"
    volumes:
      - comfyui_data:/data
      - ${MODELS_PATH:-/workspace/models}/comfyui:/opt/ComfyUI/models
      - comfyui_output:/opt/ComfyUI/output
      - comfyui_input:/opt/ComfyUI/input
    ports:
      - "8188:8188"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8188/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    labels:
      - "service=comfyui"
      - "stack=gpu-ai"
  # =============================================================================
  # Axolotl - LLM Fine-tuning Framework
  # =============================================================================
  # Note: This service uses "profiles" - it only starts when explicitly requested.
  # Start with: docker compose --profile training up -d axolotl
  axolotl:
    image: winglian/axolotl:main-py3.11-cu121-2.2.2
    container_name: gpu_training
    runtime: nvidia
    volumes:
      - ./training/configs:/workspace/configs
      - ./training/data:/workspace/data
      - ./training/output:/workspace/output
      - ${MODELS_PATH:-/workspace/models}:/workspace/models
      - training_cache:/root/.cache
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      WANDB_API_KEY: ${WANDB_API_KEY:-}
      HF_TOKEN: ${HF_TOKEN:-}
    working_dir: /workspace
    # Default command keeps the container idle - override it for a specific training run
    command: sleep infinity
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles:
      - training
    labels:
      - "service=axolotl"
      - "stack=gpu-ai"
  # =============================================================================
  # JupyterLab - Interactive Development Environment
  # =============================================================================
  jupyter:
    image: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-devel
    container_name: gpu_jupyter
    restart: unless-stopped
    runtime: nvidia
    volumes:
      - ./notebooks:/workspace/notebooks
      - ${MODELS_PATH:-/workspace/models}:/workspace/models
      - jupyter_cache:/root/.cache
    ports:
      - "8888:8888"
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      JUPYTER_ENABLE_LAB: "yes"
      JUPYTER_TOKEN: ${JUPYTER_TOKEN:-pivoine-ai-2025}
      HF_TOKEN: ${HF_TOKEN:-}
    command: |
      bash -c "
      pip install --quiet jupyterlab transformers datasets accelerate bitsandbytes peft trl sentencepiece protobuf &&
      jupyter lab --ip=0.0.0.0 --port=8888 --allow-root --no-browser --NotebookApp.token='${JUPYTER_TOKEN:-pivoine-ai-2025}'
      "
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8888/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    labels:
      - "service=jupyter"
      - "stack=gpu-ai"
  # =============================================================================
  # Netdata - System & GPU Monitoring
  # =============================================================================
  netdata:
    image: netdata/netdata:latest
    container_name: gpu_netdata
    restart: unless-stopped
    runtime: nvidia
    hostname: gpu-runpod
    cap_add:
      - SYS_PTRACE
      - SYS_ADMIN
    security_opt:
      - apparmor:unconfined
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      TZ: ${TIMEZONE:-Europe/Berlin}
    volumes:
      - /sys:/host/sys:ro
      - /proc:/host/proc:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /etc/os-release:/host/etc/os-release:ro
      - netdata_config:/etc/netdata
      - netdata_cache:/var/cache/netdata
      - netdata_lib:/var/lib/netdata
    ports:
      - "19999:19999"
    labels:
      - "service=netdata"
      - "stack=gpu-ai"
# =============================================================================
# Volumes
# =============================================================================
volumes:
  # ComfyUI data
  comfyui_data:
    driver: local
  comfyui_output:
    driver: local
  comfyui_input:
    driver: local
  # Training data
  training_cache:
    driver: local
  # Jupyter data
  jupyter_cache:
    driver: local
  # Netdata data
  netdata_config:
    driver: local
  netdata_cache:
    driver: local
  netdata_lib:
    driver: local
# =============================================================================
# Networks
# =============================================================================
networks:
  default:
    driver: bridge
    ipam:
      config:
        - subnet: 172.25.0.0/24
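# =============================================================================
# Usage
# =============================================================================
# A minimal bring-up and smoke test from the GPU host (a sketch - adjust the
# -f path to wherever this file actually lives):
#
#   docker compose -f runpod/gpu-server-compose.yaml up -d
#   docker compose -f runpod/gpu-server-compose.yaml ps
#   curl -f http://localhost:8000/health   # vLLM (allow ~2 min for model load)
#   curl -f http://localhost:8188/         # ComfyUI
#   curl -f http://localhost:19999/        # Netdata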