# GPU Server Docker Compose Configuration
# Deploy on RunPod GPU server (10.8.0.2)
# Services accessible from VPS (10.8.0.1) via WireGuard VPN
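#
# Bring up the always-on services (vllm, comfyui, jupyter, netdata):
#   docker compose up -d
# The axolotl service is opt-in via a Compose profile; see its section below.
# Note: Compose v2 ignores the top-level `version` key (it logs a warning).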

version: '3.8'

services:
  # =============================================================================
  # vLLM - High-performance LLM Inference Server
  # =============================================================================
  vllm:
    image: vllm/vllm-openai:latest
    container_name: gpu_vllm
    restart: unless-stopped
    runtime: nvidia
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      CUDA_VISIBLE_DEVICES: "0"
      HF_TOKEN: ${HF_TOKEN:-}
    volumes:
      - ${MODELS_PATH:-/workspace/models}:/root/.cache/huggingface
    command:
      - --model
      - meta-llama/Meta-Llama-3.1-8B-Instruct  # Change model here
      - --host
      - 0.0.0.0
      - --port
      - "8000"
      - --tensor-parallel-size
      - "1"
      - --gpu-memory-utilization
      - "0.85"  # Leave 15% for other tasks
      - --max-model-len
      - "8192"
      - --dtype
      - auto
      - --trust-remote-code
    ports:
      - "8000:8000"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 120s  # Model loading takes time
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    labels:
      - "service=vllm"
      - "stack=gpu-ai"

  # =============================================================================
  # ComfyUI - Advanced Stable Diffusion Interface
  # =============================================================================
  comfyui:
    image: ghcr.io/ai-dock/comfyui:latest
    container_name: gpu_comfyui
    restart: unless-stopped
    runtime: nvidia
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      TZ: ${TIMEZONE:-Europe/Berlin}
      # ComfyUI auto-installs custom nodes on first run
      COMFYUI_FLAGS: "--listen 0.0.0.0 --port 8188"
    volumes:
      - comfyui_data:/data
      - ${MODELS_PATH:-/workspace/models}/comfyui:/opt/ComfyUI/models
      - comfyui_output:/opt/ComfyUI/output
      - comfyui_input:/opt/ComfyUI/input
    ports:
      - "8188:8188"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8188/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    labels:
      - "service=comfyui"
      - "stack=gpu-ai"

  # =============================================================================
  # Axolotl - LLM Fine-tuning Framework
  # =============================================================================
  # Note: This service uses "profiles" - it only starts when explicitly requested
  # Start with: docker compose --profile training up -d axolotl
  axolotl:
    image: winglian/axolotl:main-py3.11-cu121-2.2.2
    container_name: gpu_training
    runtime: nvidia
    volumes:
      - ./training/configs:/workspace/configs
      - ./training/data:/workspace/data
      - ./training/output:/workspace/output
      - ${MODELS_PATH:-/workspace/models}:/workspace/models
      - training_cache:/root/.cache
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      WANDB_API_KEY: ${WANDB_API_KEY:-}
      HF_TOKEN: ${HF_TOKEN:-}
    working_dir: /workspace
    # Default command - override when running specific training
    command: sleep infinity
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles:
      - training
    labels:
      - "service=axolotl"
      - "stack=gpu-ai"

  # =============================================================================
  # JupyterLab - Interactive Development Environment
  # =============================================================================
  jupyter:
    image: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-devel
    container_name: gpu_jupyter
    restart: unless-stopped
    runtime: nvidia
    volumes:
      - ./notebooks:/workspace/notebooks
      - ${MODELS_PATH:-/workspace/models}:/workspace/models
      - jupyter_cache:/root/.cache
    ports:
      - "8888:8888"
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      JUPYTER_ENABLE_LAB: "yes"
      JUPYTER_TOKEN: ${JUPYTER_TOKEN:-pivoine-ai-2025}
      HF_TOKEN: ${HF_TOKEN:-}
    command: |
      bash -c "
      pip install --quiet jupyterlab transformers datasets accelerate bitsandbytes peft trl sentencepiece protobuf &&
      jupyter lab --ip=0.0.0.0 --port=8888 --allow-root --no-browser --NotebookApp.token='${JUPYTER_TOKEN:-pivoine-ai-2025}'
      "
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8888/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    labels:
      - "service=jupyter"
      - "stack=gpu-ai"

  # =============================================================================
  # Netdata - System & GPU Monitoring
  # =============================================================================
  netdata:
    image: netdata/netdata:latest
    container_name: gpu_netdata
    restart: unless-stopped
    runtime: nvidia
    hostname: gpu-runpod
    cap_add:
      - SYS_PTRACE
      - SYS_ADMIN
    security_opt:
      - apparmor:unconfined
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      TZ: ${TIMEZONE:-Europe/Berlin}
    volumes:
      - /sys:/host/sys:ro
      - /proc:/host/proc:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /etc/os-release:/host/etc/os-release:ro
      - netdata_config:/etc/netdata
      - netdata_cache:/var/cache/netdata
      - netdata_lib:/var/lib/netdata
    ports:
      - "19999:19999"
    labels:
      - "service=netdata"
      - "stack=gpu-ai"

# =============================================================================
# Volumes
# =============================================================================
volumes:
  # ComfyUI data
  comfyui_data:
    driver: local
  comfyui_output:
    driver: local
  comfyui_input:
    driver: local

  # Training data
  training_cache:
    driver: local

  # Jupyter data
  jupyter_cache:
    driver: local

  # Netdata data
  netdata_config:
    driver: local
  netdata_cache:
    driver: local
  netdata_lib:
    driver: local

# =============================================================================
# Networks
# =============================================================================
networks:
  default:
    driver: bridge
    ipam:
      config:
        - subnet: 172.25.0.0/24
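        # Fixed subnet keeps container addresses predictable; change it if
        # 172.25.0.0/24 collides with an existing route (Docker's default
        # address pool starts at 172.17.0.0/16).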