docker-compose/ai/deploy-gpu-stack.sh
Sebastian Krüger 8de88d96ac docs(ai): add comprehensive GPU setup documentation and configs
- Add setup guides (SETUP_GUIDE, TAILSCALE_SETUP, DOCKER_GPU_SETUP, etc.)
- Add deployment configurations (litellm-config-gpu.yaml, gpu-server-compose.yaml)
- Add GPU_DEPLOYMENT_LOG.md with current infrastructure details
- Add GPU_EXPANSION_PLAN.md with complete provider comparison
- Add deploy-gpu-stack.sh automation script

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-21 12:57:06 +01:00

#!/bin/bash
# GPU Stack Deployment Script
# Run this on the GPU server after SSH access is established
set -e # Exit on error
echo "=================================="
echo "GPU Stack Deployment Script"
echo "=================================="
echo ""

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Functions
print_success() {
    echo -e "${GREEN}$1${NC}"
}

print_error() {
    echo -e "${RED}$1${NC}"
}

print_info() {
    echo -e "${YELLOW}$1${NC}"
}

# Check if running as root
if [[ $EUID -ne 0 ]]; then
    print_error "This script must be run as root (use sudo)"
    exit 1
fi

# Step 1: Check prerequisites
print_info "Checking prerequisites..."

if ! command -v docker &> /dev/null; then
    print_error "Docker is not installed. Please follow DOCKER_GPU_SETUP.md first."
    exit 1
fi
print_success "Docker installed"

if ! command -v nvidia-smi &> /dev/null; then
    print_error "nvidia-smi not found. Is this a GPU server?"
    exit 1
fi
print_success "NVIDIA GPU detected"

if ! docker run --rm --runtime=nvidia --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then
    print_error "Docker cannot access the GPU. Please configure the NVIDIA Container Toolkit."
    exit 1
fi
print_success "Docker GPU access working"

# Step 2: Create directory structure
print_info "Creating directory structure..."
mkdir -p /workspace/gpu-stack/{vllm,comfyui,training/{configs,data,output},notebooks,monitoring}
cd /workspace/gpu-stack
print_success "Directory structure created"

# Step 3: Create .env file
if [ ! -f .env ]; then
    print_info "Creating .env file..."
    cat > .env << 'EOF'
# GPU Stack Environment Variables

# Timezone
TIMEZONE=Europe/Berlin

# VPN Network
VPS_IP=10.8.0.1
GPU_IP=10.8.0.2

# Model Storage (network volume)
MODELS_PATH=/workspace/models

# Hugging Face Token (optional, for gated models like Llama)
# Get from: https://huggingface.co/settings/tokens
HF_TOKEN=

# Weights & Biases (optional, for training logging)
# Get from: https://wandb.ai/authorize
WANDB_API_KEY=

# JupyterLab Access Token
JUPYTER_TOKEN=pivoine-ai-2025

# PostgreSQL (on VPS)
DB_HOST=10.8.0.1
DB_PORT=5432
DB_USER=valknar
DB_PASSWORD=ragnarok98
DB_NAME=openwebui
EOF
    chmod 600 .env
    print_success ".env file created (please edit with your tokens)"
else
    print_success ".env file already exists"
fi
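
# Optional reminder (a small added check; it warns but never aborts): gated
# repositories such as Llama require HF_TOKEN to be filled in above.
if ! grep -q '^HF_TOKEN=.\+' .env 2>/dev/null; then
    print_info "Note: HF_TOKEN is empty in .env (required for gated models like Llama)."
fi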

# Step 4: Locate docker-compose.yaml
print_info "Checking for docker-compose.yaml..."
# In production this file would be copied from the repo; for now we assume
# it is already in the current directory.
if [ ! -f docker-compose.yaml ]; then
    print_error "docker-compose.yaml not found. Please copy gpu-server-compose.yaml to docker-compose.yaml"
    exit 1
fi
print_success "docker-compose.yaml found"

# Step 5: Pre-download models (optional but recommended)
print_info "Do you want to pre-download models? (y/n)"
read -r response
if [[ "$response" =~ ^[Yy]$ ]]; then
    print_info "Downloading Llama 3.1 8B Instruct (this will take a while)..."
    mkdir -p /workspace/models
    # Use huggingface-cli to download
    pip install -q huggingface-hub
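
    # Optional pre-flight (a sketch; the 20 GB threshold is an assumption):
    # Llama 3.1 8B in bf16 occupies roughly 16 GB on disk, so warn when
    # /workspace has less headroom than that.
    avail_gb=$(df -BG --output=avail /workspace | tail -1 | tr -dc '0-9')
    if [ -n "$avail_gb" ] && [ "$avail_gb" -lt 20 ]; then
        print_error "Only ${avail_gb}G free on /workspace; the download may not fit."
    fi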
    if huggingface-cli download \
        meta-llama/Meta-Llama-3.1-8B-Instruct \
        --local-dir /workspace/models/Meta-Llama-3.1-8B-Instruct \
        --local-dir-use-symlinks False; then
        print_success "Model downloaded to /workspace/models"
    else
        print_error "Model download failed (may need HF_TOKEN)"
    fi
fi

# Step 6: Start services
print_info "Starting GPU stack services..."
docker compose up -d vllm comfyui jupyter netdata
print_success "Services starting (this may take a few minutes)..."

# Step 7: Wait for services
print_info "Waiting for services to be ready..."
sleep 10

# Check service health (match exact container names, not substrings)
print_info "Checking service status..."
check_container() {
    if docker ps --format '{{.Names}}' | grep -q "^$1$"; then
        print_success "$2 container running"
    else
        print_error "$2 container not running"
    fi
}
check_container gpu_vllm "vLLM"
check_container gpu_comfyui "ComfyUI"
check_container gpu_jupyter "JupyterLab"
check_container gpu_netdata "Netdata"
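
# Optional readiness wait (a sketch): poll vLLM's OpenAI-compatible /v1/models
# endpoint until the model has loaded. The 60 x 5 s budget is an assumption;
# large models can take longer. The loop never aborts the script.
print_info "Waiting for the vLLM API (model load can take several minutes)..."
for _ in $(seq 1 60); do
    if curl -sf http://localhost:8000/v1/models > /dev/null; then
        print_success "vLLM API is responding"
        break
    fi
    sleep 5
done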

# Step 8: Display access information
echo ""
echo "=================================="
echo "Deployment Complete!"
echo "=================================="
echo ""
echo "Services accessible via VPN (from VPS):"
echo " - vLLM API: http://10.8.0.2:8000"
echo " - ComfyUI: http://10.8.0.2:8188"
echo " - JupyterLab: http://10.8.0.2:8888 (token: pivoine-ai-2025)"
echo " - Netdata: http://10.8.0.2:19999"
echo ""
echo "Local access (from GPU server):"
echo " - vLLM API: http://localhost:8000"
echo " - ComfyUI: http://localhost:8188"
echo " - JupyterLab: http://localhost:8888"
echo " - Netdata: http://localhost:19999"
echo ""
echo "Useful commands:"
echo " - View logs: docker compose logs -f"
echo " - Check status: docker compose ps"
echo " - Stop all: docker compose down"
echo " - Restart service: docker compose restart vllm"
echo " - Start training: docker compose --profile training up -d axolotl"
echo ""
echo "Next steps:"
echo " 1. Wait for vLLM to load model (check logs: docker compose logs -f vllm)"
echo " 2. Test vLLM: curl http://localhost:8000/v1/models"
echo " 3. Configure LiteLLM on VPS to use http://10.8.0.2:8000"
echo " 4. Download ComfyUI models via web interface"
echo ""

# Step 9: Create helpful aliases (skip if already present, so reruns
# don't append duplicates to ~/.bashrc)
print_info "Creating helpful aliases..."
if ! grep -q "# GPU Stack Aliases" ~/.bashrc 2>/dev/null; then
    cat >> ~/.bashrc << 'EOF'

# GPU Stack Aliases
alias gpu-logs='cd /workspace/gpu-stack && docker compose logs -f'
alias gpu-ps='cd /workspace/gpu-stack && docker compose ps'
alias gpu-restart='cd /workspace/gpu-stack && docker compose restart'
alias gpu-down='cd /workspace/gpu-stack && docker compose down'
alias gpu-up='cd /workspace/gpu-stack && docker compose up -d'
alias gpu-stats='watch -n 1 nvidia-smi'
alias gpu-top='nvtop'
EOF
    print_success "Aliases added to ~/.bashrc (reload with: source ~/.bashrc)"
else
    print_success "Aliases already present in ~/.bashrc"
fi
echo ""
print_success "All done! 🚀"