docs(ai): add comprehensive GPU setup documentation and configs
- Add setup guides (SETUP_GUIDE, TAILSCALE_SETUP, DOCKER_GPU_SETUP, etc.)
- Add deployment configurations (litellm-config-gpu.yaml, gpu-server-compose.yaml)
- Add GPU_DEPLOYMENT_LOG.md with current infrastructure details
- Add GPU_EXPANSION_PLAN.md with complete provider comparison
- Add deploy-gpu-stack.sh automation script

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
ai/deploy-gpu-stack.sh (executable file, 229 lines added)
@@ -0,0 +1,229 @@
#!/bin/bash
# GPU Stack Deployment Script
# Run this on the GPU server after SSH access is established

set -e  # Exit on error

echo "=================================="
echo "GPU Stack Deployment Script"
echo "=================================="
echo ""

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Functions
print_success() {
    echo -e "${GREEN}✓ $1${NC}"
}

print_error() {
    echo -e "${RED}✗ $1${NC}"
}

print_info() {
    echo -e "${YELLOW}→ $1${NC}"
}

# Check if running as root
if [[ $EUID -ne 0 ]]; then
    print_error "This script must be run as root (use sudo)"
    exit 1
fi

# Step 1: Check prerequisites
print_info "Checking prerequisites..."

if ! command -v docker &> /dev/null; then
    print_error "Docker is not installed. Please follow DOCKER_GPU_SETUP.md first."
    exit 1
fi
print_success "Docker installed"

if ! command -v nvidia-smi &> /dev/null; then
    print_error "nvidia-smi not found. Is this a GPU server?"
    exit 1
fi
print_success "NVIDIA GPU detected"

if ! docker run --rm --runtime=nvidia --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then
    print_error "Docker cannot access the GPU. Please configure the NVIDIA Container Toolkit."
    exit 1
fi
print_success "Docker GPU access working"
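
# If the GPU access check above ever fails on a fresh server, the NVIDIA Container
# Toolkit usually needs to be installed and wired into Docker. A rough sketch only;
# package repository setup and full details are covered in DOCKER_GPU_SETUP.md:
#   apt-get install -y nvidia-container-toolkit
#   nvidia-ctk runtime configure --runtime=docker
#   systemctl restart docker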

# Step 2: Create directory structure
print_info "Creating directory structure..."

mkdir -p /workspace/gpu-stack/{vllm,comfyui,training/{configs,data,output},notebooks,monitoring}
cd /workspace/gpu-stack

print_success "Directory structure created"

# Step 3: Create .env file
if [ ! -f .env ]; then
    print_info "Creating .env file..."

    cat > .env << 'EOF'
# GPU Stack Environment Variables

# Timezone
TIMEZONE=Europe/Berlin

# VPN Network
VPS_IP=10.8.0.1
GPU_IP=10.8.0.2

# Model Storage (network volume)
MODELS_PATH=/workspace/models

# Hugging Face Token (optional, for gated models like Llama)
# Get from: https://huggingface.co/settings/tokens
HF_TOKEN=

# Weights & Biases (optional, for training logging)
# Get from: https://wandb.ai/authorize
WANDB_API_KEY=

# JupyterLab Access Token
JUPYTER_TOKEN=pivoine-ai-2025

# PostgreSQL (on VPS)
DB_HOST=10.8.0.1
DB_PORT=5432
DB_USER=valknar
DB_PASSWORD=ragnarok98
DB_NAME=openwebui
EOF

    chmod 600 .env
    print_success ".env file created (please edit with your tokens)"
else
    print_success ".env file already exists"
fi
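
# Quick way to sanity-check the VPN and database settings above (a sketch; assumes the
# psql client is installed on the GPU server and the VPN tunnel to the VPS is up):
#   set -a; . ./.env; set +a
#   psql "postgresql://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME}" -c 'SELECT 1;'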

# Step 4: Download docker-compose.yaml
print_info "Downloading docker-compose.yaml..."

# In production, this would be copied from the repo
# For now, assume it's already in the current directory
if [ ! -f docker-compose.yaml ]; then
    print_error "docker-compose.yaml not found. Please copy gpu-server-compose.yaml to docker-compose.yaml"
    exit 1
fi

print_success "docker-compose.yaml found"
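
# For reference, giving a Compose service GPU access typically uses the standard
# device-reservation syntax sketched below. This is only an illustration; the real
# service definitions live in gpu-server-compose.yaml.
#
#   services:
#     vllm:
#       deploy:
#         resources:
#           reservations:
#             devices:
#               - driver: nvidia
#                 count: all
#                 capabilities: [gpu]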

# Step 5: Pre-download models (optional but recommended)
print_info "Do you want to pre-download models? (y/n)"
read -r response

if [[ "$response" =~ ^[Yy]$ ]]; then
    print_info "Downloading Llama 3.1 8B Instruct (this will take a while)..."

    mkdir -p /workspace/models

    # Use huggingface-cli to download
    pip install -q huggingface-hub

    if huggingface-cli download \
        meta-llama/Meta-Llama-3.1-8B-Instruct \
        --local-dir /workspace/models/Meta-Llama-3.1-8B-Instruct \
        --local-dir-use-symlinks False; then
        print_success "Model downloaded to /workspace/models"
    else
        print_error "Model download failed (may need HF_TOKEN)"
    fi
fi
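
# If the download fails because the model is gated, log in with the Hugging Face token
# from .env first (a sketch; assumes HF_TOKEN has been filled in and exported):
#   huggingface-cli login --token "$HF_TOKEN"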

# Step 6: Start services
print_info "Starting GPU stack services..."

docker compose up -d vllm comfyui jupyter netdata

print_success "Services starting (this may take a few minutes)..."

# Step 7: Wait for services
print_info "Waiting for services to be ready..."

sleep 10
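
# Optional stricter wait: poll the vLLM OpenAI-compatible API instead of relying on the
# fixed sleep alone. A sketch; it assumes vLLM listens on port 8000 as configured above
# and gives up quietly after ~2 minutes (model loading may take longer, which is fine).
for _ in $(seq 1 24); do
    if curl -sf http://localhost:8000/v1/models > /dev/null; then
        print_success "vLLM API is already responding"
        break
    fi
    sleep 5
done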

# Check service health
print_info "Checking service status..."

if docker ps | grep -q gpu_vllm; then
    print_success "vLLM container running"
else
    print_error "vLLM container not running"
fi

if docker ps | grep -q gpu_comfyui; then
    print_success "ComfyUI container running"
else
    print_error "ComfyUI container not running"
fi

if docker ps | grep -q gpu_jupyter; then
    print_success "JupyterLab container running"
else
    print_error "JupyterLab container not running"
fi

if docker ps | grep -q gpu_netdata; then
    print_success "Netdata container running"
else
    print_error "Netdata container not running"
fi
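
# The four checks above could also be collapsed into a loop over the expected container
# names (a sketch; names assumed to match those defined in gpu-server-compose.yaml):
#   for name in gpu_vllm gpu_comfyui gpu_jupyter gpu_netdata; do
#       if docker ps --format '{{.Names}}' | grep -q "^${name}$"; then
#           print_success "${name} running"
#       else
#           print_error "${name} not running"
#       fi
#   done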

# Step 8: Display access information
echo ""
echo "=================================="
echo "Deployment Complete!"
echo "=================================="
echo ""
echo "Services accessible via VPN (from VPS):"
echo " - vLLM API: http://10.8.0.2:8000"
echo " - ComfyUI: http://10.8.0.2:8188"
echo " - JupyterLab: http://10.8.0.2:8888 (token: pivoine-ai-2025)"
echo " - Netdata: http://10.8.0.2:19999"
echo ""
echo "Local access (from GPU server):"
echo " - vLLM API: http://localhost:8000"
echo " - ComfyUI: http://localhost:8188"
echo " - JupyterLab: http://localhost:8888"
echo " - Netdata: http://localhost:19999"
echo ""
echo "Useful commands:"
echo " - View logs: docker compose logs -f"
echo " - Check status: docker compose ps"
echo " - Stop all: docker compose down"
echo " - Restart service: docker compose restart vllm"
echo " - Start training: docker compose --profile training up -d axolotl"
echo ""
echo "Next steps:"
echo " 1. Wait for vLLM to load model (check logs: docker compose logs -f vllm)"
echo " 2. Test vLLM: curl http://localhost:8000/v1/models"
echo " 3. Configure LiteLLM on VPS to use http://10.8.0.2:8000"
echo " 4. Download ComfyUI models via web interface"
echo ""
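
# Example follow-up test once the model has loaded (a sketch: assumes the default
# OpenAI-compatible chat route and that vLLM serves the Llama 3.1 8B model downloaded
# above; LiteLLM on the VPS would point its api_base at http://10.8.0.2:8000/v1):
#   curl http://localhost:8000/v1/chat/completions \
#     -H 'Content-Type: application/json' \
#     -d '{"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
#          "messages": [{"role": "user", "content": "Say hello"}]}'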

# Step 9: Create helpful aliases
print_info "Creating helpful aliases..."

cat >> ~/.bashrc << 'EOF'

# GPU Stack Aliases
alias gpu-logs='cd /workspace/gpu-stack && docker compose logs -f'
alias gpu-ps='cd /workspace/gpu-stack && docker compose ps'
alias gpu-restart='cd /workspace/gpu-stack && docker compose restart'
alias gpu-down='cd /workspace/gpu-stack && docker compose down'
alias gpu-up='cd /workspace/gpu-stack && docker compose up -d'
alias gpu-stats='watch -n 1 nvidia-smi'
alias gpu-top='nvtop'
EOF

print_success "Aliases added to ~/.bashrc (reload with: source ~/.bashrc)"

echo ""
print_success "All done! 🚀"