docker-compose/ai/deploy-gpu-stack.sh
Sebastian Krüger 8de88d96ac docs(ai): add comprehensive GPU setup documentation and configs
- Add setup guides (SETUP_GUIDE, TAILSCALE_SETUP, DOCKER_GPU_SETUP, etc.)
- Add deployment configurations (litellm-config-gpu.yaml, gpu-server-compose.yaml)
- Add GPU_DEPLOYMENT_LOG.md with current infrastructure details
- Add GPU_EXPANSION_PLAN.md with complete provider comparison
- Add deploy-gpu-stack.sh automation script

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-21 12:57:06 +01:00

#!/bin/bash
# GPU Stack Deployment Script
# Run this on the GPU server after SSH access is established
set -e # Exit on error
echo "=================================="
echo "GPU Stack Deployment Script"
echo "=================================="
echo ""

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Functions
print_success() {
    echo -e "${GREEN}$1${NC}"
}

print_error() {
    echo -e "${RED}$1${NC}"
}

print_info() {
    echo -e "${YELLOW}$1${NC}"
}

# Check if running as root
if [[ $EUID -ne 0 ]]; then
    print_error "This script must be run as root (use sudo)"
    exit 1
fi

# Step 1: Check prerequisites
print_info "Checking prerequisites..."

if ! command -v docker &> /dev/null; then
    print_error "Docker is not installed. Please follow DOCKER_GPU_SETUP.md first."
    exit 1
fi
print_success "Docker installed"

if ! command -v nvidia-smi &> /dev/null; then
    print_error "nvidia-smi not found. Is this a GPU server?"
    exit 1
fi
print_success "NVIDIA GPU detected"

if ! docker run --rm --runtime=nvidia --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then
    print_error "Docker cannot access the GPU. Please configure the NVIDIA Container Toolkit."
    exit 1
fi
print_success "Docker GPU access working"

# Step 2: Create directory structure
print_info "Creating directory structure..."
mkdir -p /workspace/gpu-stack/{vllm,comfyui,training/{configs,data,output},notebooks,monitoring}
cd /workspace/gpu-stack
print_success "Directory structure created"

# Step 3: Create .env file
if [ ! -f .env ]; then
    print_info "Creating .env file..."
    cat > .env << 'EOF'
# GPU Stack Environment Variables

# Timezone
TIMEZONE=Europe/Berlin

# VPN Network
VPS_IP=10.8.0.1
GPU_IP=10.8.0.2

# Model Storage (network volume)
MODELS_PATH=/workspace/models

# Hugging Face Token (optional, for gated models like Llama)
# Get from: https://huggingface.co/settings/tokens
HF_TOKEN=

# Weights & Biases (optional, for training logging)
# Get from: https://wandb.ai/authorize
WANDB_API_KEY=

# JupyterLab Access Token
JUPYTER_TOKEN=pivoine-ai-2025

# PostgreSQL (on VPS)
DB_HOST=10.8.0.1
DB_PORT=5432
DB_USER=valknar
DB_PASSWORD=ragnarok98
DB_NAME=openwebui
EOF
    chmod 600 .env
    print_success ".env file created (please edit with your tokens)"
else
    print_success ".env file already exists"
fi
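
# Optional reminder (a small added check; it warns but never aborts): gated
# repositories such as Llama require HF_TOKEN to be filled in above.
if ! grep -q '^HF_TOKEN=.\+' .env 2>/dev/null; then
    print_info "Note: HF_TOKEN is empty in .env (required for gated models like Llama)."
fi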

# Step 4: Locate docker-compose.yaml
print_info "Checking for docker-compose.yaml..."
# In production this file would be copied from the repo; for now we assume
# it is already in the current directory.
if [ ! -f docker-compose.yaml ]; then
    print_error "docker-compose.yaml not found. Please copy gpu-server-compose.yaml to docker-compose.yaml"
    exit 1
fi
print_success "docker-compose.yaml found"

# Step 5: Pre-download models (optional but recommended)
print_info "Do you want to pre-download models? (y/n)"
read -r response
if [[ "$response" =~ ^[Yy]$ ]]; then
    print_info "Downloading Llama 3.1 8B Instruct (this will take a while)..."
    mkdir -p /workspace/models
    # Use huggingface-cli to download
    pip install -q huggingface-hub
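
    # Optional pre-flight (a sketch; the 20 GB threshold is an assumption):
    # Llama 3.1 8B in bf16 occupies roughly 16 GB on disk, so warn when
    # /workspace has less headroom than that.
    avail_gb=$(df -BG --output=avail /workspace | tail -1 | tr -dc '0-9')
    if [ -n "$avail_gb" ] && [ "$avail_gb" -lt 20 ]; then
        print_error "Only ${avail_gb}G free on /workspace; the download may not fit."
    fi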
    if huggingface-cli download \
        meta-llama/Meta-Llama-3.1-8B-Instruct \
        --local-dir /workspace/models/Meta-Llama-3.1-8B-Instruct \
        --local-dir-use-symlinks False; then
        print_success "Model downloaded to /workspace/models"
    else
        print_error "Model download failed (may need HF_TOKEN)"
    fi
fi

# Step 6: Start services
print_info "Starting GPU stack services..."
docker compose up -d vllm comfyui jupyter netdata
print_success "Services starting (this may take a few minutes)..."

# Step 7: Wait for services
print_info "Waiting for services to be ready..."
sleep 10

# Check service health (match exact container names, not substrings)
print_info "Checking service status..."
check_container() {
    if docker ps --format '{{.Names}}' | grep -q "^$1$"; then
        print_success "$2 container running"
    else
        print_error "$2 container not running"
    fi
}
check_container gpu_vllm "vLLM"
check_container gpu_comfyui "ComfyUI"
check_container gpu_jupyter "JupyterLab"
check_container gpu_netdata "Netdata"
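
# Optional readiness wait (a sketch): poll vLLM's OpenAI-compatible /v1/models
# endpoint until the model has loaded. The 60 x 5 s budget is an assumption;
# large models can take longer. The loop never aborts the script.
print_info "Waiting for the vLLM API (model load can take several minutes)..."
for _ in $(seq 1 60); do
    if curl -sf http://localhost:8000/v1/models > /dev/null; then
        print_success "vLLM API is responding"
        break
    fi
    sleep 5
done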

# Step 8: Display access information
echo ""
echo "=================================="
echo "Deployment Complete!"
echo "=================================="
echo ""
echo "Services accessible via VPN (from VPS):"
echo " - vLLM API: http://10.8.0.2:8000"
echo " - ComfyUI: http://10.8.0.2:8188"
echo " - JupyterLab: http://10.8.0.2:8888 (token: pivoine-ai-2025)"
echo " - Netdata: http://10.8.0.2:19999"
echo ""
echo "Local access (from GPU server):"
echo " - vLLM API: http://localhost:8000"
echo " - ComfyUI: http://localhost:8188"
echo " - JupyterLab: http://localhost:8888"
echo " - Netdata: http://localhost:19999"
echo ""
echo "Useful commands:"
echo " - View logs: docker compose logs -f"
echo " - Check status: docker compose ps"
echo " - Stop all: docker compose down"
echo " - Restart service: docker compose restart vllm"
echo " - Start training: docker compose --profile training up -d axolotl"
echo ""
echo "Next steps:"
echo " 1. Wait for vLLM to load model (check logs: docker compose logs -f vllm)"
echo " 2. Test vLLM: curl http://localhost:8000/v1/models"
echo " 3. Configure LiteLLM on VPS to use http://10.8.0.2:8000"
echo " 4. Download ComfyUI models via web interface"
echo ""

# Step 9: Create helpful aliases (skip if already present, so reruns
# don't append duplicates to ~/.bashrc)
print_info "Creating helpful aliases..."
if ! grep -q "# GPU Stack Aliases" ~/.bashrc 2>/dev/null; then
    cat >> ~/.bashrc << 'EOF'

# GPU Stack Aliases
alias gpu-logs='cd /workspace/gpu-stack && docker compose logs -f'
alias gpu-ps='cd /workspace/gpu-stack && docker compose ps'
alias gpu-restart='cd /workspace/gpu-stack && docker compose restart'
alias gpu-down='cd /workspace/gpu-stack && docker compose down'
alias gpu-up='cd /workspace/gpu-stack && docker compose up -d'
alias gpu-stats='watch -n 1 nvidia-smi'
alias gpu-top='nvtop'
EOF
    print_success "Aliases added to ~/.bashrc (reload with: source ~/.bashrc)"
else
    print_success "Aliases already present in ~/.bashrc"
fi
echo ""
print_success "All done! 🚀"