#!/bin/bash
# GPU Stack Deployment Script
# Run this on the GPU server after SSH access is established

set -e # Exit on error

echo "=================================="
echo "GPU Stack Deployment Script"
echo "=================================="
echo ""

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Functions
print_success() {
    echo -e "${GREEN}✓ $1${NC}"
}

print_error() {
    echo -e "${RED}✗ $1${NC}"
}

print_info() {
    echo -e "${YELLOW}→ $1${NC}"
}

# Check if running as root
if [[ $EUID -ne 0 ]]; then
    print_error "This script must be run as root (use sudo)"
    exit 1
fi

# Step 1: Check prerequisites
print_info "Checking prerequisites..."

if ! command -v docker &> /dev/null; then
    print_error "Docker is not installed. Please run DOCKER_GPU_SETUP.md first."
    exit 1
fi
print_success "Docker installed"

if ! command -v nvidia-smi &> /dev/null; then
    print_error "nvidia-smi not found. Is this a GPU server?"
    exit 1
fi
print_success "NVIDIA GPU detected"

if ! docker run --rm --runtime=nvidia --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then
    print_error "Docker cannot access GPU. Please configure NVIDIA Container Toolkit."
    exit 1
fi
print_success "Docker GPU access working"

# Step 2: Create directory structure
print_info "Creating directory structure..."

mkdir -p /workspace/gpu-stack/{vllm,comfyui,training/{configs,data,output},notebooks,monitoring}
cd /workspace/gpu-stack

print_success "Directory structure created"

# Step 3: Create .env file
if [ ! -f .env ]; then
    print_info "Creating .env file..."

    cat > .env << 'EOF'
# GPU Stack Environment Variables

# Timezone
TIMEZONE=Europe/Berlin

# VPN Network
VPS_IP=10.8.0.1
GPU_IP=10.8.0.2

# Model Storage (network volume)
MODELS_PATH=/workspace/models

# Hugging Face Token (optional, for gated models like Llama)
# Get from: https://huggingface.co/settings/tokens
HF_TOKEN=

# Weights & Biases (optional, for training logging)
# Get from: https://wandb.ai/authorize
WANDB_API_KEY=

# JupyterLab Access Token
JUPYTER_TOKEN=pivoine-ai-2025

# PostgreSQL (on VPS)
DB_HOST=10.8.0.1
DB_PORT=5432
DB_USER=valknar
DB_PASSWORD=ragnarok98
DB_NAME=openwebui
EOF

    chmod 600 .env
    print_success ".env file created (please edit with your tokens)"
else
    print_success ".env file already exists"
fi

# Step 4: Locate docker-compose.yaml
print_info "Checking for docker-compose.yaml..."

# In production, this would be copied from the repo
# For now, assume it's already in the current directory
if [ ! -f docker-compose.yaml ]; then
    print_error "docker-compose.yaml not found. Please copy gpu-server-compose.yaml to docker-compose.yaml"
    exit 1
fi

print_success "docker-compose.yaml found"

# Step 5: Pre-download models (optional but recommended)
print_info "Do you want to pre-download models? (y/n)"
read -r response

if [[ "$response" =~ ^[Yy]$ ]]; then
    print_info "Downloading Llama 3.1 8B Instruct (this will take a while)..."

    mkdir -p /workspace/models

    # Use huggingface-cli to download
    pip install -q huggingface-hub
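
    # huggingface-cli honours the HF_TOKEN environment variable; export it from
    # the .env created above so gated models (e.g. Llama) can be fetched.
    # Assumes HF_TOKEN was filled in manually; the download below still reports
    # an error rather than aborting if the token is empty.
    if [ -f .env ]; then
        export HF_TOKEN="$(grep -E '^HF_TOKEN=' .env | cut -d= -f2-)"
    fi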

    huggingface-cli download \
        meta-llama/Meta-Llama-3.1-8B-Instruct \
        --local-dir /workspace/models/Meta-Llama-3.1-8B-Instruct \
        --local-dir-use-symlinks False || print_error "Model download failed (may need HF_TOKEN)"

    print_success "Model downloaded to /workspace/models"
fi

# Step 6: Start services
print_info "Starting GPU stack services..."

docker compose up -d vllm comfyui jupyter netdata

print_success "Services starting (this may take a few minutes)..."

# Step 7: Wait for services
print_info "Waiting for services to be ready..."

sleep 10
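
# Best-effort readiness probe (assumption: vLLM exposes its OpenAI-compatible
# API on port 8000, matching the access information printed below). Polls for
# up to ~5 minutes; model loading can take longer, so this is informational only.
for _ in $(seq 1 60); do
    if curl -sf http://localhost:8000/v1/models > /dev/null; then
        print_success "vLLM API responding on port 8000"
        break
    fi
    sleep 5
done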

# Check service health
print_info "Checking service status..."

if docker ps | grep -q gpu_vllm; then
    print_success "vLLM container running"
else
    print_error "vLLM container not running"
fi

if docker ps | grep -q gpu_comfyui; then
    print_success "ComfyUI container running"
else
    print_error "ComfyUI container not running"
fi

if docker ps | grep -q gpu_jupyter; then
    print_success "JupyterLab container running"
else
    print_error "JupyterLab container not running"
fi

if docker ps | grep -q gpu_netdata; then
    print_success "Netdata container running"
else
    print_error "Netdata container not running"
fi
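
# Summary view of all services (same output the "gpu-ps" alias below provides).
docker compose ps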

# Step 8: Display access information
echo ""
echo "=================================="
echo "Deployment Complete!"
echo "=================================="
echo ""
echo "Services accessible via VPN (from VPS):"
echo " - vLLM API: http://10.8.0.2:8000"
echo " - ComfyUI: http://10.8.0.2:8188"
echo " - JupyterLab: http://10.8.0.2:8888 (token: pivoine-ai-2025)"
echo " - Netdata: http://10.8.0.2:19999"
echo ""
echo "Local access (from GPU server):"
echo " - vLLM API: http://localhost:8000"
echo " - ComfyUI: http://localhost:8188"
echo " - JupyterLab: http://localhost:8888"
echo " - Netdata: http://localhost:19999"
echo ""
echo "Useful commands:"
echo " - View logs: docker compose logs -f"
echo " - Check status: docker compose ps"
echo " - Stop all: docker compose down"
echo " - Restart service: docker compose restart vllm"
echo " - Start training: docker compose --profile training up -d axolotl"
echo ""
echo "Next steps:"
echo " 1. Wait for vLLM to load model (check logs: docker compose logs -f vllm)"
echo " 2. Test vLLM: curl http://localhost:8000/v1/models"
echo " 3. Configure LiteLLM on VPS to use http://10.8.0.2:8000"
echo " 4. Download ComfyUI models via web interface"
echo ""

# Step 9: Create helpful aliases
print_info "Creating helpful aliases..."

# Only append the alias block once, so re-running this script does not
# duplicate it in ~/.bashrc.
grep -q "# GPU Stack Aliases" ~/.bashrc 2>/dev/null || cat >> ~/.bashrc << 'EOF'

# GPU Stack Aliases
alias gpu-logs='cd /workspace/gpu-stack && docker compose logs -f'
alias gpu-ps='cd /workspace/gpu-stack && docker compose ps'
alias gpu-restart='cd /workspace/gpu-stack && docker compose restart'
alias gpu-down='cd /workspace/gpu-stack && docker compose down'
alias gpu-up='cd /workspace/gpu-stack && docker compose up -d'
alias gpu-stats='watch -n 1 nvidia-smi'
alias gpu-top='nvtop'
EOF

print_success "Aliases added to ~/.bashrc (reload with: source ~/.bashrc)"

echo ""
print_success "All done! 🚀"