#!/bin/bash
# GPU Stack Deployment Script
# Run this on the GPU server after SSH access is established

set -e # Exit on error

echo "=================================="
echo "GPU Stack Deployment Script"
echo "=================================="
echo ""

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Functions
print_success() {
    echo -e "${GREEN}✓ $1${NC}"
}

print_error() {
    echo -e "${RED}✗ $1${NC}"
}

print_info() {
    echo -e "${YELLOW}→ $1${NC}"
}

# Check if running as root
if [[ $EUID -ne 0 ]]; then
    print_error "This script must be run as root (use sudo)"
    exit 1
fi

# Step 1: Check prerequisites
print_info "Checking prerequisites..."

if ! command -v docker &> /dev/null; then
    print_error "Docker is not installed. Please run DOCKER_GPU_SETUP.md first."
    exit 1
fi
print_success "Docker installed"

if ! command -v nvidia-smi &> /dev/null; then
    print_error "nvidia-smi not found. Is this a GPU server?"
    exit 1
fi
print_success "NVIDIA GPU detected"

if ! docker run --rm --runtime=nvidia --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then
    print_error "Docker cannot access GPU. Please configure NVIDIA Container Toolkit."
    exit 1
fi
print_success "Docker GPU access working"

# Step 2: Create directory structure
print_info "Creating directory structure..."

mkdir -p /workspace/gpu-stack/{vllm,comfyui,training/{configs,data,output},notebooks,monitoring}
cd /workspace/gpu-stack

print_success "Directory structure created"

# Step 3: Create .env file
if [ ! -f .env ]; then
    print_info "Creating .env file..."

    cat > .env << 'EOF'
# GPU Stack Environment Variables

# Timezone
TIMEZONE=Europe/Berlin

# VPN Network
VPS_IP=10.8.0.1
GPU_IP=10.8.0.2

# Model Storage (network volume)
MODELS_PATH=/workspace/models

# Hugging Face Token (optional, for gated models like Llama)
# Get from: https://huggingface.co/settings/tokens
HF_TOKEN=

# Weights & Biases (optional, for training logging)
# Get from: https://wandb.ai/authorize
WANDB_API_KEY=

# JupyterLab Access Token
JUPYTER_TOKEN=pivoine-ai-2025

# PostgreSQL (on VPS)
DB_HOST=10.8.0.1
DB_PORT=5432
DB_USER=valknar
DB_PASSWORD=ragnarok98
DB_NAME=openwebui
EOF

    chmod 600 .env
    print_success ".env file created (please edit with your tokens)"
else
    print_success ".env file already exists"
fi

# Step 4: Locate docker-compose.yaml
print_info "Checking for docker-compose.yaml..."

# In production, this would be copied from the repo
# For now, assume it's already in the current directory
if [ ! -f docker-compose.yaml ]; then
    print_error "docker-compose.yaml not found. Please copy gpu-server-compose.yaml to docker-compose.yaml"
    exit 1
fi

print_success "docker-compose.yaml found"

# Step 5: Pre-download models (optional but recommended)
print_info "Do you want to pre-download models? (y/n)"
read -r response

if [[ "$response" =~ ^[Yy]$ ]]; then
    print_info "Downloading Llama 3.1 8B Instruct (this will take a while)..."

    mkdir -p /workspace/models

    # Use huggingface-cli to download
    pip install -q huggingface-hub
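
    # huggingface-cli honours the HF_TOKEN environment variable; export it from
    # the .env created above so gated models (e.g. Llama) can be fetched.
    # Assumes HF_TOKEN was filled in manually; the download below still reports
    # an error rather than aborting if the token is empty.
    if [ -f .env ]; then
        export HF_TOKEN="$(grep -E '^HF_TOKEN=' .env | cut -d= -f2-)"
    fi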

    huggingface-cli download \
        meta-llama/Meta-Llama-3.1-8B-Instruct \
        --local-dir /workspace/models/Meta-Llama-3.1-8B-Instruct \
        --local-dir-use-symlinks False || print_error "Model download failed (may need HF_TOKEN)"

    print_success "Model downloaded to /workspace/models"
fi

# Step 6: Start services
print_info "Starting GPU stack services..."

docker compose up -d vllm comfyui jupyter netdata

print_success "Services starting (this may take a few minutes)..."

# Step 7: Wait for services
print_info "Waiting for services to be ready..."

sleep 10
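
# Best-effort readiness probe (assumption: vLLM exposes its OpenAI-compatible
# API on port 8000, matching the access information printed below). Polls for
# up to ~5 minutes; model loading can take longer, so this is informational only.
for _ in $(seq 1 60); do
    if curl -sf http://localhost:8000/v1/models > /dev/null; then
        print_success "vLLM API responding on port 8000"
        break
    fi
    sleep 5
done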

# Check service health
print_info "Checking service status..."

if docker ps | grep -q gpu_vllm; then
    print_success "vLLM container running"
else
    print_error "vLLM container not running"
fi

if docker ps | grep -q gpu_comfyui; then
    print_success "ComfyUI container running"
else
    print_error "ComfyUI container not running"
fi

if docker ps | grep -q gpu_jupyter; then
    print_success "JupyterLab container running"
else
    print_error "JupyterLab container not running"
fi

if docker ps | grep -q gpu_netdata; then
    print_success "Netdata container running"
else
    print_error "Netdata container not running"
fi
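
# Summary view of all services (same output the "gpu-ps" alias below provides).
docker compose ps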

# Step 8: Display access information
echo ""
echo "=================================="
echo "Deployment Complete!"
echo "=================================="
echo ""
echo "Services accessible via VPN (from VPS):"
echo " - vLLM API: http://10.8.0.2:8000"
echo " - ComfyUI: http://10.8.0.2:8188"
echo " - JupyterLab: http://10.8.0.2:8888 (token: pivoine-ai-2025)"
echo " - Netdata: http://10.8.0.2:19999"
echo ""
echo "Local access (from GPU server):"
echo " - vLLM API: http://localhost:8000"
echo " - ComfyUI: http://localhost:8188"
echo " - JupyterLab: http://localhost:8888"
echo " - Netdata: http://localhost:19999"
echo ""
echo "Useful commands:"
echo " - View logs: docker compose logs -f"
echo " - Check status: docker compose ps"
echo " - Stop all: docker compose down"
echo " - Restart service: docker compose restart vllm"
echo " - Start training: docker compose --profile training up -d axolotl"
echo ""
echo "Next steps:"
echo " 1. Wait for vLLM to load model (check logs: docker compose logs -f vllm)"
echo " 2. Test vLLM: curl http://localhost:8000/v1/models"
echo " 3. Configure LiteLLM on VPS to use http://10.8.0.2:8000"
echo " 4. Download ComfyUI models via web interface"
echo ""

# Step 9: Create helpful aliases
print_info "Creating helpful aliases..."

# Only append the alias block once, so re-running this script does not
# duplicate it in ~/.bashrc.
grep -q "# GPU Stack Aliases" ~/.bashrc 2>/dev/null || cat >> ~/.bashrc << 'EOF'

# GPU Stack Aliases
alias gpu-logs='cd /workspace/gpu-stack && docker compose logs -f'
alias gpu-ps='cd /workspace/gpu-stack && docker compose ps'
alias gpu-restart='cd /workspace/gpu-stack && docker compose restart'
alias gpu-down='cd /workspace/gpu-stack && docker compose down'
alias gpu-up='cd /workspace/gpu-stack && docker compose up -d'
alias gpu-stats='watch -n 1 nvidia-smi'
alias gpu-top='nvtop'
EOF

print_success "Aliases added to ~/.bashrc (reload with: source ~/.bashrc)"

echo ""
print_success "All done! 🚀"