#!/bin/bash
#
# GPU Stack Deployment Script
# Run this on the GPU server after SSH access is established.
#
# Requires: root, Docker + NVIDIA Container Toolkit, an NVIDIA GPU,
# and a docker-compose.yaml already placed in /workspace/gpu-stack.

# Exit on error, on unset variables, and on any failure inside a pipeline.
set -euo pipefail

echo "=================================="
echo "GPU Stack Deployment Script"
echo "=================================="
echo ""

# Colors for output (readonly: these are constants)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Print a green checkmark line for a completed step.
print_success() {
  echo -e "${GREEN}✓ $1${NC}"
}

# Print a red failure line. Sent to stderr so diagnostics survive
# stdout redirection (e.g. when the script's output is logged).
print_error() {
  echo -e "${RED}✗ $1${NC}" >&2
}

# Print a yellow progress/info line.
print_info() {
  echo -e "${YELLOW}→ $1${NC}"
}

# Check if running as root
if [[ $EUID -ne 0 ]]; then
  print_error "This script must be run as root (use sudo)"
  exit 1
fi

# Step 1: Check prerequisites
print_info "Checking prerequisites..."

if ! command -v docker &> /dev/null; then
  print_error "Docker is not installed. Please run DOCKER_GPU_SETUP.md first."
  exit 1
fi
print_success "Docker installed"

if ! command -v nvidia-smi &> /dev/null; then
  print_error "nvidia-smi not found. Is this a GPU server?"
  exit 1
fi
print_success "NVIDIA GPU detected"

# Smoke-test GPU passthrough into a container before deploying anything.
if ! docker run --rm --runtime=nvidia --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi &> /dev/null; then
  print_error "Docker cannot access GPU. Please configure NVIDIA Container Toolkit."
  exit 1
fi
print_success "Docker GPU access working"

# Step 2: Create directory structure
print_info "Creating directory structure..."
mkdir -p /workspace/gpu-stack/{vllm,comfyui,training/{configs,data,output},notebooks,monitoring}
# Explicit guard: everything below assumes this working directory.
cd /workspace/gpu-stack || exit 1
print_success "Directory structure created"

# Step 3: Create .env file (only on first run; never clobber an edited one)
# SECURITY NOTE(review): the heredoc below hardcodes a database password.
# Rotate this credential and keep real secrets out of the repository.
if [ ! -f .env ]; then
  print_info "Creating .env file..."
  cat > .env << 'EOF'
# GPU Stack Environment Variables

# Timezone
TIMEZONE=Europe/Berlin

# VPN Network
VPS_IP=10.8.0.1
GPU_IP=10.8.0.2

# Model Storage (network volume)
MODELS_PATH=/workspace/models

# Hugging Face Token (optional, for gated models like Llama)
# Get from: https://huggingface.co/settings/tokens
HF_TOKEN=

# Weights & Biases (optional, for training logging)
# Get from: https://wandb.ai/authorize
WANDB_API_KEY=

# JupyterLab Access Token
JUPYTER_TOKEN=pivoine-ai-2025

# PostgreSQL (on VPS)
DB_HOST=10.8.0.1
DB_PORT=5432
DB_USER=valknar
DB_PASSWORD=ragnarok98
DB_NAME=openwebui
EOF
  # Contains credentials: owner-only permissions.
  chmod 600 .env
  print_success ".env file created (please edit with your tokens)"
else
  print_success ".env file already exists"
fi

# Step 4: Download docker-compose.yaml
print_info "Downloading docker-compose.yaml..."
# In production, this would be copied from the repo.
# For now, assume it's already in the current directory.
if [ ! -f docker-compose.yaml ]; then
  print_error "docker-compose.yaml not found. Please copy gpu-server-compose.yaml to docker-compose.yaml"
  exit 1
fi
print_success "docker-compose.yaml found"

# Step 5: Pre-download models (optional but recommended)
print_info "Do you want to pre-download models? (y/n)"
read -r response
if [[ "$response" =~ ^[Yy]$ ]]; then
  print_info "Downloading Llama 3.1 8B Instruct (this will take a while)..."
  mkdir -p /workspace/models
  # Use huggingface-cli to download.
  pip install -q huggingface-hub
  # NOTE(review): --local-dir-use-symlinks is deprecated in recent
  # huggingface_hub releases (copies are the default); harmless today,
  # but drop it once the CLI removes the flag.
  huggingface-cli download \
    meta-llama/Meta-Llama-3.1-8B-Instruct \
    --local-dir /workspace/models/Meta-Llama-3.1-8B-Instruct \
    --local-dir-use-symlinks False \
    || print_error "Model download failed (may need HF_TOKEN)"
  print_success "Model downloaded to /workspace/models"
fi

# Step 6: Start services
print_info "Starting GPU stack services..."
docker compose up -d vllm comfyui jupyter netdata
print_success "Services starting (this may take a few minutes)..."

# Step 7: Wait for services
print_info "Waiting for services to be ready..."
# Give containers a moment to come up before polling their status.
sleep 10

# Check service health
print_info "Checking service status..."

# Report whether a container with EXACTLY this name is running.
# Uses --format + grep -x for an exact whole-line match, so e.g.
# "gpu_vllm_old" can no longer satisfy the "gpu_vllm" check.
#   $1 - container name
#   $2 - human-readable service label for the status message
check_container() {
  local name=$1 label=$2
  if docker ps --format '{{.Names}}' | grep -qx "$name"; then
    print_success "${label} container running"
  else
    print_error "${label} container not running"
  fi
}

check_container gpu_vllm "vLLM"
check_container gpu_comfyui "ComfyUI"
check_container gpu_jupyter "JupyterLab"
check_container gpu_netdata "Netdata"

# Step 8: Display access information
echo ""
echo "=================================="
echo "Deployment Complete!"
echo "=================================="
echo ""
echo "Services accessible via VPN (from VPS):"
echo " - vLLM API: http://10.8.0.2:8000"
echo " - ComfyUI: http://10.8.0.2:8188"
echo " - JupyterLab: http://10.8.0.2:8888 (token: pivoine-ai-2025)"
echo " - Netdata: http://10.8.0.2:19999"
echo ""
echo "Local access (from GPU server):"
echo " - vLLM API: http://localhost:8000"
echo " - ComfyUI: http://localhost:8188"
echo " - JupyterLab: http://localhost:8888"
echo " - Netdata: http://localhost:19999"
echo ""
echo "Useful commands:"
echo " - View logs: docker compose logs -f"
echo " - Check status: docker compose ps"
echo " - Stop all: docker compose down"
echo " - Restart service: docker compose restart vllm"
echo " - Start training: docker compose --profile training up -d axolotl"
echo ""
echo "Next steps:"
echo " 1. Wait for vLLM to load model (check logs: docker compose logs -f vllm)"
echo " 2. Test vLLM: curl http://localhost:8000/v1/models"
echo " 3. Configure LiteLLM on VPS to use http://10.8.0.2:8000"
echo " 4. Download ComfyUI models via web interface"
echo ""

# Step 9: Create helpful aliases
print_info "Creating helpful aliases..."
# Append convenience aliases to ~/.bashrc, but only once: the original
# unconditional append duplicated this block on every re-run of the
# deploy script. The "# GPU Stack Aliases" marker line is the guard.
if ! grep -q '# GPU Stack Aliases' ~/.bashrc 2>/dev/null; then
  cat >> ~/.bashrc << 'EOF'

# GPU Stack Aliases
alias gpu-logs='cd /workspace/gpu-stack && docker compose logs -f'
alias gpu-ps='cd /workspace/gpu-stack && docker compose ps'
alias gpu-restart='cd /workspace/gpu-stack && docker compose restart'
alias gpu-down='cd /workspace/gpu-stack && docker compose down'
alias gpu-up='cd /workspace/gpu-stack && docker compose up -d'
alias gpu-stats='watch -n 1 nvidia-smi'
alias gpu-top='nvtop'
EOF
fi
print_success "Aliases added to ~/.bashrc (reload with: source ~/.bashrc)"

echo ""
print_success "All done! 🚀"