- Add Dockerfile with minimal setup (supervisor, tailscale)
- Add start.sh bootstrap script for container initialization
- Add Gitea workflow for automated Docker image builds
- Add comprehensive RUNPOD_TEMPLATE.md documentation
- Add bootstrap-venvs.sh for Python venv health checks

This enables deployment of the AI orchestrator on RunPod using:

- Minimal Docker image (~2-3GB) for fast deployment
- Network volume for models and data persistence (~80-200GB)
- Automated builds on push to main or version tags
- Full Tailscale VPN integration
- Supervisor process management
142 lines
4.7 KiB
Bash
142 lines
4.7 KiB
Bash
#!/bin/bash
# RunPod container startup script
# This script initializes the container environment and starts all services
#
# Steps, in order: SSH server, PATH setup, loading /workspace/ai/.env,
# Tailscale VPN, Python venv check, Supervisor configuration and startup.
# The script then stays in the foreground so the container keeps running.

# Abort on the first command that fails without being explicitly handled.
set -e

echo "========================================"
echo " RunPod AI Orchestrator - Starting"
echo "========================================"

# [1/7] Start SSH server (required by RunPod)
# When errexit (set -e) is active, a failure to start SSH aborts the script.
echo "[1/7] Starting SSH server..."
service ssh start
echo " ✓ SSH server started"

# [2/7] Add /workspace/bin to PATH for arty and custom scripts
# The directory is prepended, so executables there take precedence over
# same-named commands found later in PATH.
echo "[2/7] Configuring PATH..."
export PATH="/workspace/bin:$PATH"
echo " ✓ PATH updated: /workspace/bin added"

# [3/7] Source environment variables from network volume
echo "[3/7] Loading environment from network volume..."
if [ -f /workspace/ai/.env ]; then
    # allexport (-a) marks every variable assigned while the file is sourced
    # for export, so processes started later inherit them. It is switched off
    # again right after so later assignments in this script stay local.
    set -a
    source /workspace/ai/.env
    set +a
    echo " ✓ Environment loaded from /workspace/ai/.env"
else
    echo " ⚠ No .env file found at /workspace/ai/.env"
    echo " Some services may not function correctly without environment variables"
fi

# [4/7] Configure and start Tailscale VPN
# Runs only when TAILSCALE_AUTHKEY is set and non-empty. A failed connection
# is reported as a warning and startup continues without VPN.
# Sets TAILSCALE_IP (read again by the final status banner) and, when an
# address was obtained, exports GPU_TAILSCALE_IP for other services.
echo "[4/7] Configuring Tailscale VPN..."
if [ -n "${TAILSCALE_AUTHKEY:-}" ]; then
    echo " Starting Tailscale daemon..."
    # Daemon runs in the background with userspace networking and a SOCKS5
    # proxy on localhost:1055; the fixed pause gives it time to come up
    # before the CLI talks to it.
    tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
    sleep 3

    echo " Connecting to Tailscale network..."
    # NOTE: HOSTNAME is also a variable Bash maintains itself; this assignment
    # overrides it for the remainder of the script.
    HOSTNAME="runpod-$(hostname)"
    tailscale up --authkey="$TAILSCALE_AUTHKEY" --advertise-tags=tag:gpu --hostname="$HOSTNAME" || {
        echo " ⚠ Tailscale connection failed, continuing without VPN"
    }

    # Get Tailscale IP if connected
    TAILSCALE_IP=$(tailscale ip -4 2>/dev/null || echo "not connected")
    # An empty result is treated the same as a failed lookup, so an empty
    # address is never reported as connected or exported.
    if [ -n "$TAILSCALE_IP" ] && [ "$TAILSCALE_IP" != "not connected" ]; then
        echo " ✓ Tailscale connected"
        echo " Hostname: $HOSTNAME"
        echo " IP: $TAILSCALE_IP"

        # Export for other services
        export GPU_TAILSCALE_IP="$TAILSCALE_IP"
    else
        echo " ⚠ Tailscale failed to obtain IP"
    fi
else
    echo " ⚠ Tailscale disabled (no TAILSCALE_AUTHKEY in .env)"
    echo " Services requiring VPN connectivity will not work"
fi

# [5/7] Check Python virtual environments health
echo "[5/7] Checking Python virtual environments..."
# The version string is informational only, so a missing python3 must not
# abort startup under errexit; fall back to a placeholder instead.
PYTHON_VERSION=$(python3 --version) || PYTHON_VERSION="not available"
echo " System Python: $PYTHON_VERSION"

# Check if bootstrap script exists and run it
if [ -f /workspace/ai/scripts/bootstrap-venvs.sh ]; then
    echo " Running venv health check..."
    # This step is optional (see the else branch), so a failing check is
    # reported and startup continues, matching how Tailscale failures are
    # handled elsewhere in this script.
    bash /workspace/ai/scripts/bootstrap-venvs.sh || {
        echo " ⚠ Venv health check failed, continuing startup"
    }
else
    echo " ⚠ No venv bootstrap script found (optional)"
fi

# [6/7] Configure Supervisor
echo "[6/7] Configuring Supervisor process manager..."
if [ -f /workspace/ai/supervisord.conf ]; then
    # Supervisor expects config at /workspace/supervisord.conf (based on arty scripts)
    # An existing copy is left untouched; the file is only seeded when absent.
    if [ ! -f /workspace/supervisord.conf ]; then
        cp /workspace/ai/supervisord.conf /workspace/supervisord.conf
        echo " ✓ Supervisor config copied to /workspace/supervisord.conf"
    fi

    # Create logs directory if it doesn't exist
    mkdir -p /workspace/logs

    echo " ✓ Supervisor configured"
else
    echo " ⚠ No supervisord.conf found at /workspace/ai/supervisord.conf"
    echo " Supervisor will not be started"
fi

# [7/7] Start Supervisor to manage services
echo "[7/7] Starting Supervisor and managed services..."
if [ -f /workspace/supervisord.conf ]; then
    # Start supervisor daemon
    supervisord -c /workspace/supervisord.conf
    echo " ✓ Supervisor daemon started"

    # Wait a moment for services to initialize
    sleep 3

    # Display service status
    # A non-zero exit from supervisorctl is downgraded to a warning so the
    # status display can never abort startup.
    echo ""
    echo "Service Status:"
    echo "---------------"
    supervisorctl -c /workspace/supervisord.conf status || echo " ⚠ Could not query service status"
else
    echo " ⚠ Skipping Supervisor startup (no config file)"
fi

# Display connection information
#
# print_ready_banner writes the "Container Ready" summary to stdout: the
# service endpoints, the workspace paths, and supervisorctl usage hints.
# Globals:   TAILSCALE_IP (read) - the Tailscale line is printed only when
#            this is non-empty and not the literal "not connected".
# Arguments: none
# Outputs:   the banner on stdout
print_ready_banner() {
    echo ""
    echo "========================================"
    echo " Container Ready"
    echo "========================================"
    echo "Services:"
    echo " - SSH: port 22"
    echo " - ComfyUI: http://localhost:8188"
    echo " - Supervisor Web UI: http://localhost:9001"
    echo " - Model Orchestrator: http://localhost:9000"
    if [ -n "${TAILSCALE_IP:-}" ] && [ "$TAILSCALE_IP" != "not connected" ]; then
        echo " - Tailscale IP: $TAILSCALE_IP"
    fi
    echo ""
    echo "Network Volume: /workspace"
    echo "Project Directory: /workspace/ai"
    echo "Logs: /workspace/logs"
    echo ""
    echo "To view service logs:"
    echo " supervisorctl -c /workspace/supervisord.conf tail -f <service_name>"
    echo ""
    echo "To manage services:"
    echo " supervisorctl -c /workspace/supervisord.conf status"
    echo " supervisorctl -c /workspace/supervisord.conf restart <service_name>"
    echo "========================================"
}

print_ready_banner

# Keep container running
#
# The keep-alive sleep runs in the background and the script blocks in
# `wait`, because Bash only runs a trap promptly while waiting in `wait`,
# not while a foreground command is still running. With explicit INT/TERM
# traps the script also stops on those signals when it is the container's
# PID 1, where signals without a handler are not delivered.
echo "Container is running. Press Ctrl+C to stop."

# Stop the keep-alive process and exit with the given status.
stop_container() {
    kill "$keepalive_pid" 2>/dev/null || true
    exit "$1"
}

# Exit codes follow the 128+signal convention (SIGINT=2, SIGTERM=15).
trap 'stop_container 130' INT
trap 'stop_container 143' TERM

sleep infinity &
keepalive_pid=$!
# `wait` returns >128 when interrupted by a trapped signal; the guard keeps
# errexit from pre-empting the trap handler.
wait "$keepalive_pid" || true