Files
runpod/start.sh
Sebastian Krüger 571431955d feat: add RunPod Docker template with automated build workflow
- Add Dockerfile with minimal setup (supervisor, tailscale)
- Add start.sh bootstrap script for container initialization
- Add Gitea workflow for automated Docker image builds
- Add comprehensive RUNPOD_TEMPLATE.md documentation
- Add bootstrap-venvs.sh for Python venv health checks

This enables deployment of the AI orchestrator on RunPod using:
- Minimal Docker image (~2-3GB) for fast deployment
- Network volume for models and data persistence (~80-200GB)
- Automated builds on push to main or version tags
- Full Tailscale VPN integration
- Supervisor process management
2025-11-23 21:53:56 +01:00

142 lines
4.7 KiB
Bash

#!/bin/bash
#
# RunPod container startup script.
#
# Prepares the container environment and then launches every service.
# Any command that fails outside of a conditional aborts the script.
set -o errexit

printf '%s\n' \
  "========================================" \
  " RunPod AI Orchestrator - Starting" \
  "========================================"
# [1/7] Bring up the SSH daemon (RunPod requires it).
printf '%s\n' "[1/7] Starting SSH server..."
service ssh start
printf '%s\n' " ✓ SSH server started"
# [2/7] Prepend /workspace/bin to PATH so arty and custom scripts are found.
printf '%s\n' "[2/7] Configuring PATH..."
PATH="/workspace/bin:${PATH}"
export PATH
printf '%s\n' " ✓ PATH updated: /workspace/bin added"
# [3/7] Import environment variables kept on the network volume.
printf '%s\n' "[3/7] Loading environment from network volume..."
if [ ! -f /workspace/ai/.env ]; then
  printf '%s\n' \
    " ⚠ No .env file found at /workspace/ai/.env" \
    " Some services may not function correctly without environment variables"
else
  # allexport is switched on only while the file is read, so every
  # assignment made by .env is exported to later child processes.
  set -o allexport
  . /workspace/ai/.env
  set +o allexport
  printf '%s\n' " ✓ Environment loaded from /workspace/ai/.env"
fi
# [4/7] Configure and start Tailscale VPN
#
# Runs only when TAILSCALE_AUTHKEY is non-empty. Starts tailscaled in
# userspace-networking mode with a SOCKS5 proxy on localhost:1055, then joins
# the tailnet as "runpod-<hostname>" advertising tag:gpu. A failed
# `tailscale up` is reported but does not stop the script.
#
# Sets:    TAILSCALE_IP      - IPv4 reported by `tailscale ip -4`, or the
#                              sentinel "not connected"
# Exports: GPU_TAILSCALE_IP  - only when an IP was obtained
echo "[4/7] Configuring Tailscale VPN..."
if [ -n "${TAILSCALE_AUTHKEY:-}" ]; then
  echo " Starting Tailscale daemon..."
  tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
  # Give the daemon a moment to come up before talking to it.
  sleep 3
  echo " Connecting to Tailscale network..."
  # Use a dedicated variable instead of HOSTNAME: bash maintains HOSTNAME
  # itself and it is often already exported in the inherited environment, so
  # overwriting it would hand a wrong hostname to every later child process.
  ts_hostname="runpod-$(hostname)"
  tailscale up --authkey="$TAILSCALE_AUTHKEY" --advertise-tags=tag:gpu --hostname="$ts_hostname" || {
    echo " ⚠ Tailscale connection failed, continuing without VPN"
  }
  # Get Tailscale IP if connected
  TAILSCALE_IP=$(tailscale ip -4 2>/dev/null || echo "not connected")
  # An empty answer counts as "no IP" too, matching the check used by the
  # final connection summary.
  if [ -n "$TAILSCALE_IP" ] && [ "$TAILSCALE_IP" != "not connected" ]; then
    echo " ✓ Tailscale connected"
    echo " Hostname: $ts_hostname"
    echo " IP: $TAILSCALE_IP"
    # Export for other services
    export GPU_TAILSCALE_IP="$TAILSCALE_IP"
  else
    echo " ⚠ Tailscale failed to obtain IP"
  fi
else
  echo " ⚠ Tailscale disabled (no TAILSCALE_AUTHKEY in .env)"
  echo " Services requiring VPN connectivity will not work"
fi
# [5/7] Check Python virtual environments health
#
# Prints the system Python version and, when the project ships
# /workspace/ai/scripts/bootstrap-venvs.sh, runs it. Both parts are
# best-effort: the script runs under `set -e`, so an unguarded failure here
# would end the container before Supervisor and the keep-alive are reached.
# Problems are therefore reported as warnings and startup continues.
#
# Sets: PYTHON_VERSION - output of `python3 --version`, or "not available"
echo "[5/7] Checking Python virtual environments..."
if PYTHON_VERSION=$(python3 --version 2>&1); then
  echo " System Python: $PYTHON_VERSION"
else
  PYTHON_VERSION="not available"
  echo " ⚠ python3 could not be run"
fi
# Check if bootstrap script exists and run it
if [ -f /workspace/ai/scripts/bootstrap-venvs.sh ]; then
  echo " Running venv health check..."
  bash /workspace/ai/scripts/bootstrap-venvs.sh ||
    echo " ⚠ venv health check failed (exit code $?), continuing startup"
else
  echo " ⚠ No venv bootstrap script found (optional)"
fi
# [6/7] Configure Supervisor
#
# Supervisor expects its config at /workspace/supervisord.conf (based on arty
# scripts). If that file is absent it is seeded from the project copy at
# /workspace/ai/supervisord.conf; an existing file is never overwritten.
#
# The "configured" / "will not be started" decision is made on
# /workspace/supervisord.conf itself, because that is the file the start step
# tests. This keeps the message truthful and ensures the logs directory exists
# even when only a previously copied config is present.
echo "[6/7] Configuring Supervisor process manager..."
if [ ! -f /workspace/supervisord.conf ] && [ -f /workspace/ai/supervisord.conf ]; then
  cp /workspace/ai/supervisord.conf /workspace/supervisord.conf
  echo " ✓ Supervisor config copied to /workspace/supervisord.conf"
fi
if [ -f /workspace/supervisord.conf ]; then
  # Create logs directory if it doesn't exist
  mkdir -p /workspace/logs
  echo " ✓ Supervisor configured"
else
  echo " ⚠ No supervisord.conf found at /workspace/ai/supervisord.conf"
  echo " Supervisor will not be started"
fi
# [7/7] Launch Supervisor, which in turn manages the services.
printf '%s\n' "[7/7] Starting Supervisor and managed services..."
if [ ! -f /workspace/supervisord.conf ]; then
  printf '%s\n' " ⚠ Skipping Supervisor startup (no config file)"
else
  # Bring up the supervisor daemon.
  supervisord -c /workspace/supervisord.conf
  printf '%s\n' " ✓ Supervisor daemon started"
  # Short pause so the services get a chance to initialize.
  sleep 3
  # Print the current state of each service; a non-zero exit from
  # supervisorctl only produces a warning.
  printf '%s\n' "" "Service Status:" "---------------"
  if ! supervisorctl -c /workspace/supervisord.conf status; then
    printf '%s\n' " ⚠ Could not query service status"
  fi
fi
# Display connection information
echo ""
echo "========================================"
echo " Container Ready"
echo "========================================"
echo "Services:"
echo " - SSH: port 22"
echo " - ComfyUI: http://localhost:8188"
echo " - Supervisor Web UI: http://localhost:9001"
echo " - Model Orchestrator: http://localhost:9000"
# TAILSCALE_IP is either unset/empty or the sentinel "not connected" when the
# VPN is unavailable; only a real address is shown.
if [ -n "${TAILSCALE_IP:-}" ] && [ "$TAILSCALE_IP" != "not connected" ]; then
  echo " - Tailscale IP: $TAILSCALE_IP"
fi
echo ""
echo "Network Volume: /workspace"
echo "Project Directory: /workspace/ai"
echo "Logs: /workspace/logs"
echo ""
echo "To view service logs:"
echo " supervisorctl -c /workspace/supervisord.conf tail -f <service_name>"
echo ""
echo "To manage services:"
echo " supervisorctl -c /workspace/supervisord.conf status"
echo " supervisorctl -c /workspace/supervisord.conf restart <service_name>"
echo "========================================"

# Keep container running
#
# The shell must stay alive for the container to stay up, but it also has to
# react to SIGTERM / SIGINT. A foreground `sleep infinity` with no trap does
# not: when this script is the container's PID 1 the signals have no default
# effect, so the advertised Ctrl+C does nothing and a stop request ends in a
# forced kill. Installing a trap and waiting on a backgrounded sleep lets the
# handler run as soon as the signal arrives.

# Signal handler: stop Supervisor (best-effort), end the keep-alive, exit 0.
stop_container() {
  echo "Stop signal received, shutting down..."
  if [ -f /workspace/supervisord.conf ]; then
    # Best-effort: Supervisor may not be running at all.
    supervisorctl -c /workspace/supervisord.conf shutdown || true
  fi
  if [ -n "${keepalive_pid:-}" ]; then
    kill "$keepalive_pid" 2>/dev/null || true
  fi
  exit 0
}

sleep infinity &
keepalive_pid=$!
trap stop_container TERM INT
echo "Container is running. Press Ctrl+C to stop."
# `wait` returns with a status above 128 when a trapped signal interrupts it;
# `|| true` keeps `set -e` from pre-empting the handler's own exit.
wait "$keepalive_pid" || true