feat: add RunPod Docker template with automated build workflow
- Add Dockerfile with minimal setup (supervisor, tailscale)
- Add start.sh bootstrap script for container initialization
- Add Gitea workflow for automated Docker image builds
- Add comprehensive RUNPOD_TEMPLATE.md documentation
- Add bootstrap-venvs.sh for Python venv health checks

This enables deployment of the AI orchestrator on RunPod using:
- Minimal Docker image (~2-3GB) for fast deployment
- Network volume for models and data persistence (~80-200GB)
- Automated builds on push to main or version tags
- Full Tailscale VPN integration
- Supervisor process management
This commit is contained in:
141
start.sh
Normal file
141
start.sh
Normal file
@@ -0,0 +1,141 @@
|
||||
#!/bin/bash
#
# RunPod container startup script.
#
# Initializes the container environment and then starts all services.

# Stop at the first command that fails.
set -e

printf '%s\n' \
  "========================================" \
  " RunPod AI Orchestrator - Starting" \
  "========================================"
|
||||
|
||||
# [1/7] Bring up the SSH server (RunPod requires it).
printf '%s\n' "[1/7] Starting SSH server..."
service ssh start
printf '%s\n' " ✓ SSH server started"
|
||||
|
||||
# [2/7] Put /workspace/bin at the front of PATH so arty and custom scripts
# are found.
printf '%s\n' "[2/7] Configuring PATH..."
PATH="/workspace/bin:${PATH}"
export PATH
printf '%s\n' " ✓ PATH updated: /workspace/bin added"
|
||||
|
||||
# [3/7] Load environment variables from the network volume.
echo "[3/7] Loading environment from network volume..."
if [ ! -f /workspace/ai/.env ]; then
  echo " ⚠ No .env file found at /workspace/ai/.env"
  echo " Some services may not function correctly without environment variables"
else
  # While allexport is on, every variable the file assigns is exported,
  # so processes started later inherit it.
  set -o allexport
  . /workspace/ai/.env
  set +o allexport
  echo " ✓ Environment loaded from /workspace/ai/.env"
fi
|
||||
|
||||
# [4/7] Configure and start Tailscale VPN
#
# Reads:   TAILSCALE_AUTHKEY - auth key; when unset or empty the step is skipped.
# Sets:    TAILSCALE_IP      - the node's IPv4 address, or "not connected" when
#                              no address could be obtained (left untouched when
#                              the step is skipped).
# Exports: GPU_TAILSCALE_IP  - only when an address was obtained.
#
# Every Tailscale failure is reported as a warning; startup continues without
# the VPN.
#
# NOTE(review): the auth key is passed on the command line and is therefore
# visible in the process list while `tailscale up` runs. The Tailscale CLI
# reportedly accepts `--auth-key=file:<path>`; confirm against the installed
# version before switching.
start_tailscale() {
  # Default tailscaled socket location on Linux; no --socket override is
  # passed to the daemon below.
  local ts_socket=/var/run/tailscale/tailscaled.sock
  # Kept in a dedicated variable: assigning HOSTNAME would overwrite the
  # shell's own HOSTNAME, which child processes inherit when it is exported.
  local ts_hostname
  local ts_pid

  echo "[4/7] Configuring Tailscale VPN..."
  if [ -z "${TAILSCALE_AUTHKEY:-}" ]; then
    echo " ⚠ Tailscale disabled (no TAILSCALE_AUTHKEY in .env)"
    echo " Services requiring VPN connectivity will not work"
    return 0
  fi

  echo " Starting Tailscale daemon..."
  tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
  ts_pid=$!

  # Wait (1s minimum, 10s maximum) for the daemon's control socket instead of
  # relying on a fixed delay; stop early if the daemon has already exited.
  for _ in 1 2 3 4 5 6 7 8 9 10; do
    sleep 1
    if [ -S "$ts_socket" ]; then
      break
    fi
    if ! kill -0 "$ts_pid" 2>/dev/null; then
      break
    fi
  done

  echo " Connecting to Tailscale network..."
  ts_hostname="runpod-$(hostname)"
  tailscale up --authkey="$TAILSCALE_AUTHKEY" --advertise-tags=tag:gpu --hostname="$ts_hostname" || {
    echo " ⚠ Tailscale connection failed, continuing without VPN"
  }

  # Get Tailscale IP if connected. An empty answer counts as "not connected"
  # even when the command itself exits successfully.
  TAILSCALE_IP=$(tailscale ip -4 2>/dev/null) || TAILSCALE_IP=""
  if [ -n "$TAILSCALE_IP" ]; then
    echo " ✓ Tailscale connected"
    echo " Hostname: $ts_hostname"
    echo " IP: $TAILSCALE_IP"

    # Export for other services
    export GPU_TAILSCALE_IP="$TAILSCALE_IP"
  else
    TAILSCALE_IP="not connected"
    echo " ⚠ Tailscale failed to obtain IP"
  fi
}

start_tailscale
|
||||
|
||||
# [5/7] Check Python virtual environments health
#
# Sets PYTHON_VERSION to the output of `python3 --version`, or "not found".
# Nothing in this step is allowed to abort startup: the version is purely
# informational and the health check is optional, so failures are reported
# as warnings and the remaining steps still run.
echo "[5/7] Checking Python virtual environments..."
PYTHON_VERSION=$(python3 --version 2>/dev/null) || PYTHON_VERSION="not found"
echo " System Python: $PYTHON_VERSION"

# Check if bootstrap script exists and run it
if [ -f /workspace/ai/scripts/bootstrap-venvs.sh ]; then
  echo " Running venv health check..."
  # Capture the exit status explicitly so `set -e` does not end the script.
  venv_status=0
  bash /workspace/ai/scripts/bootstrap-venvs.sh || venv_status=$?
  if [ "$venv_status" -ne 0 ]; then
    echo " ⚠ venv health check failed (exit code $venv_status), continuing startup"
  fi
else
  echo " ⚠ No venv bootstrap script found (optional)"
fi
|
||||
|
||||
# [6/7] Configure Supervisor
#
# Supervisor expects its config at /workspace/supervisord.conf (based on arty
# scripts). An existing file there is never overwritten; when it is missing it
# is seeded from /workspace/ai/supervisord.conf.
#
# The outcome is decided by the file the start step actually uses
# (/workspace/supervisord.conf), so the message printed here and the logs
# directory stay correct even when only that file exists.
echo "[6/7] Configuring Supervisor process manager..."
if [ ! -f /workspace/supervisord.conf ] && [ -f /workspace/ai/supervisord.conf ]; then
  cp /workspace/ai/supervisord.conf /workspace/supervisord.conf
  echo " ✓ Supervisor config copied to /workspace/supervisord.conf"
fi

if [ -f /workspace/supervisord.conf ]; then
  # Create logs directory if it doesn't exist
  mkdir -p /workspace/logs

  echo " ✓ Supervisor configured"
else
  echo " ⚠ No supervisord.conf found at /workspace/ai/supervisord.conf"
  echo " Supervisor will not be started"
fi
|
||||
|
||||
# [7/7] Start Supervisor to manage services
#
# Starts supervisord from /workspace/supervisord.conf and prints the status of
# the managed services. A supervisord start failure still aborts the script
# (via `set -e`); a non-zero `supervisorctl status` does not.
echo "[7/7] Starting Supervisor and managed services..."
if [ -f /workspace/supervisord.conf ]; then
  # Start supervisor daemon
  supervisord -c /workspace/supervisord.conf
  echo " ✓ Supervisor daemon started"

  # Wait a moment for services to initialize
  sleep 3

  # Display service status
  echo ""
  echo "Service Status:"
  echo "---------------"
  # `supervisorctl status` exits non-zero both when it cannot reach supervisord
  # and when it can but a process is not RUNNING (status 3 on Supervisor 4.x),
  # so the two cases get different messages.
  status_rc=0
  supervisorctl -c /workspace/supervisord.conf status || status_rc=$?
  case "$status_rc" in
    0) ;;
    3) echo " ⚠ One or more services are not in the RUNNING state (see above)" ;;
    *) echo " ⚠ Could not query service status" ;;
  esac
else
  echo " ⚠ Skipping Supervisor startup (no config file)"
fi
|
||||
|
||||
# Print the "ready" summary: service endpoints, volume paths and the
# supervisorctl commands for inspecting and managing services.
printf '%s\n' \
  "" \
  "========================================" \
  " Container Ready" \
  "========================================" \
  "Services:" \
  " - SSH: port 22" \
  " - ComfyUI: http://localhost:8188" \
  " - Supervisor Web UI: http://localhost:9001" \
  " - Model Orchestrator: http://localhost:9000"

# The Tailscale line appears only when an address was actually obtained
# (variable set, non-empty, and not the "not connected" placeholder).
if [ "${TAILSCALE_IP:-not connected}" != "not connected" ]; then
  printf '%s\n' " - Tailscale IP: $TAILSCALE_IP"
fi

printf '%s\n' \
  "" \
  "Network Volume: /workspace" \
  "Project Directory: /workspace/ai" \
  "Logs: /workspace/logs" \
  "" \
  "To view service logs:" \
  " supervisorctl -c /workspace/supervisord.conf tail -f <service_name>" \
  "" \
  "To manage services:" \
  " supervisorctl -c /workspace/supervisord.conf status" \
  " supervisorctl -c /workspace/supervisord.conf restart <service_name>" \
  "========================================"
|
||||
|
||||
# Keep container running
#
# The script blocks here until it receives TERM or INT. Bash only runs a trap
# once the current foreground command returns, so the idle sleep runs in the
# background and the script blocks in `wait`, which a trapped signal
# interrupts immediately. On shutdown Supervisor is asked to stop its
# services before the script exits.
# NOTE(review): presumably this script is the container's main process, in
# which case exiting here ends the container - confirm against the Dockerfile.
shutdown_container() {
  echo "Shutdown signal received, stopping services..."
  if [ -f /workspace/supervisord.conf ]; then
    # Best effort: supervisord may not be running.
    supervisorctl -c /workspace/supervisord.conf shutdown || true
  fi
  if [ -n "${keepalive_pid:-}" ]; then
    kill "$keepalive_pid" 2>/dev/null || true
  fi
  exit 0
}
trap shutdown_container TERM INT

echo "Container is running. Press Ctrl+C to stop."
sleep infinity &
keepalive_pid=$!
# `wait` returns non-zero when interrupted by a trapped signal; `|| true`
# keeps `set -e` from pre-empting the handler's own exit.
wait "$keepalive_pid" || true
|
||||
Reference in New Issue
Block a user