feat: add Supervisor process manager for service management

- Add supervisord.conf with ComfyUI and orchestrator services
- Update Ansible playbook with supervisor installation tag
- Rewrite start-all.sh and stop-all.sh to use Supervisor
- Add status.sh script for checking service status
- Update arty.yml with supervisor commands and shortcuts
- Update CLAUDE.md with Supervisor documentation and troubleshooting
- Services now auto-restart on crashes with centralized logging

Benefits:
- Better process control than manual pkill/background jobs
- Auto-restart on service crashes
- Centralized log management in /workspace/logs/
- Web interface for monitoring (port 9001)
- Works perfectly in RunPod containers (no systemd needed)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-22 09:22:16 +01:00
parent 2207d60f98
commit 664da9f4ea
7 changed files with 306 additions and 29 deletions

View File

@@ -1,15 +1,19 @@
#!/bin/bash
#
# Start AI Orchestrator
# Starts the model orchestrator which manages all AI services
# Start AI Services with Supervisor
# Starts supervisor daemon which manages ComfyUI and orchestrator
#
# NOTE(review): this span looks like a rendered diff hunk in which the
# pre-change and post-change lines are shown together (two titles above,
# two `cd` commands and two banner titles below) — confirm against the
# real file before relying on the exact statement sequence.
#
# Exit on the first unhandled command failure.
set -e
# Change to the parent of the directory that contains this script.
cd "$(dirname "$0")/.."
# Workspace root: taken from the environment when set, otherwise /workspace.
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
# Supervisor configuration file, located directly under the workspace root.
SUPERVISORD_CONF="${WORKSPACE_DIR}/supervisord.conf"
# The "ai" subdirectory of the workspace; it becomes the working directory.
AI_DIR="${WORKSPACE_DIR}/ai"
cd "${AI_DIR}"
# Startup banner.
echo "========================================="
echo " Starting AI Orchestrator"
echo " Starting AI Services with Supervisor"
echo "========================================="
echo ""
@@ -27,9 +31,47 @@ if [ -f .env ]; then
set +a
fi
# NOTE(review): the next three lines and the later `echo "Orchestrator stopped"`
# appear to be the pre-change side of a rendered diff hunk shown alongside the
# new Supervisor logic — confirm against the real file.
# Start orchestrator
echo "Starting orchestrator on port 9000..."
python3 model-orchestrator/orchestrator_subprocess.py
# Check if supervisord is already running
# A PID file under WORKSPACE_DIR is treated as the marker for a running daemon.
if [ -f "${WORKSPACE_DIR}/supervisord.pid" ]; then
PID=$(cat "${WORKSPACE_DIR}/supervisord.pid")
# `ps -p` succeeds only when a process with that PID currently exists.
if ps -p "$PID" > /dev/null 2>&1; then
echo "Supervisor is already running (PID: $PID)"
echo ""
echo "Checking service status..."
supervisorctl -c "${SUPERVISORD_CONF}" status
# Nothing to start: report and leave with success.
exit 0
else
# The recorded PID is not alive, so discard the file and fall through to a
# fresh start.
echo "Removing stale PID file..."
rm -f "${WORKSPACE_DIR}/supervisord.pid"
fi
fi
# Start supervisord
echo "Starting Supervisor daemon..."
supervisord -c "${SUPERVISORD_CONF}"
# Wait a moment for supervisor to start
sleep 2
# Check status
# NOTE(review): if `set -e` is in effect, a non-zero exit from
# `supervisorctl status` would end the script here, before the success banner
# below — confirm which exit codes `status` can return and whether that is
# the intended behavior.
echo ""
echo "Service Status:"
echo "---------------"
supervisorctl -c "${SUPERVISORD_CONF}" status
echo ""
echo "Orchestrator stopped"
echo "========================================="
echo "Services started successfully!"
echo "========================================="
echo ""
# Quick-reference list of supervisorctl commands for the operator.
echo "Useful commands:"
echo " supervisorctl status - Check status"
echo " supervisorctl start orchestrator - Start orchestrator"
echo " supervisorctl restart comfyui - Restart ComfyUI"
echo " supervisorctl stop all - Stop all services"
echo " supervisorctl tail -f comfyui - Follow ComfyUI logs"
echo ""
# NOTE(review): the web-interface credentials are hard-coded and printed in
# clear text; presumably they mirror the values in supervisord.conf — verify,
# and consider whether they should be echoed at all.
echo "Web interface: http://localhost:9001"
echo " Username: admin"
echo " Password: runpod2024"
echo ""

47
scripts/status.sh Normal file
View File

@@ -0,0 +1,47 @@
#!/bin/bash
#
# Check AI Services Status
# Reports whether the Supervisor daemon is alive and, when it is, prints the
# state of every service it manages.
#
# Environment:
#   WORKSPACE_DIR  Directory holding supervisord.conf and supervisord.pid
#                  (defaults to /workspace).
#
# Exit status:
#   1  the PID file is missing, or the PID it records is not a live process
#   0  otherwise
#
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
SUPERVISORD_CONF="${WORKSPACE_DIR}/supervisord.conf"
pid_file="${WORKSPACE_DIR}/supervisord.pid"

# Print the failure reason ($1) followed by the start-up hint, then exit 1.
report_down() {
  printf '%s\n' \
    "❌ $1" \
    "" \
    "To start services, run:" \
    " bash scripts/start-all.sh"
  exit 1
}

printf '%s\n' \
  "=========================================" \
  " AI Services Status" \
  "=========================================" \
  ""

# Check if supervisord is running: first the PID file, then the process itself.
[ -f "${pid_file}" ] || report_down "Supervisor is not running"

supervisor_pid=$(cat "${pid_file}")
ps -p "${supervisor_pid}" > /dev/null 2>&1 \
  || report_down "Supervisor PID file exists but process is not running"

printf '%s\n' \
  "✅ Supervisor is running (PID: ${supervisor_pid})" \
  "" \
  "Service Status:" \
  "---------------"

# Show service status as reported by the daemon itself.
supervisorctl -c "${SUPERVISORD_CONF}" status

printf '%s\n' \
  "" \
  "Useful commands:" \
  " supervisorctl start orchestrator - Start orchestrator" \
  " supervisorctl restart comfyui - Restart ComfyUI" \
  " supervisorctl stop all - Stop all services" \
  " supervisorctl tail -f comfyui - Follow ComfyUI logs" \
  ""

View File

@@ -1,22 +1,49 @@
#!/bin/bash
#
# Stop AI Services
# Gracefully stops all running AI services
# Gracefully stops all services managed by Supervisor
#
# NOTE(review): this span looks like a rendered diff in which pre-change lines
# (the unconditional `pkill` calls for the orchestrator and vLLM) are shown
# together with the new Supervisor-based logic — confirm against the real file.
#
# Exit on the first unhandled command failure.
set -e
# Workspace root: taken from the environment when set, otherwise /workspace.
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
# Supervisor configuration file, located directly under the workspace root.
SUPERVISORD_CONF="${WORKSPACE_DIR}/supervisord.conf"
echo "========================================="
echo " Stopping AI Services"
echo "========================================="
echo ""
# Kill orchestrator and model processes
# The `|| echo` fallback keeps `set -e` from aborting when pkill fails,
# e.g. because no process matched the pattern.
echo "Stopping orchestrator..."
pkill -f "orchestrator_subprocess.py" || echo "Orchestrator not running"
# Check if supervisord is running
# Without a PID file the daemon is assumed to be down; processes are then
# matched by command line and signalled directly.
if [ ! -f "${WORKSPACE_DIR}/supervisord.pid" ]; then
echo "Supervisor is not running (no PID file found)"
echo "Cleaning up any stray processes..."
pkill -f "orchestrator_subprocess.py" || echo " - Orchestrator not running"
pkill -f "ComfyUI.*main.py" || echo " - ComfyUI not running"
echo ""
echo "All services stopped"
exit 0
fi
echo "Stopping model services..."
pkill -f "models/vllm/server.py" || echo "vLLM not running"
PID=$(cat "${WORKSPACE_DIR}/supervisord.pid")
# A PID file whose process no longer exists is stale: remove it and report
# success without contacting the daemon.
if ! ps -p "$PID" > /dev/null 2>&1; then
echo "Supervisor PID file exists but process is not running"
echo "Removing stale PID file..."
rm -f "${WORKSPACE_DIR}/supervisord.pid"
echo ""
echo "All services stopped"
exit 0
fi
# Stop all supervised services
echo "Stopping all supervised services..."
supervisorctl -c "${SUPERVISORD_CONF}" stop all
# Pause for two seconds before asking the daemon itself to exit.
sleep 2
# Shutdown supervisord
echo "Shutting down Supervisor daemon..."
supervisorctl -c "${SUPERVISORD_CONF}" shutdown
echo ""
echo "All services stopped"