feat: add Supervisor process manager for service management
- Add supervisord.conf with ComfyUI and orchestrator services
- Update Ansible playbook with supervisor installation tag
- Rewrite start-all.sh and stop-all.sh to use Supervisor
- Add status.sh script for checking service status
- Update arty.yml with supervisor commands and shortcuts
- Update CLAUDE.md with Supervisor documentation and troubleshooting
- Services now auto-restart on crashes with centralized logging

Benefits:
- Better process control than manual pkill/background jobs
- Auto-restart on service crashes
- Centralized log management in /workspace/logs/
- Web interface for monitoring (port 9001)
- Works perfectly in RunPod containers (no systemd needed)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,15 +1,19 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Start AI Orchestrator
|
||||
# Starts the model orchestrator which manages all AI services
|
||||
# Start AI Services with Supervisor
|
||||
# Starts supervisor daemon which manages ComfyUI and orchestrator
|
||||
#
|
||||
|
||||
set -e
|
||||
|
||||
cd "$(dirname "$0")/.."
|
||||
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
|
||||
SUPERVISORD_CONF="${WORKSPACE_DIR}/supervisord.conf"
|
||||
AI_DIR="${WORKSPACE_DIR}/ai"
|
||||
|
||||
cd "${AI_DIR}"
|
||||
|
||||
echo "========================================="
|
||||
echo " Starting AI Orchestrator"
|
||||
echo " Starting AI Services with Supervisor"
|
||||
echo "========================================="
|
||||
echo ""
|
||||
|
||||
@@ -27,9 +31,47 @@ if [ -f .env ]; then
|
||||
set +a
|
||||
fi
|
||||
|
||||
# Start orchestrator
|
||||
echo "Starting orchestrator on port 9000..."
|
||||
python3 model-orchestrator/orchestrator_subprocess.py
|
||||
# Check if supervisord is already running
|
||||
if [ -f "${WORKSPACE_DIR}/supervisord.pid" ]; then
|
||||
PID=$(cat "${WORKSPACE_DIR}/supervisord.pid")
|
||||
if ps -p "$PID" > /dev/null 2>&1; then
|
||||
echo "Supervisor is already running (PID: $PID)"
|
||||
echo ""
|
||||
echo "Checking service status..."
|
||||
supervisorctl -c "${SUPERVISORD_CONF}" status
|
||||
exit 0
|
||||
else
|
||||
echo "Removing stale PID file..."
|
||||
rm -f "${WORKSPACE_DIR}/supervisord.pid"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Start supervisord
|
||||
echo "Starting Supervisor daemon..."
|
||||
supervisord -c "${SUPERVISORD_CONF}"
|
||||
|
||||
# Wait a moment for supervisor to start
|
||||
sleep 2
|
||||
|
||||
# Check status
|
||||
echo ""
|
||||
echo "Service Status:"
|
||||
echo "---------------"
|
||||
supervisorctl -c "${SUPERVISORD_CONF}" status
|
||||
|
||||
echo ""
|
||||
echo "Orchestrator stopped"
|
||||
echo "========================================="
|
||||
echo "Services started successfully!"
|
||||
echo "========================================="
|
||||
echo ""
|
||||
echo "Useful commands:"
|
||||
echo " supervisorctl status - Check status"
|
||||
echo " supervisorctl start orchestrator - Start orchestrator"
|
||||
echo " supervisorctl restart comfyui - Restart ComfyUI"
|
||||
echo " supervisorctl stop all - Stop all services"
|
||||
echo " supervisorctl tail -f comfyui - Follow ComfyUI logs"
|
||||
echo ""
|
||||
echo "Web interface: http://localhost:9001"
|
||||
echo " Username: admin"
|
||||
echo " Password: runpod2024"
|
||||
echo ""
|
||||
|
||||
47
scripts/status.sh
Normal file
47
scripts/status.sh
Normal file
@@ -0,0 +1,47 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Check AI Services Status
|
||||
# Shows status of all services managed by Supervisor
#
# Reads the supervisord PID file under WORKSPACE_DIR, verifies that the
# recorded PID is alive, then prints `supervisorctl status` output.
# Exits 1 (after printing a start hint) when the PID file is missing or
# the recorded PID is not a running process.
|
||||
#
|
||||
|
||||
# Workspace root; taken from the environment, falling back to /workspace.
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
|
||||
# Config file passed to supervisorctl via -c further down.
SUPERVISORD_CONF="${WORKSPACE_DIR}/supervisord.conf"
|
||||
|
||||
echo "========================================="
|
||||
echo " AI Services Status"
|
||||
echo "========================================="
|
||||
echo ""
|
||||
|
||||
# Check if supervisord is running.
# Step 1: no PID file -> report "not running" and exit 1.
|
||||
if [ ! -f "${WORKSPACE_DIR}/supervisord.pid" ]; then
|
||||
echo "❌ Supervisor is not running"
|
||||
echo ""
|
||||
echo "To start services, run:"
|
||||
echo " bash scripts/start-all.sh"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Step 2: PID file exists; confirm the PID it holds belongs to a live process.
# The stale file is only reported here, not removed.
# NOTE(review): `ps -p` only proves that *some* process has this PID, not
# that the process is supervisord (PID reuse) -- confirm this is acceptable.
PID=$(cat "${WORKSPACE_DIR}/supervisord.pid")
|
||||
if ! ps -p "$PID" > /dev/null 2>&1; then
|
||||
echo "❌ Supervisor PID file exists but process is not running"
|
||||
echo ""
|
||||
echo "To start services, run:"
|
||||
echo " bash scripts/start-all.sh"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✅ Supervisor is running (PID: $PID)"
|
||||
echo ""
|
||||
|
||||
# Show service status by querying the daemon with the explicit config file.
|
||||
echo "Service Status:"
|
||||
echo "---------------"
|
||||
supervisorctl -c "${SUPERVISORD_CONF}" status
|
||||
|
||||
echo ""
|
||||
# Operator hints. These commands are printed without the -c option used above.
# NOTE(review): whether a bare `supervisorctl` locates this config depends on
# its default search path and the caller's working directory -- confirm.
echo "Useful commands:"
|
||||
echo " supervisorctl start orchestrator - Start orchestrator"
|
||||
echo " supervisorctl restart comfyui - Restart ComfyUI"
|
||||
echo " supervisorctl stop all - Stop all services"
|
||||
echo " supervisorctl tail -f comfyui - Follow ComfyUI logs"
|
||||
echo ""
|
||||
@@ -1,22 +1,49 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Stop AI Services
|
||||
# Gracefully stops all running AI services
|
||||
# Gracefully stops all services managed by Supervisor
|
||||
#
|
||||
|
||||
set -e
|
||||
|
||||
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
|
||||
SUPERVISORD_CONF="${WORKSPACE_DIR}/supervisord.conf"
|
||||
|
||||
echo "========================================="
|
||||
echo " Stopping AI Services"
|
||||
echo "========================================="
|
||||
echo ""
|
||||
|
||||
# Kill orchestrator and model processes
|
||||
echo "Stopping orchestrator..."
|
||||
pkill -f "orchestrator_subprocess.py" || echo "Orchestrator not running"
|
||||
# Check if supervisord is running
|
||||
if [ ! -f "${WORKSPACE_DIR}/supervisord.pid" ]; then
|
||||
echo "Supervisor is not running (no PID file found)"
|
||||
echo "Cleaning up any stray processes..."
|
||||
pkill -f "orchestrator_subprocess.py" || echo " - Orchestrator not running"
|
||||
pkill -f "ComfyUI.*main.py" || echo " - ComfyUI not running"
|
||||
echo ""
|
||||
echo "All services stopped"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Stopping model services..."
|
||||
pkill -f "models/vllm/server.py" || echo "vLLM not running"
|
||||
PID=$(cat "${WORKSPACE_DIR}/supervisord.pid")
|
||||
if ! ps -p "$PID" > /dev/null 2>&1; then
|
||||
echo "Supervisor PID file exists but process is not running"
|
||||
echo "Removing stale PID file..."
|
||||
rm -f "${WORKSPACE_DIR}/supervisord.pid"
|
||||
echo ""
|
||||
echo "All services stopped"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Stop all supervised services
|
||||
echo "Stopping all supervised services..."
|
||||
supervisorctl -c "${SUPERVISORD_CONF}" stop all
|
||||
|
||||
sleep 2
|
||||
|
||||
# Shutdown supervisord
|
||||
echo "Shutting down Supervisor daemon..."
|
||||
supervisorctl -c "${SUPERVISORD_CONF}" shutdown
|
||||
|
||||
echo ""
|
||||
echo "All services stopped"
|
||||
|
||||
Reference in New Issue
Block a user