feat: add Supervisor process manager for service management

- Add supervisord.conf with ComfyUI and orchestrator services - Update Ansible playbook with supervisor installation tag - Rewrite start-all.sh and stop-all.sh to use Supervisor - Add status.sh script for checking service status - Update arty.yml with supervisor commands and shortcuts - Update CLAUDE.md with Supervisor documentation and troubleshooting - Services now auto-restart on crashes with centralized logging Benefits: - Better process control than manual pkill/background jobs - Auto-restart on service crashes - Centralized log management in /workspace/logs/ - Web interface for monitoring (port 9001) - Works perfectly in RunPod containers (no systemd needed) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 09:22:16 +01:00
parent 2207d60f98
commit 664da9f4ea
7 changed files with 306 additions and 29 deletions
--- a/supervisord.conf
+++ b/supervisord.conf
@@ -0,0 +1,60 @@
+# Global settings for the supervisord daemon itself.
+[supervisord]
+# false = daemonize into the background instead of staying in the foreground.
+nodaemon=false
+pidfile=/workspace/supervisord.pid
+# Main daemon log and its verbosity.
+loglevel=info
+logfile=/workspace/logs/supervisord.log
+# Directory used for automatically named child process logs.
+childlogdir=/workspace/logs
+
+# Unix domain socket on which supervisord accepts control requests.
+[unix_http_server]
+# Permission bits for the socket file: owner-only access.
+chmod=0700
+file=/workspace/supervisor.sock
+
+# Client settings: supervisorctl reaches supervisord through the Unix socket
+# at this URL (same path as the file= key in [unix_http_server]).
+[supervisorctl]
+serverurl=unix:///workspace/supervisor.sock
+
+# Registers Supervisor's main RPC interface factory; supervisorctl and the
+# web interface rely on this section being present.
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface
+
+# Web interface for monitoring, bound to the loopback address only
+# (127.0.0.1, TCP port 9001) and protected by username/password login.
+# NOTE(review): these credentials are stored in plain text in a
+# version-controlled file - consider rotating them and supplying the
+# password from outside the repository.
+[inet_http_server]
+username=admin
+password=runpod2024
+port=127.0.0.1:9001
+
+# ComfyUI Server
+# Launched together with supervisord and restarted whenever it exits.
+[program:comfyui]
+command=bash /workspace/ai/models/comfyui/start.sh
+directory=/workspace/ComfyUI
+environment=HF_HOME="/workspace/huggingface_cache",PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"
+priority=100
+autostart=true
+autorestart=true
+startretries=3
+# The command is a bash wrapper, so the actual server may run as a child of
+# that shell (start.sh is not visible here - confirm whether it uses exec).
+# Deliver stop and kill signals to the whole process group so that a child
+# cannot outlive the wrapper after a stop or restart.
+stopasgroup=true
+killasgroup=true
+# Seconds to wait after the stop signal before the process is killed.
+stopwaitsecs=30
+# Separate stdout/stderr logs, rotated at 50MB with 10 backups each.
+stdout_logfile=/workspace/logs/comfyui.out.log
+stdout_logfile_maxbytes=50MB
+stdout_logfile_backups=10
+stderr_logfile=/workspace/logs/comfyui.err.log
+stderr_logfile_maxbytes=50MB
+stderr_logfile_backups=10
+
+# AI Model Orchestrator
+# Not started automatically (autostart=false); once started it is restarted
+# whenever it exits.
+[program:orchestrator]
+command=python3 model-orchestrator/orchestrator_subprocess.py
+directory=/workspace/ai
+# HF_TOKEN is intentionally not listed here: child processes inherit the
+# environment supervisord was started with, so the token is still passed
+# through when it is set. Referencing it as %(ENV_HF_TOKEN)s would make
+# supervisord reject the entire configuration whenever the variable is
+# unset, which would also prevent the other programs from starting.
+environment=HF_HOME="/workspace/huggingface_cache"
+priority=200
+autostart=false
+autorestart=true
+startretries=3
+# Seconds to wait after the stop signal before the process is killed.
+stopwaitsecs=30
+# Separate stdout/stderr logs, rotated at 50MB with 10 backups each.
+stdout_logfile=/workspace/logs/orchestrator.out.log
+stdout_logfile_maxbytes=50MB
+stdout_logfile_backups=10
+stderr_logfile=/workspace/logs/orchestrator.err.log
+stderr_logfile_maxbytes=50MB
+stderr_logfile_backups=10
+
+# Bundles both programs into one group so they can be controlled together.
+# Grouped processes are addressed in supervisorctl by their qualified name,
+# e.g. ai-services:comfyui, or all at once as ai-services:*.
+[group:ai-services]
+priority=999
+programs=comfyui,orchestrator