feat: add Supervisor process manager for service management
- Add supervisord.conf with ComfyUI and orchestrator services
- Update Ansible playbook with supervisor installation tag
- Rewrite start-all.sh and stop-all.sh to use Supervisor
- Add status.sh script for checking service status
- Update arty.yml with supervisor commands and shortcuts
- Update CLAUDE.md with Supervisor documentation and troubleshooting
- Services now auto-restart on crashes with centralized logging

Benefits:
- Better process control than manual pkill/background jobs
- Auto-restart on service crashes
- Centralized log management in /workspace/logs/
- Web interface for monitoring (port 9001)
- Works perfectly in RunPod containers (no systemd needed)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
60
supervisord.conf
Normal file
60
supervisord.conf
Normal file
@@ -0,0 +1,60 @@
|
||||
# Global settings for the supervisord daemon itself.
[supervisord]
# false = daemonize and run in the background.
nodaemon=false
pidfile=/workspace/supervisord.pid
# Daemon activity log and its verbosity.
loglevel=info
logfile=/workspace/logs/supervisord.log
# Directory used for automatically named (AUTO) child log files.
childlogdir=/workspace/logs
|
||||
|
||||
# Unix domain socket on which supervisord listens for control requests.
[unix_http_server]
# Socket file is accessible to its owner only.
chmod=0700
file=/workspace/supervisor.sock
|
||||
|
||||
# supervisorctl client: connect through the Unix socket at this path.
[supervisorctl]
serverurl=unix:///workspace/supervisor.sock
|
||||
|
||||
# Main XML-RPC interface; must stay present for supervisorctl and the web UI
# to work.
[rpcinterface:supervisor]
supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface
|
||||
|
||||
# HTTP server for the monitoring web UI, bound to loopback only
# (127.0.0.1, TCP port 9001) and protected by basic auth.
# NOTE(review): the password below is stored in plain text in a committed
# file. Supervisor also accepts a SHA-1 hash prefixed with {SHA}; consider
# switching to that and rotating this value.
[inet_http_server]
port=127.0.0.1:9001
username=admin
password=runpod2024
|
||||
|
||||
# ComfyUI server, launched through a bash wrapper script.
[program:comfyui]
command=bash /workspace/ai/models/comfyui/start.sh
directory=/workspace/ComfyUI
# Extra variables added on top of the environment inherited from supervisord.
environment=HF_HOME="/workspace/huggingface_cache",PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"

# Start together with supervisord and restart whenever the process exits.
# Lower priority values start first and shut down last.
priority=100
autostart=true
autorestart=true
startretries=3

# Give the process 30 seconds to exit after the stop signal before SIGKILL.
stopwaitsecs=30
# The command is a bash wrapper, so supervisord only tracks the shell's PID.
# Signal the whole process group on stop/kill so the server started by
# start.sh is not left running (orphaned) if the script does not exec it.
stopasgroup=true
killasgroup=true

# Per-stream log files, rotated at 50MB with 10 backups each.
stdout_logfile=/workspace/logs/comfyui.out.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=10
stderr_logfile=/workspace/logs/comfyui.err.log
stderr_logfile_maxbytes=50MB
stderr_logfile_backups=10
|
||||
|
||||
# AI model orchestrator. Not started with supervisord (autostart=false);
# start it on demand with supervisorctl.
[program:orchestrator]
command=python3 model-orchestrator/orchestrator_subprocess.py
# The relative script path in command= is resolved from this directory.
directory=/workspace/ai
# HF_TOKEN is deliberately not listed here: child processes already inherit
# supervisord's own environment, and an %(ENV_HF_TOKEN)s expansion makes
# supervisord refuse to load the entire config (all programs) whenever
# HF_TOKEN is unset. Export HF_TOKEN before starting supervisord instead.
environment=HF_HOME="/workspace/huggingface_cache"

# Once started, restart on exit. Higher priority value than comfyui (100),
# so it starts after it and shuts down before it.
priority=200
autostart=false
autorestart=true
startretries=3

# Give the process 30 seconds to exit after the stop signal before SIGKILL.
stopwaitsecs=30

# Per-stream log files, rotated at 50MB with 10 backups each.
stdout_logfile=/workspace/logs/orchestrator.out.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=10
stderr_logfile=/workspace/logs/orchestrator.err.log
stderr_logfile_maxbytes=50MB
stderr_logfile_backups=10
|
||||
|
||||
# Groups both programs so they can be controlled together; grouped processes
# are addressed as ai-services:<program> in supervisorctl.
[group:ai-services]
priority=999
programs=comfyui,orchestrator
|
||||
Reference in New Issue
Block a user