refactor: replace orchestrator with dedicated vLLM servers for Qwen and Llama

2025-11-23 16:00:03 +01:00
parent cc0f55df38
commit 1ad99cdb53
3 changed files with 346 additions and 10 deletions
--- a/supervisord.conf
+++ b/supervisord.conf
@@ -39,15 +39,15 @@ environment=HF_HOME="../huggingface_cache",PYTORCH_CUDA_ALLOC_CONF="expandable_s
 priority=100
 stopwaitsecs=30

-# AI Model Orchestrator
-[program:orchestrator]
-command=model-orchestrator/venv/bin/python model-orchestrator/orchestrator_subprocess.py
+# vLLM Qwen 2.5 7B Server (Port 8000)
+[program:vllm-qwen]
+command=vllm/venv/bin/python vllm/server_qwen.py
 directory=.
 autostart=false
 autorestart=true
 startretries=3
-stderr_logfile=logs/orchestrator.err.log
-stdout_logfile=logs/orchestrator.out.log
+stderr_logfile=logs/vllm-qwen.err.log
+stdout_logfile=logs/vllm-qwen.out.log
 stdout_logfile_maxbytes=50MB
 stdout_logfile_backups=10
 stderr_logfile_maxbytes=50MB
@@ -56,6 +56,23 @@ environment=HF_HOME="../huggingface_cache",HF_TOKEN="%(ENV_HF_TOKEN)s"
 priority=200
 stopwaitsecs=30

+# vLLM Llama 3.1 8B Server (Port 8001)
+[program:vllm-llama]
+command=vllm/venv/bin/python vllm/server_llama.py
+directory=.
+autostart=false
+autorestart=true
+startretries=3
+stderr_logfile=logs/vllm-llama.err.log
+stdout_logfile=logs/vllm-llama.out.log
+stdout_logfile_maxbytes=50MB
+stdout_logfile_backups=10
+stderr_logfile_maxbytes=50MB
+stderr_logfile_backups=10
+environment=HF_HOME="../huggingface_cache",HF_TOKEN="%(ENV_HF_TOKEN)s"
+priority=201
+stopwaitsecs=30
+
 # ComfyUI WebDAV Sync Service
 [program:webdav-sync]
 command=webdav-sync/venv/bin/python webdav-sync/webdav_sync.py
@@ -74,5 +91,5 @@ priority=150
 stopwaitsecs=10

 [group:ai-services]
-programs=comfyui,orchestrator,webdav-sync
+programs=comfyui,vllm-qwen,vllm-llama,webdav-sync
 priority=999