refactor: replace orchestrator with dedicated vLLM servers for Qwen and Llama

This commit is contained in:
2025-11-23 16:00:03 +01:00
parent cc0f55df38
commit 1ad99cdb53
3 changed files with 346 additions and 10 deletions

View File

@@ -39,15 +39,15 @@ environment=HF_HOME="../huggingface_cache",PYTORCH_CUDA_ALLOC_CONF="expandable_s
priority=100
stopwaitsecs=30
# AI Model Orchestrator
[program:orchestrator]
command=model-orchestrator/venv/bin/python model-orchestrator/orchestrator_subprocess.py
# vLLM Qwen 2.5 7B Server (Port 8000)
[program:vllm-qwen]
command=vllm/venv/bin/python vllm/server_qwen.py
directory=.
autostart=false
autorestart=true
startretries=3
stderr_logfile=logs/orchestrator.err.log
stdout_logfile=logs/orchestrator.out.log
stderr_logfile=logs/vllm-qwen.err.log
stdout_logfile=logs/vllm-qwen.out.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=10
stderr_logfile_maxbytes=50MB
@@ -56,6 +56,23 @@ environment=HF_HOME="../huggingface_cache",HF_TOKEN="%(ENV_HF_TOKEN)s"
priority=200
stopwaitsecs=30
# vLLM Llama 3.1 8B Server (Port 8001)
# Supervisor program that runs vllm/server_llama.py with the interpreter from
# the vllm virtualenv. The port is not set anywhere in this section --
# presumably server_llama.py binds 8001 itself; confirm against that script.
[program:vllm-llama]
command=vllm/venv/bin/python vllm/server_llama.py
# Relative paths in this section (command, log files) depend on the working
# directory supervisord was launched from -- TODO confirm the launch directory.
directory=.
# Not launched when supervisord starts; it has to be started explicitly
# (e.g. via supervisorctl). Once running, it is restarted whenever it exits.
autostart=false
autorestart=true
# Number of consecutive failed start attempts tolerated before giving up.
startretries=3
stderr_logfile=logs/vllm-llama.err.log
stdout_logfile=logs/vllm-llama.out.log
# Log rotation: rotate each stream at 50MB and keep 10 rotated backups.
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=10
stderr_logfile_maxbytes=50MB
stderr_logfile_backups=10
# HF_TOKEN is taken from supervisord's own environment through the
# %(ENV_HF_TOKEN)s expansion, so the token is not stored in this file.
# NOTE(review): the expansion needs HF_TOKEN to be present in supervisord's
# environment -- verify how an unset variable is handled before deploying.
environment=HF_HOME="../huggingface_cache",HF_TOKEN="%(ENV_HF_TOKEN)s"
# Start/stop ordering value relative to the other programs; the preceding
# section uses priority=200, so this one is presumably ordered right after it.
priority=201
# Seconds to wait after the stop signal before the process is killed.
stopwaitsecs=30
# ComfyUI WebDAV Sync Service
[program:webdav-sync]
command=webdav-sync/venv/bin/python webdav-sync/webdav_sync.py
@@ -74,5 +91,5 @@ priority=150
stopwaitsecs=10
# Collects the listed programs into one group named "ai-services" so they can
# be controlled together.
# NOTE(review): the two programs= lines below look like the before/after pair
# of this change (orchestrator replaced by vllm-qwen and vllm-llama);
# presumably only the second one exists in the resulting config -- confirm
# against the actual file.
[group:ai-services]
programs=comfyui,orchestrator,webdav-sync
programs=comfyui,vllm-qwen,vllm-llama,webdav-sync
priority=999