diff --git a/services/vllm/config_bge.yaml b/services/vllm/config_bge.yaml
new file mode 100644
index 0000000..aba81c0
--- /dev/null
+++ b/services/vllm/config_bge.yaml
@@ -0,0 +1,7 @@
+model: BAAI/bge-large-en-v1.5
+host: "0.0.0.0"
+port: 8002
+uvicorn-log-level: "info"
+gpu-memory-utilization: 0.15
+dtype: float16
+task: embed
diff --git a/services/vllm/config_llama.yaml b/services/vllm/config_llama.yaml
index 280343b..e0331fa 100644
--- a/services/vllm/config_llama.yaml
+++ b/services/vllm/config_llama.yaml
@@ -2,7 +2,9 @@ model: meta-llama/Llama-3.1-8B-Instruct
 host: "0.0.0.0"
 port: 8001
 uvicorn-log-level: "info"
-gpu-memory-utilization: 0.95
-max-model-len: 20480
+gpu-memory-utilization: 0.70
+max-model-len: 16384
 dtype: auto
 enforce-eager: false
+enable-auto-tool-choice: true
+tool-call-parser: "llama3_json"
diff --git a/supervisord.conf b/supervisord.conf
index 767710e..f883457 100644
--- a/supervisord.conf
+++ b/supervisord.conf
@@ -74,6 +74,24 @@ priority=200
 stopwaitsecs=30
 
+# vLLM BGE Embedding Server (Port 8002)
+[program:bge]
+command=services/vllm/venv/bin/vllm serve --config services/vllm/config_bge.yaml
+directory=.
+autostart=false
+autorestart=true
+startretries=3
+stderr_logfile=.logs/bge.err.log
+stdout_logfile=.logs/bge.out.log
+stdout_logfile_maxbytes=50MB
+stdout_logfile_backups=10
+stderr_logfile_maxbytes=50MB
+stderr_logfile_backups=10
+environment=HF_HOME="./.cache/vllm",HF_TOKEN="%(ENV_HF_TOKEN)s"
+priority=210
+stopwaitsecs=30
+
+
 
 # AudioCraft Studio Service
 [program:audiocraft]
 command=services/audiocraft/venv/bin/python services/audiocraft/main.py
@@ -113,7 +131,7 @@ programs=comfyui,webdav-sync
 priority=100
 
 [group:vllm]
-programs=llama
+programs=llama,bge
 priority=200
 
 [group:audiocraft]
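
A quick way to verify the new embedding server once it has been started manually (autostart=false, so e.g. `supervisorctl start vllm:bge`): vLLM exposes an OpenAI-compatible /v1/embeddings route when serving with task: embed. The port and model name below come from config_bge.yaml above; the script itself is an illustrative sketch and not part of this change.

# smoke_test_bge.py -- hypothetical verification script, not included in the commit
import requests

resp = requests.post(
    "http://localhost:8002/v1/embeddings",  # port from config_bge.yaml
    json={"model": "BAAI/bge-large-en-v1.5", "input": "hello world"},
    timeout=60,
)
resp.raise_for_status()
vec = resp.json()["data"][0]["embedding"]
print(len(vec))  # bge-large-en-v1.5 produces 1024-dimensional vectors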