diff --git a/services/vllm/config_bge.yaml b/services/vllm/config_bge.yaml
deleted file mode 100644
index 5d61763..0000000
--- a/services/vllm/config_bge.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-model: BAAI/bge-large-en-v1.5
-host: "0.0.0.0"
-port: 8002
-uvicorn-log-level: "info"
-gpu-memory-utilization: 0.1
diff --git a/services/vllm/config_llama.yaml b/services/vllm/config_llama.yaml
index c4f1221..f1d4422 100644
--- a/services/vllm/config_llama.yaml
+++ b/services/vllm/config_llama.yaml
@@ -2,5 +2,7 @@ model: meta-llama/Llama-3.1-8B-Instruct
 host: "0.0.0.0"
 port: 8001
 uvicorn-log-level: "info"
-gpu-memory-utilization: 0.9
-max-model-len: 20480
+gpu-memory-utilization: 0.95
+max-model-len: 32768
+dtype: auto
+enforce-eager: false
diff --git a/supervisord.conf b/supervisord.conf
index dba8488..3704206 100644
--- a/supervisord.conf
+++ b/supervisord.conf
@@ -73,23 +73,6 @@ environment=HF_HOME="./.cache/vllm",HF_TOKEN="%(ENV_HF_TOKEN)s"
 priority=200
 stopwaitsecs=30
 
-# vLLM BGE Embedding Server (Port 8002)
-[program:bge]
-command=services/vllm/venv/bin/vllm serve --config services/vllm/config_bge.yaml
-directory=.
-autostart=false
-autorestart=true
-startretries=3
-stderr_logfile=.logs/bge.err.log
-stdout_logfile=.logs/bge.out.log
-stdout_logfile_maxbytes=50MB
-stdout_logfile_backups=10
-stderr_logfile_maxbytes=50MB
-stderr_logfile_backups=10
-environment=HF_HOME="./.cache/vllm",HF_TOKEN="%(ENV_HF_TOKEN)s"
-priority=201
-stopwaitsecs=30
-
 
 # AudioCraft Studio Service
 [program:audiocraft]
@@ -112,7 +95,7 @@ programs=comfyui,webdav-sync
 priority=100
 
 [group:vllm]
-programs=llama,bge
+programs=llama
 priority=200
 
 [group:audiocraft]