From 90fa8a073cf47772817332290fc3a67ae6c27f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= Date: Thu, 27 Nov 2025 01:12:57 +0100 Subject: [PATCH] fix: remove vllm embedding --- services/vllm/config_bge.yaml | 5 ----- services/vllm/config_llama.yaml | 6 ++++-- supervisord.conf | 19 +------------------ 3 files changed, 5 insertions(+), 25 deletions(-) delete mode 100644 services/vllm/config_bge.yaml diff --git a/services/vllm/config_bge.yaml b/services/vllm/config_bge.yaml deleted file mode 100644 index 5d61763..0000000 --- a/services/vllm/config_bge.yaml +++ /dev/null @@ -1,5 +0,0 @@ -model: BAAI/bge-large-en-v1.5 -host: "0.0.0.0" -port: 8002 -uvicorn-log-level: "info" -gpu-memory-utilization: 0.1 diff --git a/services/vllm/config_llama.yaml b/services/vllm/config_llama.yaml index c4f1221..f1d4422 100644 --- a/services/vllm/config_llama.yaml +++ b/services/vllm/config_llama.yaml @@ -2,5 +2,7 @@ model: meta-llama/Llama-3.1-8B-Instruct host: "0.0.0.0" port: 8001 uvicorn-log-level: "info" -gpu-memory-utilization: 0.9 -max-model-len: 20480 +gpu-memory-utilization: 0.95 +max-model-len: 32768 +dtype: auto +enforce-eager: false diff --git a/supervisord.conf b/supervisord.conf index dba8488..3704206 100644 --- a/supervisord.conf +++ b/supervisord.conf @@ -73,23 +73,6 @@ environment=HF_HOME="./.cache/vllm",HF_TOKEN="%(ENV_HF_TOKEN)s" priority=200 stopwaitsecs=30 -# vLLM BGE Embedding Server (Port 8002) -[program:bge] -command=services/vllm/venv/bin/vllm serve --config services/vllm/config_bge.yaml -directory=. -autostart=false -autorestart=true -startretries=3 -stderr_logfile=.logs/bge.err.log -stdout_logfile=.logs/bge.out.log -stdout_logfile_maxbytes=50MB -stdout_logfile_backups=10 -stderr_logfile_maxbytes=50MB -stderr_logfile_backups=10 -environment=HF_HOME="./.cache/vllm",HF_TOKEN="%(ENV_HF_TOKEN)s" -priority=201 -stopwaitsecs=30 - # AudioCraft Studio Service [program:audiocraft] @@ -112,7 +95,7 @@ programs=comfyui,webdav-sync priority=100 [group:vllm] -programs=llama,bge +programs=llama priority=200 [group:audiocraft]