From 120bf7c385584002c15b9e309d2e914d017af533 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?=
Date: Sun, 30 Nov 2025 20:12:07 +0100
Subject: [PATCH] feat(ai): bge over litellm

---
 ai/compose.yaml        | 1 +
 ai/litellm-config.yaml | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/ai/compose.yaml b/ai/compose.yaml
index 06a919e..864f9e0 100644
--- a/ai/compose.yaml
+++ b/ai/compose.yaml
@@ -105,6 +105,7 @@ services:
       LITELLM_MASTER_KEY: ${AI_LITELLM_API_KEY}
       DATABASE_URL: postgresql://${AI_DB_USER}:${AI_DB_PASSWORD}@ai_postgres:5432/litellm
       GPU_VLLM_LLAMA_URL: ${GPU_VLLM_LLAMA_URL}
+      GPU_VLLM_BGE_URL: ${GPU_VLLM_BGE_URL}
       # LITELLM_DROP_PARAMS: 'true'  # DISABLED: Was breaking streaming
       NO_DOCS: "true"
       NO_REDOC: "true"
diff --git a/ai/litellm-config.yaml b/ai/litellm-config.yaml
index d9514df..325e2b0 100644
--- a/ai/litellm-config.yaml
+++ b/ai/litellm-config.yaml
@@ -42,6 +42,15 @@ model_list:
       supports_system_messages: true  # Llama supports system messages
       stream: true  # Enable streaming by default
 
+  # Embeddings - BGE Large (Port 8002)
+  - model_name: bge-large-en
+    litellm_params:
+      model: openai/BAAI/bge-large-en-v1.5
+      api_base: os.environ/GPU_VLLM_BGE_URL
+      api_key: "EMPTY"
+      rpm: 1000
+      tpm: 500000
+
 litellm_settings:
   drop_params: false  # DISABLED: Was breaking streaming
   set_verbose: true  # Enable verbose logging for debugging streaming issues
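
Reviewer note: GPU_VLLM_BGE_URL must point at the vLLM OpenAI-compatible
server hosting BAAI/bge-large-en-v1.5 (port 8002 per the comment in the
config). Below is a minimal smoke test for that backend, assuming the URL
ends in /v1, e.g. http://<gpu-host>:8002/v1; the host and script name are
illustrative and not part of this patch.

    # check_bge_backend.py -- assumed helper, not included in this patch.
    import os

    import requests

    base = os.environ["GPU_VLLM_BGE_URL"].rstrip("/")

    # vLLM's OpenAI-compatible server exposes /models and /embeddings.
    models = requests.get(f"{base}/models", timeout=10).json()
    print([m["id"] for m in models["data"]])  # expect BAAI/bge-large-en-v1.5

    resp = requests.post(
        f"{base}/embeddings",
        json={"model": "BAAI/bge-large-en-v1.5", "input": ["hello world"]},
        timeout=30,
    )
    resp.raise_for_status()
    print(len(resp.json()["data"][0]["embedding"]))  # 1024 for bge-large-en-v1.5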
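
Once LiteLLM reloads the config, clients call the new bge-large-en alias
through the proxy, authenticating with the master key. A minimal sketch using
the openai SDK, assuming the proxy listens on its default port 4000 (the URL
and key placeholder are deployment-specific):

    # embed_via_litellm.py -- assumed usage example, not part of this patch.
    from openai import OpenAI

    client = OpenAI(
        base_url="http://localhost:4000/v1",  # hypothetical LiteLLM proxy URL
        api_key="<AI_LITELLM_API_KEY>",       # LITELLM_MASTER_KEY from compose.yaml
    )

    resp = client.embeddings.create(
        model="bge-large-en",  # alias defined in litellm-config.yaml above
        input=["routed to openai/BAAI/bge-large-en-v1.5 on the vLLM backend"],
    )
    print(len(resp.data[0].embedding))  # 1024-dim vector from bge-large-en-v1.5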