feat(ai): add bge-large-en-v1.5 embedding model to litellm
- Add BGE embedding model config (port 8002) to litellm-config.yaml
- Add GPU_VLLM_EMBED_URL env var to compose and .env

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -104,6 +104,7 @@ services:
       GPU_TAILSCALE_IP: ${GPU_TAILSCALE_IP}
       GPU_VLLM_QWEN_URL: ${GPU_VLLM_QWEN_URL}
       GPU_VLLM_LLAMA_URL: ${GPU_VLLM_LLAMA_URL}
+      GPU_VLLM_EMBED_URL: ${GPU_VLLM_EMBED_URL}
       # LITELLM_DROP_PARAMS: 'true' # DISABLED: Was breaking streaming
       NO_DOCS: 'true'
       NO_REDOC: 'true'
||||
@@ -55,6 +55,15 @@ model_list:
       supports_system_messages: true # Llama supports system messages
       stream: true # Enable streaming by default

+  # Embeddings - BGE Large (Port 8002)
+  - model_name: bge-large-en-v1.5
+    litellm_params:
+      model: hosted_vllm/BAAI/bge-large-en-v1.5
+      api_base: os.environ/GPU_VLLM_EMBED_URL # Direct to vLLM embedding server
+      api_key: "EMPTY"
+      rpm: 1000
+      tpm: 500000
+
 litellm_settings:
   drop_params: false # DISABLED: Was breaking streaming
   set_verbose: true # Enable verbose logging for debugging streaming issues
Reference in New Issue
Block a user