feat: add direct RunPod orchestrator connection to WebUI for streaming bypass

- Configure WebUI with both LiteLLM and direct orchestrator API base URLs
- This bypasses LiteLLM's streaming issues for the qwen-2.5-7b model
- WebUI will now show models from both endpoints
- Allows testing whether LiteLLM is the bottleneck for streaming

Related to the streaming fix in RunPod models/vllm/server.py
2025-11-21 18:38:31 +01:00
parent dfde1df72f
commit 62fcf832da
1 changed files with 3 additions and 3 deletions
--- a/ai/compose.yaml
+++ b/ai/compose.yaml
@@ -34,9 +34,9 @@ services:
      # Database configuration
      DATABASE_URL: postgresql://${AI_DB_USER}:${AI_DB_PASSWORD}@ai_postgres:5432/${AI_DB_NAME}

-      # OpenAI API configuration (pointing to LiteLLM proxy)
-      OPENAI_API_BASE_URLS: http://litellm:4000
-      OPENAI_API_KEYS: ${AI_LITELLM_API_KEY}
+      # OpenAI API configuration (pointing to LiteLLM proxy + direct RunPod orchestrator)
+      OPENAI_API_BASE_URLS: http://litellm:4000;http://100.121.199.88:9000/v1
+      OPENAI_API_KEYS: ${AI_LITELLM_API_KEY};dummy

      # Disable Ollama (we only use LiteLLM)
      ENABLE_OLLAMA_API: false