revert: remove direct WebUI connection, focus on fixing LiteLLM streaming

- Reverted the direct orchestrator connection to the WebUI
- Added stream: true parameter to the qwen-2.5-7b model config
- Kept LiteLLM as the single proxy for all models
2025-11-21 18:42:46 +01:00
parent 62fcf832da
commit c58b5d36ba
2 changed files with 4 additions and 3 deletions


@@ -34,9 +34,9 @@ services:
       # Database configuration
       DATABASE_URL: postgresql://${AI_DB_USER}:${AI_DB_PASSWORD}@ai_postgres:5432/${AI_DB_NAME}
-      # OpenAI API configuration (pointing to LiteLLM proxy + direct RunPod orchestrator)
-      OPENAI_API_BASE_URLS: http://litellm:4000;http://100.121.199.88:9000/v1
-      OPENAI_API_KEYS: ${AI_LITELLM_API_KEY};dummy
+      # OpenAI API configuration (pointing to LiteLLM proxy)
+      OPENAI_API_BASE_URLS: http://litellm:4000
+      OPENAI_API_KEYS: ${AI_LITELLM_API_KEY}
       # Disable Ollama (we only use LiteLLM)
       ENABLE_OLLAMA_API: false
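
With the direct orchestrator URL removed, every model the WebUI sees should come through LiteLLM. A minimal sanity check in Python (a sketch, assuming the proxy exposes the standard OpenAI-compatible /v1/models route and that AI_LITELLM_API_KEY is set in the environment; run it from inside the Docker network, or swap in the host-mapped port):

import os

import requests  # third-party; assumed installed for this sketch

# List models through the LiteLLM proxy's OpenAI-compatible endpoint.
# http://litellm:4000 is the in-network URL from the compose file above.
resp = requests.get(
    "http://litellm:4000/v1/models",
    headers={"Authorization": f"Bearer {os.environ['AI_LITELLM_API_KEY']}"},
    timeout=10,
)
resp.raise_for_status()
for model in resp.json()["data"]:
    print(model["id"])  # everything listed here is served via LiteLLM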


@@ -40,6 +40,7 @@ model_list:
       timeout: 600  # 10 minutes for generation
       stream_timeout: 600
       supports_system_messages: false  # vLLM handles system messages differently
+      stream: true  # Enable streaming by default
   # Image Generation
   - model_name: flux-schnell
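
To confirm the stream: true default takes effect end to end, here is a short streaming request through the proxy (a sketch, assuming the openai Python package is installed and using the qwen-2.5-7b model name from the config above; the prompt is arbitrary):

import os

from openai import OpenAI  # assumed installed for this sketch

# Point the standard OpenAI client at the LiteLLM proxy.
client = OpenAI(
    base_url="http://litellm:4000",
    api_key=os.environ["AI_LITELLM_API_KEY"],
)

# With streaming enabled, tokens should arrive incrementally
# instead of as a single block at the end of generation.
stream = client.chat.completions.create(
    model="qwen-2.5-7b",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
print()

If output still arrives in one block, the likely culprits are the stream_timeout setting above or buffering in something sitting in front of the proxy.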