revert: remove direct WebUI connection, focus on fixing LiteLLM streaming
- Reverted direct orchestrator connection to WebUI
- Added stream: true parameter to qwen-2.5-7b model config
- Keep LiteLLM as single proxy for all models

@@ -34,9 +34,9 @@ services:
       # Database configuration
       DATABASE_URL: postgresql://${AI_DB_USER}:${AI_DB_PASSWORD}@ai_postgres:5432/${AI_DB_NAME}
 
-      # OpenAI API configuration (pointing to LiteLLM proxy + direct RunPod orchestrator)
-      OPENAI_API_BASE_URLS: http://litellm:4000;http://100.121.199.88:9000/v1
-      OPENAI_API_KEYS: ${AI_LITELLM_API_KEY};dummy
+      # OpenAI API configuration (pointing to LiteLLM proxy)
+      OPENAI_API_BASE_URLS: http://litellm:4000
+      OPENAI_API_KEYS: ${AI_LITELLM_API_KEY}
 
       # Disable Ollama (we only use LiteLLM)
       ENABLE_OLLAMA_API: false
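
With the direct orchestrator URL (http://100.121.199.88:9000/v1) removed from the WebUI, that endpoint would now be reached through LiteLLM like every other backend. A minimal sketch of what such a model_list entry could look like, assuming the orchestrator exposes an OpenAI-compatible API; the model_name, served model id, and api_key value here are hypothetical, with "dummy" mirroring the placeholder key from the old OPENAI_API_KEYS entry:

model_list:
  - model_name: runpod-orchestrator       # hypothetical alias exposed to the WebUI
    litellm_params:
      model: openai/served-model-name     # hypothetical model id behind the /v1 endpoint
      api_base: http://100.121.199.88:9000/v1
      api_key: dummy                      # placeholder, as in the old direct-connection setup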

@@ -40,6 +40,7 @@ model_list:
       timeout: 600 # 10 minutes for generation
       stream_timeout: 600
       supports_system_messages: false # vLLM handles system messages differently
+      stream: true # Enable streaming by default
 
   # Image Generation
   - model_name: flux-schnell
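
For reference, the full qwen-2.5-7b entry this hunk belongs to would look roughly like the sketch below. Only the four litellm_params shown in the hunk are taken from the actual config; the model and api_base lines are assumptions about a vLLM backend:

model_list:
  - model_name: qwen-2.5-7b
    litellm_params:
      model: hosted_vllm/Qwen/Qwen2.5-7B-Instruct   # assumed vLLM-served model id
      api_base: http://vllm:8000/v1                 # assumed backend address
      timeout: 600 # 10 minutes for generation
      stream_timeout: 600
      supports_system_messages: false # vLLM handles system messages differently
      stream: true # Enable streaming by default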