diff --git a/ai/compose.yaml b/ai/compose.yaml
index 00a7afc..de2bb0c 100644
--- a/ai/compose.yaml
+++ b/ai/compose.yaml
@@ -34,9 +34,9 @@ services:
       # Database configuration
       DATABASE_URL: postgresql://${AI_DB_USER}:${AI_DB_PASSWORD}@ai_postgres:5432/${AI_DB_NAME}
 
-      # OpenAI API configuration (pointing to LiteLLM proxy + direct RunPod orchestrator)
-      OPENAI_API_BASE_URLS: http://litellm:4000;http://100.121.199.88:9000/v1
-      OPENAI_API_KEYS: ${AI_LITELLM_API_KEY};dummy
+      # OpenAI API configuration (pointing to LiteLLM proxy)
+      OPENAI_API_BASE_URLS: http://litellm:4000
+      OPENAI_API_KEYS: ${AI_LITELLM_API_KEY}
 
       # Disable Ollama (we only use LiteLLM)
       ENABLE_OLLAMA_API: false
diff --git a/ai/litellm-config.yaml b/ai/litellm-config.yaml
index 9a87781..a790070 100644
--- a/ai/litellm-config.yaml
+++ b/ai/litellm-config.yaml
@@ -40,6 +40,7 @@ model_list:
       timeout: 600  # 10 minutes for generation
       stream_timeout: 600
       supports_system_messages: false  # vLLM handles system messages differently
+      stream: true  # Enable streaming by default
 
   # Image Generation
   - model_name: flux-schnell
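
Net effect of this patch: the web UI (apparently Open WebUI, judging by the OPENAI_API_BASE_URLS / ENABLE_OLLAMA_API variable names) now sends all OpenAI-compatible traffic through the single LiteLLM proxy instead of also hitting the RunPod orchestrator directly, and the vLLM-backed chat model streams responses by default. A minimal Python sketch to verify both changes from inside the compose network is below. Assumptions not in the diff: the proxy is reachable at http://litellm:4000, AI_LITELLM_API_KEY is exported in the shell, and "my-vllm-model" is a stand-in for the actual model_name defined above the edited hunk in litellm-config.yaml.

    # verify_stream.py -- smoke-test streaming through the LiteLLM proxy
    import os

    from openai import OpenAI

    client = OpenAI(
        base_url="http://litellm:4000",            # same endpoint as OPENAI_API_BASE_URLS
        api_key=os.environ["AI_LITELLM_API_KEY"],  # same key the web UI now uses
    )

    # stream=True exercises the new `stream: true` default in litellm-config.yaml
    stream = client.chat.completions.create(
        model="my-vllm-model",  # hypothetical; substitute your config's model_name
        messages=[{"role": "user", "content": "Say hello in one short sentence."}],
        stream=True,
    )

    # Print tokens as they arrive; a single burst at the end would suggest
    # streaming is not actually enabled end-to-end.
    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()

If tokens print incrementally, the proxy route and the streaming default are both in effect; a 401 would instead point at AI_LITELLM_API_KEY not matching the key LiteLLM expects.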