diff --git a/ai/compose.yaml b/ai/compose.yaml
index 00a7afc..de2bb0c 100644
--- a/ai/compose.yaml
+++ b/ai/compose.yaml
@@ -34,9 +34,9 @@ services:
       # Database configuration
       DATABASE_URL: postgresql://${AI_DB_USER}:${AI_DB_PASSWORD}@ai_postgres:5432/${AI_DB_NAME}
 
-      # OpenAI API configuration (pointing to LiteLLM proxy + direct RunPod orchestrator)
-      OPENAI_API_BASE_URLS: http://litellm:4000;http://100.121.199.88:9000/v1
-      OPENAI_API_KEYS: ${AI_LITELLM_API_KEY};dummy
+      # OpenAI API configuration (pointing to LiteLLM proxy)
+      OPENAI_API_BASE_URLS: http://litellm:4000
+      OPENAI_API_KEYS: ${AI_LITELLM_API_KEY}
 
       # Disable Ollama (we only use LiteLLM)
       ENABLE_OLLAMA_API: false
diff --git a/ai/litellm-config.yaml b/ai/litellm-config.yaml
index 9a87781..a790070 100644
--- a/ai/litellm-config.yaml
+++ b/ai/litellm-config.yaml
@@ -40,6 +40,7 @@ model_list:
       timeout: 600  # 10 minutes for generation
       stream_timeout: 600
       supports_system_messages: false  # vLLM handles system messages differently
+      stream: true  # Enable streaming by default
 
   # Image Generation
   - model_name: flux-schnell
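
Net effect of this patch: the web UI (apparently Open WebUI, judging by the OPENAI_API_BASE_URLS / ENABLE_OLLAMA_API variable names) now sends all OpenAI-compatible traffic through the single LiteLLM proxy instead of also hitting the RunPod orchestrator directly, and the vLLM-backed chat model streams responses by default. A minimal Python sketch to verify both changes from inside the compose network is below. Assumptions not in the diff: the proxy is reachable at http://litellm:4000, AI_LITELLM_API_KEY is exported in the shell, and "my-vllm-model" is a stand-in for the actual model_name defined above the edited hunk in litellm-config.yaml.

    # verify_stream.py -- smoke-test streaming through the LiteLLM proxy
    import os

    from openai import OpenAI

    client = OpenAI(
        base_url="http://litellm:4000",            # same endpoint as OPENAI_API_BASE_URLS
        api_key=os.environ["AI_LITELLM_API_KEY"],  # same key the web UI now uses
    )

    # stream=True exercises the new `stream: true` default in litellm-config.yaml
    stream = client.chat.completions.create(
        model="my-vllm-model",  # hypothetical; substitute your config's model_name
        messages=[{"role": "user", "content": "Say hello in one short sentence."}],
        stream=True,
    )

    # Print tokens as they arrive; a single burst at the end would suggest
    # streaming is not actually enabled end-to-end.
    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()

If tokens print incrementally, the proxy route and the streaming default are both in effect; a 401 would instead point at AI_LITELLM_API_KEY not matching the key LiteLLM expects.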