diff --git a/ai/compose.yaml b/ai/compose.yaml
index 435836b..0daff89 100644
--- a/ai/compose.yaml
+++ b/ai/compose.yaml
@@ -99,6 +99,9 @@ services:
       LITELLM_DROP_PARAMS: 'true'
       NO_DOCS: 'true'
       NO_REDOC: 'true'
+      # Performance optimizations
+      LITELLM_LOG: 'ERROR'  # Only log errors
+      LITELLM_MODE: 'PRODUCTION'  # Production mode for better performance
     volumes:
       - ./litellm-config.yaml:/app/litellm-config.yaml:ro
     command:
@@ -109,7 +112,6 @@ services:
         '0.0.0.0',
         '--port',
         '4000',
-        '--detailed_debug',
         '--drop_params'
       ]
     depends_on:
diff --git a/ai/litellm-config.yaml b/ai/litellm-config.yaml
index 34da475..92f8e85 100644
--- a/ai/litellm-config.yaml
+++ b/ai/litellm-config.yaml
@@ -9,7 +9,8 @@ model_list:
       model: anthropic/claude-sonnet-4-5-20250929
       api_key: os.environ/ANTHROPIC_API_KEY
       drop_params: true
-      additional_drop_params: ["prompt_cache_key"]
+      # Enable prompt caching for better performance
+      supports_prompt_caching: true
 
   - model_name: claude-3-5-sonnet
     litellm_params:
@@ -28,15 +29,16 @@ model_list:
 
 litellm_settings:
   drop_params: true
-  set_verbose: true
-  # Disable prompt caching features
-  cache: false
+  set_verbose: false  # Disable verbose logging for better performance
+  # Enable prompt caching for better performance
+  cache: true
   # Force strip specific parameters globally
   allowed_fails: 0
   # Modify params before sending to provider
   modify_params: true
-  # Drop prompt_cache_key globally for all models
-  additional_drop_params: ["prompt_cache_key"]
+  # Enable success and failure logging but minimize overhead
+  success_callback: []  # Disable all success callbacks to reduce DB writes
+  failure_callback: []  # Disable all failure callbacks
 
 router_settings:
   allowed_fails: 0
@@ -47,3 +49,9 @@ default_litellm_params:
 
 general_settings:
   disable_responses_id_security: true
+  # Disable spend tracking to reduce database overhead
+  disable_spend_logs: true
+  # Disable tag tracking
+  disable_tag_tracking: true
+  # Disable daily spend updates
+  disable_daily_spend_logs: true