From 8a18ae753d835ac6ccf50d4bc54a852d3b8f2028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= Date: Sun, 16 Nov 2025 16:03:19 +0100 Subject: [PATCH] perf: optimize LiteLLM for better performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce database logging overhead and enable prompt caching: - Disabled verbose logging (set_verbose: false) - Disabled spend tracking logs to reduce DB writes - Disabled tag tracking and daily spend logs - Removed success/failure callbacks - Enabled prompt caching for claude-sonnet-4.5 - Set log level to ERROR only - Removed --detailed_debug flag from command This should significantly improve response times by eliminating unnecessary database writes for every request. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- ai/compose.yaml | 4 +++- ai/litellm-config.yaml | 20 ++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/ai/compose.yaml b/ai/compose.yaml index 435836b..0daff89 100644 --- a/ai/compose.yaml +++ b/ai/compose.yaml @@ -99,6 +99,9 @@ services: LITELLM_DROP_PARAMS: 'true' NO_DOCS: 'true' NO_REDOC: 'true' + # Performance optimizations + LITELLM_LOG: 'ERROR' # Only log errors + LITELLM_MODE: 'PRODUCTION' # Production mode for better performance volumes: - ./litellm-config.yaml:/app/litellm-config.yaml:ro command: @@ -109,7 +112,6 @@ services: '0.0.0.0', '--port', '4000', - '--detailed_debug', '--drop_params' ] depends_on: diff --git a/ai/litellm-config.yaml b/ai/litellm-config.yaml index 34da475..92f8e85 100644 --- a/ai/litellm-config.yaml +++ b/ai/litellm-config.yaml @@ -9,7 +9,8 @@ model_list: model: anthropic/claude-sonnet-4-5-20250929 api_key: os.environ/ANTHROPIC_API_KEY drop_params: true - additional_drop_params: ["prompt_cache_key"] + # Enable prompt caching for better performance + supports_prompt_caching: true - model_name: claude-3-5-sonnet litellm_params: @@ -28,15 +29,16 @@ 
model_list: litellm_settings: drop_params: true - set_verbose: true - # Disable prompt caching features - cache: false + set_verbose: false # Disable verbose logging for better performance + # Enable prompt caching for better performance + cache: true # Force strip specific parameters globally allowed_fails: 0 # Modify params before sending to provider modify_params: true - # Drop prompt_cache_key globally for all models - additional_drop_params: ["prompt_cache_key"] + # Disable success and failure callbacks to minimize logging overhead + success_callback: [] # Disable all success callbacks to reduce DB writes + failure_callback: [] # Disable all failure callbacks router_settings: allowed_fails: 0 @@ -47,3 +49,9 @@ default_litellm_params: general_settings: disable_responses_id_security: true + # Disable spend tracking to reduce database overhead + disable_spend_logs: true + # Disable tag tracking + disable_tag_tracking: true + # Disable daily spend updates + disable_daily_spend_logs: true