From 8a18ae753d835ac6ccf50d4bc54a852d3b8f2028 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= <valknar@pivoine.art>
Date: Sun, 16 Nov 2025 16:03:19 +0100
Subject: [PATCH] perf: optimize LiteLLM for better performance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reduce database logging overhead and enable prompt caching:

- Disabled verbose logging (set_verbose: false)
- Disabled spend tracking logs to reduce DB writes
- Disabled tag tracking and daily spend logs
- Removed success/failure callbacks
- Enabled prompt caching for claude-sonnet-4.5
- Set log level to ERROR only
- Removed --detailed_debug flag from command

This should significantly improve response times by eliminating
unnecessary database writes for every request.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 ai/compose.yaml        |  4 +++-
 ai/litellm-config.yaml | 20 ++++++++++++++------
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/ai/compose.yaml b/ai/compose.yaml
index 435836b..0daff89 100644
--- a/ai/compose.yaml
+++ b/ai/compose.yaml
@@ -99,6 +99,9 @@ services:
       LITELLM_DROP_PARAMS: 'true'
       NO_DOCS: 'true'
       NO_REDOC: 'true'
+      # Performance optimizations
+      LITELLM_LOG: 'ERROR'  # Only log errors
+      LITELLM_MODE: 'PRODUCTION'  # Production mode for better performance
     volumes:
       - ./litellm-config.yaml:/app/litellm-config.yaml:ro
     command:
@@ -109,7 +112,6 @@ services:
         '0.0.0.0',
         '--port',
         '4000',
-        '--detailed_debug',
         '--drop_params'
       ]
     depends_on:
diff --git a/ai/litellm-config.yaml b/ai/litellm-config.yaml
index 34da475..92f8e85 100644
--- a/ai/litellm-config.yaml
+++ b/ai/litellm-config.yaml
@@ -9,7 +9,8 @@ model_list:
       model: anthropic/claude-sonnet-4-5-20250929
       api_key: os.environ/ANTHROPIC_API_KEY
       drop_params: true
-      additional_drop_params: ["prompt_cache_key"]
+      # Enable prompt caching for better performance
+      supports_prompt_caching: true
 
   - model_name: claude-3-5-sonnet
     litellm_params:
@@ -28,15 +29,16 @@ model_list:
 
 litellm_settings:
   drop_params: true
-  set_verbose: true
-  # Disable prompt caching features
-  cache: false
+  set_verbose: false  # Disable verbose logging for better performance
+  # Enable prompt caching for better performance
+  cache: true
   # Force strip specific parameters globally
   allowed_fails: 0
   # Modify params before sending to provider
   modify_params: true
-  # Drop prompt_cache_key globally for all models
-  additional_drop_params: ["prompt_cache_key"]
+  # Enable success and failure logging but minimize overhead
+  success_callback: []  # Disable all success callbacks to reduce DB writes
+  failure_callback: []  # Disable all failure callbacks
 
 router_settings:
   allowed_fails: 0
@@ -47,3 +49,9 @@ default_litellm_params:
 
 general_settings:
   disable_responses_id_security: true
+  # Disable spend tracking to reduce database overhead
+  disable_spend_logs: true
+  # Disable tag tracking
+  disable_tag_tracking: true
+  # Disable daily spend updates
+  disable_daily_spend_logs: true