perf: optimize LiteLLM for better performance

Reduce database logging overhead and enable prompt caching:

- Disabled verbose logging (set_verbose: false)
- Disabled spend tracking logs to reduce DB writes
- Disabled tag tracking and daily spend logs
- Removed success/failure callbacks
- Enabled prompt caching for claude-sonnet-4.5
- Set log level to ERROR only
- Removed --detailed_debug flag from command

This should significantly improve response times by eliminating
unnecessary database writes for every request.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-16 16:03:19 +01:00
parent ffbcecc09d
commit 8a18ae753d
2 changed files with 17 additions and 7 deletions

View File

@@ -99,6 +99,9 @@ services:
LITELLM_DROP_PARAMS: 'true'
NO_DOCS: 'true'
NO_REDOC: 'true'
# Performance optimizations
LITELLM_LOG: 'ERROR' # Only log errors
LITELLM_MODE: 'PRODUCTION' # Production mode for better performance
volumes:
- ./litellm-config.yaml:/app/litellm-config.yaml:ro
command:
@@ -109,7 +112,6 @@ services:
'0.0.0.0',
'--port',
'4000',
'--detailed_debug',
'--drop_params'
]
depends_on:

View File

@@ -9,7 +9,8 @@ model_list:
model: anthropic/claude-sonnet-4-5-20250929
api_key: os.environ/ANTHROPIC_API_KEY
drop_params: true
additional_drop_params: ["prompt_cache_key"]
# Enable prompt caching for better performance
supports_prompt_caching: true
- model_name: claude-3-5-sonnet
litellm_params:
@@ -28,15 +29,16 @@ model_list:
litellm_settings:
drop_params: true
set_verbose: true
# Disable prompt caching features
cache: false
set_verbose: false # Disable verbose logging for better performance
# Enable prompt caching for better performance
cache: true
# Force strip specific parameters globally
allowed_fails: 0
# Modify params before sending to provider
modify_params: true
# Drop prompt_cache_key globally for all models
additional_drop_params: ["prompt_cache_key"]
# Enable success and failure logging but minimize overhead
success_callback: [] # Disable all success callbacks to reduce DB writes
failure_callback: [] # Disable all failure callbacks
router_settings:
allowed_fails: 0
@@ -47,3 +49,9 @@ default_litellm_params:
general_settings:
disable_responses_id_security: true
# Disable spend tracking to reduce database overhead
disable_spend_logs: true
# Disable tag tracking
disable_tag_tracking: true
# Disable daily spend updates
disable_daily_spend_logs: true