Files
docker-compose/ai/litellm-config.yaml
Sebastian Krüger d26310afb7 feat: enable prompt caching for all Claude models
Added supports_prompt_caching: true to all Claude models:
- claude-sonnet-4
- claude-sonnet-4.5
- claude-3-5-sonnet
- claude-3-opus
- claude-3-haiku

This enables Anthropic's prompt caching feature across all models,
significantly reducing latency and costs for repeated requests
with the same system prompts.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 16:07:29 +01:00

70 lines
2.0 KiB
YAML

# Claude deployments exposed by this config.
# Every entry reads its key from the ANTHROPIC_API_KEY environment variable and
# sets drop_params on the deployment itself.
# The prompt-caching capability flag is declared under model_info, the section
# LiteLLM documents for per-model metadata (supports_* flags); litellm_params
# holds the arguments used for the provider call.
# NOTE(review): confirm model_info is honoured by the deployed LiteLLM version.
model_list:
  - model_name: claude-sonnet-4
    litellm_params:
      model: anthropic/claude-sonnet-4-20250514
      api_key: os.environ/ANTHROPIC_API_KEY
      drop_params: true
    model_info:
      supports_prompt_caching: true
  - model_name: claude-sonnet-4.5
    litellm_params:
      model: anthropic/claude-sonnet-4-5-20250929
      api_key: os.environ/ANTHROPIC_API_KEY
      drop_params: true
    model_info:
      supports_prompt_caching: true
  - model_name: claude-3-5-sonnet
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20241022
      api_key: os.environ/ANTHROPIC_API_KEY
      drop_params: true
    model_info:
      supports_prompt_caching: true
  - model_name: claude-3-opus
    litellm_params:
      model: anthropic/claude-3-opus-20240229
      api_key: os.environ/ANTHROPIC_API_KEY
      drop_params: true
    model_info:
      supports_prompt_caching: true
  - model_name: claude-3-haiku
    litellm_params:
      model: anthropic/claude-3-haiku-20240307
      api_key: os.environ/ANTHROPIC_API_KEY
      drop_params: true
    model_info:
      supports_prompt_caching: true
# Library-wide LiteLLM settings: parameter handling, response caching,
# failure allowance and logging callbacks.
litellm_settings:
  drop_params: true
  set_verbose: false  # Disable verbose logging for better performance
  # Cache responses in the Redis instance reachable as host "redis"
  cache: true
  cache_params:
    type: redis
    host: redis
    port: 6379
    ttl: 3600  # Cached entries live for 1 hour
  # Failure allowance is zero.
  # NOTE(review): the earlier comment here spoke of stripping parameters, which
  # this key does not do; confirm that a zero allowance is intended.
  allowed_fails: 0
  # Let LiteLLM modify params before sending to the provider
  modify_params: true
  # No logging callbacks are registered for either outcome
  success_callback: []  # Empty list: no success callbacks, fewer DB writes
  failure_callback: []  # Empty list: no failure callbacks
# Router configuration: default call parameters and failure allowance.
router_settings:
  # Defaults applied to routed calls: unsupported parameters are dropped
  default_litellm_params:
    drop_params: true
  # Failure allowance for the router is zero
  allowed_fails: 0
# Proxy-level switches; every flag in this section is set to true.
general_settings:
  # NOTE(review): presumably relaxes the ID security check on the Responses
  # API - confirm against the LiteLLM documentation before relying on it.
  disable_responses_id_security: true
  # No spend logs are written, which reduces database overhead
  disable_spend_logs: true
  # Tag tracking is switched off
  disable_tag_tracking: true
  # Daily spend updates are switched off
  disable_daily_spend_logs: true