# Models exposed by the proxy. `model_name` is the alias of each entry;
# `litellm_params.model` is the provider-prefixed model id it points at.
model_list:
  # ---------------------------------------------------------------------------
  # Anthropic models — every entry takes its key from ANTHROPIC_API_KEY
  # ---------------------------------------------------------------------------
  - model_name: claude-sonnet-4
    litellm_params:
      api_key: os.environ/ANTHROPIC_API_KEY
      model: anthropic/claude-sonnet-4-20250514

  - model_name: claude-sonnet-4.5
    litellm_params:
      api_key: os.environ/ANTHROPIC_API_KEY
      model: anthropic/claude-sonnet-4-5-20250929

  - model_name: claude-3-5-sonnet
    litellm_params:
      api_key: os.environ/ANTHROPIC_API_KEY
      model: anthropic/claude-3-5-sonnet-20241022

  - model_name: claude-3-opus
    litellm_params:
      api_key: os.environ/ANTHROPIC_API_KEY
      model: anthropic/claude-3-opus-20240229

  - model_name: claude-3-haiku
    litellm_params:
      api_key: os.environ/ANTHROPIC_API_KEY
      model: anthropic/claude-3-haiku-20240307

  # ===========================================================================
  # SELF-HOSTED MODELS VIA ORCHESTRATOR (GPU Server via Tailscale VPN)
  # ===========================================================================
  # All requests route through orchestrator (port 9000) which manages model loading

  # Text generation: Qwen 2.5 7B, reached through the orchestrator endpoint.
  - model_name: qwen-2.5-7b
    litellm_params:
      api_base: http://100.100.108.13:9000/v1  # Orchestrator endpoint
      # NOTE(review): literal placeholder key; presumably the orchestrator
      # does not validate it — confirm.
      api_key: dummy
      model: openai/qwen-2.5-7b
      # Rate limits configured for this entry.
      tpm: 100000
      rpm: 1000

  # Image generation: exposed under the alias flux-schnell.
  - model_name: flux-schnell
    litellm_params:
      api_base: http://100.100.108.13:9000/v1  # Orchestrator endpoint
      api_key: dummy
      # OpenAI-compatible mapping: the upstream id is dall-e-3 even though the
      # alias is flux-schnell.
      # NOTE(review): presumably the orchestrator maps this id onto the
      # self-hosted image model — confirm.
      model: openai/dall-e-3
      max_parallel_requests: 3
      rpm: 100

  # Music generation: MusicGen (medium), reached through the orchestrator.
  - model_name: musicgen-medium
    litellm_params:
      api_base: http://100.100.108.13:9000/v1  # Orchestrator endpoint
      api_key: dummy
      model: openai/musicgen-medium
      # Concurrency is capped at a single in-flight request for this entry.
      max_parallel_requests: 1
      rpm: 50

litellm_settings:
  # Parameter handling: drop parameters a provider does not support, and
  # allow parameters to be modified before the request is sent.
  drop_params: true
  modify_params: true

  # Failure threshold of zero.
  # NOTE(review): presumably the number of failed calls tolerated before a
  # deployment is cooled down — confirm against the LiteLLM docs.
  allowed_fails: 0

  # Logging: verbose output is off and no callbacks are registered.
  set_verbose: false
  success_callback: []  # Empty list: no success callbacks, fewer DB writes
  failure_callback: []  # Empty list: no failure callbacks

  # Response caching, backed by Redis.
  cache: true
  cache_params:
    type: redis
    host: redis
    port: 6379
    ttl: 3600  # Seconds, i.e. 1 hour

# Router-level settings.
router_settings:
  # Failure threshold of zero at the router level.
  # NOTE(review): presumably a deployment is cooled down as soon as it fails
  # once — confirm against the LiteLLM router docs.
  allowed_fails: 0

# Drop unsupported parameters
# NOTE(review): default_litellm_params looks like a Router option rather than
# a top-level proxy section; confirm the proxy actually reads it here, or
# whether it needs to live under router_settings.
default_litellm_params:
  drop_params: true

general_settings:
  # Spend accounting is switched off to reduce database overhead:
  # no spend logs, no daily spend updates, and no tag tracking.
  disable_spend_logs: true
  disable_daily_spend_logs: true
  disable_tag_tracking: true

  # NOTE(review): the name suggests a security check on response IDs is
  # turned off — confirm this is acceptable if the proxy serves more than
  # one user.
  disable_responses_id_security: true