diff --git a/llmx-rs/core/src/chat_completions.rs b/llmx-rs/core/src/chat_completions.rs index 2ded19a0..4f24e3e3 100644 --- a/llmx-rs/core/src/chat_completions.rs +++ b/llmx-rs/core/src/chat_completions.rs @@ -443,10 +443,12 @@ pub(crate) async fn stream_chat_completions( }); // Add max_tokens - required by Anthropic Messages API - // Use a sensible default of 8192 if not configured + // Use provider config value or default to 8192 + let max_tokens = provider.max_tokens.unwrap_or(8192); if let Some(obj) = payload.as_object_mut() { - obj.insert("max_tokens".to_string(), json!(8192)); + obj.insert("max_tokens".to_string(), json!(max_tokens)); } + debug!("Using max_tokens: {}", max_tokens); debug!( "POST to {}: {}", diff --git a/llmx-rs/core/src/model_provider_info.rs b/llmx-rs/core/src/model_provider_info.rs index c552c1c5..a1db5242 100644 --- a/llmx-rs/core/src/model_provider_info.rs +++ b/llmx-rs/core/src/model_provider_info.rs @@ -87,6 +87,10 @@ pub struct ModelProviderInfo { /// the connection as lost. pub stream_idle_timeout_ms: Option<u64>, + /// Maximum number of tokens to generate in the response. If not specified, defaults to 8192. + /// This is required by some providers (e.g., Anthropic via LiteLLM). + pub max_tokens: Option<u64>, + /// Does this provider require an OpenAI API Key or ChatGPT login token? If true, /// user is presented with login screen on first run, and login preference and token/key /// are stored in auth.json. 
If false (which is the default), login screen is skipped, @@ -290,6 +294,7 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> { request_max_retries: None, stream_max_retries: None, stream_idle_timeout_ms: None, + max_tokens: None, requires_openai_auth: false, }, ), @@ -330,6 +335,7 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> { request_max_retries: None, stream_max_retries: None, stream_idle_timeout_ms: None, + max_tokens: None, requires_openai_auth: true, }, ), @@ -375,6 +381,7 @@ pub fn create_oss_provider_with_base_url(base_url: &str) -> ModelProviderInfo { request_max_retries: None, stream_max_retries: None, stream_idle_timeout_ms: None, + max_tokens: None, requires_openai_auth: false, } }