chore: Bump version to 0.1.9
Some checks failed
ci / build-test (push) Failing after 4m53s
Codespell / Check for spelling errors (push) Successful in 10s
sdk / sdks (push) Successful in 11m9s
rust-release / tag-check (push) Successful in 3s
rust-release / Build - windows-latest - x86_64-pc-windows-msvc (push) Has been cancelled
rust-release / release (push) Has been cancelled
rust-release / publish-npm (push) Has been cancelled
rust-release / Build - macos-15-xlarge - aarch64-apple-darwin (push) Has been cancelled
rust-release / Build - macos-15-xlarge - x86_64-apple-darwin (push) Has been cancelled
rust-release / Build - ubuntu-24.04 - x86_64-unknown-linux-gnu (push) Has been cancelled
rust-release / Build - ubuntu-24.04 - x86_64-unknown-linux-musl (push) Has been cancelled
rust-release / Build - ubuntu-24.04-arm - aarch64-unknown-linux-gnu (push) Has been cancelled
rust-release / Build - ubuntu-24.04-arm - aarch64-unknown-linux-musl (push) Has been cancelled
rust-release / Build - windows-11-arm - aarch64-pc-windows-msvc (push) Has been cancelled
rust-ci / Detect changed areas (push) Has been cancelled
rust-ci / Format / etc (push) Has been cancelled
rust-ci / cargo shear (push) Has been cancelled
rust-ci / Lint/Build — macos-14 - aarch64-apple-darwin (push) Has been cancelled
rust-ci / Lint/Build — macos-14 - x86_64-apple-darwin (push) Has been cancelled
rust-ci / Lint/Build — ubuntu-24.04 - x86_64-unknown-linux-gnu (push) Has been cancelled
rust-ci / Lint/Build — ubuntu-24.04 - x86_64-unknown-linux-musl (push) Has been cancelled
rust-ci / Lint/Build — ubuntu-24.04-arm - aarch64-unknown-linux-gnu (push) Has been cancelled
rust-ci / Lint/Build — ubuntu-24.04-arm - aarch64-unknown-linux-musl (push) Has been cancelled
rust-ci / Lint/Build — windows-11-arm - aarch64-pc-windows-msvc (push) Has been cancelled
rust-ci / Lint/Build — windows-latest - x86_64-pc-windows-msvc (push) Has been cancelled
rust-ci / Lint/Build — macos-14 - aarch64-apple-darwin (release) (push) Has been cancelled
rust-ci / Lint/Build — ubuntu-24.04 - x86_64-unknown-linux-musl (release) (push) Has been cancelled
rust-ci / Lint/Build — windows-11-arm - aarch64-pc-windows-msvc (release) (push) Has been cancelled
rust-ci / Lint/Build — windows-latest - x86_64-pc-windows-msvc (release) (push) Has been cancelled
rust-ci / Tests — macos-14 - aarch64-apple-darwin (push) Has been cancelled
rust-ci / Tests — ubuntu-24.04 - x86_64-unknown-linux-gnu (push) Has been cancelled
rust-ci / Tests — ubuntu-24.04-arm - aarch64-unknown-linux-gnu (push) Has been cancelled
rust-ci / Tests — windows-11-arm - aarch64-pc-windows-msvc (push) Has been cancelled
rust-ci / Tests — windows-latest - x86_64-pc-windows-msvc (push) Has been cancelled
rust-ci / CI results (required) (push) Has been cancelled

- Wire model_max_output_tokens config to Chat Completions API
- Config takes precedence over provider default for max_tokens

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-12-01 00:02:32 +01:00
parent 84bc98a66b
commit 66e0649b01
5 changed files with 190 additions and 185 deletions

363
llmx-rs/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -43,7 +43,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.1.8"
version = "0.1.9"
# Track the edition for all workspace crates in one place. Individual
# crates can still override this value, but keeping it here means new
# crates created with `cargo new -w ...` automatically inherit the 2024

View File

@@ -45,6 +45,7 @@ pub(crate) async fn stream_chat_completions(
provider: &ModelProviderInfo,
otel_event_manager: &OtelEventManager,
session_source: &SessionSource,
model_max_output_tokens: Option<i64>,
) -> Result<ResponseStream> {
if prompt.output_schema.is_some() {
return Err(LlmxErr::UnsupportedOperation(
@@ -443,8 +444,10 @@ pub(crate) async fn stream_chat_completions(
});
// Add max_tokens - required by Anthropic Messages API
// Use provider config value or default to 20480 (5 * 4096, Claude Sonnet 4.5 supports up to 64K)
let max_tokens = provider.max_tokens.unwrap_or(20480);
// Priority: config model_max_output_tokens > provider max_tokens > default 20480
let max_tokens = model_max_output_tokens
.or(provider.max_tokens)
.unwrap_or(20480);
if let Some(obj) = payload.as_object_mut() {
obj.insert("max_tokens".to_string(), json!(max_tokens));
}

View File

@@ -152,6 +152,7 @@ impl ModelClient {
&self.provider,
&self.otel_event_manager,
&self.session_source,
self.config.model_max_output_tokens,
)
.await?;

View File

@@ -973,6 +973,8 @@ impl Config {
let mut model_providers = built_in_model_providers();
// Merge user-defined providers into the built-in list.
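// Note: `or_insert()` only inserts when the key is absent, so a user-defined
// provider whose key matches a built-in provider is ignored (the built-in wins).
// To customize max_tokens for a built-in provider, set the
// `model_max_output_tokens` config value instead.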
for (key, provider) in cfg.model_providers.into_iter() {
model_providers.entry(key).or_insert(provider);
}