Compare commits
2 Commits
rust-v0.1.
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| | 66e0649b01 | | |
| | 84bc98a66b | | |
363
llmx-rs/Cargo.lock
generated
363
llmx-rs/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -43,7 +43,7 @@ members = [
|
|||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
version = "0.1.8"
|
version = "0.1.9"
|
||||||
# Track the edition for all workspace crates in one place. Individual
|
# Track the edition for all workspace crates in one place. Individual
|
||||||
# crates can still override this value, but keeping it here means new
|
# crates can still override this value, but keeping it here means new
|
||||||
# crates created with `cargo new -w ...` automatically inherit the 2024
|
# crates created with `cargo new -w ...` automatically inherit the 2024
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ pub(crate) async fn stream_chat_completions(
|
|||||||
provider: &ModelProviderInfo,
|
provider: &ModelProviderInfo,
|
||||||
otel_event_manager: &OtelEventManager,
|
otel_event_manager: &OtelEventManager,
|
||||||
session_source: &SessionSource,
|
session_source: &SessionSource,
|
||||||
|
model_max_output_tokens: Option<i64>,
|
||||||
) -> Result<ResponseStream> {
|
) -> Result<ResponseStream> {
|
||||||
if prompt.output_schema.is_some() {
|
if prompt.output_schema.is_some() {
|
||||||
return Err(LlmxErr::UnsupportedOperation(
|
return Err(LlmxErr::UnsupportedOperation(
|
||||||
@@ -443,8 +444,10 @@ pub(crate) async fn stream_chat_completions(
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Add max_tokens - required by Anthropic Messages API
|
// Add max_tokens - required by Anthropic Messages API
|
||||||
// Use provider config value or default to 20480 (5 * 4096, Claude Sonnet 4.5 supports up to 64K)
|
// Priority: config model_max_output_tokens > provider max_tokens > default 20480
|
||||||
let max_tokens = provider.max_tokens.unwrap_or(20480);
|
let max_tokens = model_max_output_tokens
|
||||||
|
.or(provider.max_tokens)
|
||||||
|
.unwrap_or(20480);
|
||||||
if let Some(obj) = payload.as_object_mut() {
|
if let Some(obj) = payload.as_object_mut() {
|
||||||
obj.insert("max_tokens".to_string(), json!(max_tokens));
|
obj.insert("max_tokens".to_string(), json!(max_tokens));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -152,6 +152,7 @@ impl ModelClient {
|
|||||||
&self.provider,
|
&self.provider,
|
||||||
&self.otel_event_manager,
|
&self.otel_event_manager,
|
||||||
&self.session_source,
|
&self.session_source,
|
||||||
|
self.config.model_max_output_tokens,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
|||||||
@@ -973,6 +973,8 @@ impl Config {
|
|||||||
|
|
||||||
let mut model_providers = built_in_model_providers();
|
let mut model_providers = built_in_model_providers();
|
||||||
// Merge user-defined providers into the built-in list.
|
// Merge user-defined providers into the built-in list.
|
||||||
|
// Note: This uses or_insert() so built-in providers take precedence.
|
||||||
|
// For custom max_tokens, use model_max_output_tokens config instead.
|
||||||
for (key, provider) in cfg.model_providers.into_iter() {
|
for (key, provider) in cfg.model_providers.into_iter() {
|
||||||
model_providers.entry(key).or_insert(provider);
|
model_providers.entry(key).or_insert(provider);
|
||||||
}
|
}
|
||||||
|
|||||||
198
test_system_message.json
Normal file
198
test_system_message.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user