Compare commits
8 Commits
rust-v0.1.
...
rust-v0.1.
| Author | SHA1 | Date | |
|---|---|---|---|
| | 7237627ac7 | | |
| | 75dda1c285 | | |
| | 8f79e89db2 | | |
| | c0775ad8a3 | | |
| | ee75cfaa7f | | |
| | 085d8c9343 | | |
| | 462b219d3f | | |
| | 63de226119 | | |
1392
llmx-rs/Cargo.lock
generated
1392
llmx-rs/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -43,7 +43,7 @@ members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.1.5"
|
||||
version = "0.1.7"
|
||||
# Track the edition for all workspace crates in one place. Individual
|
||||
# crates can still override this value, but keeping it here means new
|
||||
# crates created with `cargo new -w ...` automatically inherit the 2024
|
||||
@@ -191,7 +191,7 @@ tokio-util = "0.7.16"
|
||||
toml = "0.9.5"
|
||||
toml_edit = "0.23.4"
|
||||
tonic = "0.13.1"
|
||||
tracing = "0.1.51"
|
||||
tracing = "0.1.41"
|
||||
tracing-appender = "0.2.3"
|
||||
tracing-subscriber = "0.3.20"
|
||||
tracing-test = "0.2.5"
|
||||
|
||||
@@ -138,7 +138,7 @@ impl McpProcess {
|
||||
client_info: ClientInfo {
|
||||
name: "llmx-app-server-tests".to_string(),
|
||||
title: None,
|
||||
version: "0.1.5".to_string(),
|
||||
version: "0.1.7".to_string(),
|
||||
},
|
||||
})?);
|
||||
let req_id = self.send_request("initialize", params).await?;
|
||||
|
||||
@@ -26,7 +26,7 @@ async fn get_user_agent_returns_current_llmx_user_agent() -> Result<()> {
|
||||
|
||||
let os_info = os_info::get();
|
||||
let user_agent = format!(
|
||||
"llmx_cli_rs/0.1.5 ({} {}; {}) {} (llmx-app-server-tests; 0.1.5)",
|
||||
"llmx_cli_rs/0.1.7 ({} {}; {}) {} (llmx-app-server-tests; 0.1.7)",
|
||||
os_info.os_type(),
|
||||
os_info.version(),
|
||||
os_info.architecture().unwrap_or("unknown"),
|
||||
|
||||
@@ -56,7 +56,12 @@ pub(crate) async fn stream_chat_completions(
|
||||
let mut messages = Vec::<serde_json::Value>::new();
|
||||
|
||||
let full_instructions = prompt.get_full_instructions(model_family);
|
||||
messages.push(json!({"role": "system", "content": full_instructions}));
|
||||
// Add cache_control to system instructions for Anthropic prompt caching
|
||||
messages.push(json!({
|
||||
"role": "system",
|
||||
"content": full_instructions,
|
||||
"cache_control": {"type": "ephemeral"}
|
||||
}));
|
||||
|
||||
let input = prompt.get_formatted_input();
|
||||
|
||||
@@ -413,6 +418,20 @@ pub(crate) async fn stream_chat_completions(
|
||||
}
|
||||
|
||||
debug!("Built {} messages for API request", messages.len());
|
||||
|
||||
// Add cache_control to conversation history for Anthropic prompt caching
|
||||
// Add it to a message that's at least 3 messages before the end (stable history)
|
||||
// This caches the earlier conversation while keeping recent turns uncached
|
||||
if messages.len() > 4 {
|
||||
let cache_idx = messages.len().saturating_sub(4);
|
||||
if let Some(msg) = messages.get_mut(cache_idx) {
|
||||
if let Some(obj) = msg.as_object_mut() {
|
||||
obj.insert("cache_control".to_string(), json!({"type": "ephemeral"}));
|
||||
debug!("Added cache_control to message at index {} (conversation history)", cache_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!("=== End Chat Completions Request Debug ===");
|
||||
|
||||
let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;
|
||||
@@ -424,10 +443,12 @@ pub(crate) async fn stream_chat_completions(
|
||||
});
|
||||
|
||||
// Add max_tokens - required by Anthropic Messages API
|
||||
// Use a sensible default of 8192 if not configured
|
||||
// Use provider config value or default to 8192
|
||||
let max_tokens = provider.max_tokens.unwrap_or(8192);
|
||||
if let Some(obj) = payload.as_object_mut() {
|
||||
obj.insert("max_tokens".to_string(), json!(8192));
|
||||
obj.insert("max_tokens".to_string(), json!(max_tokens));
|
||||
}
|
||||
debug!("Using max_tokens: {}", max_tokens);
|
||||
|
||||
debug!(
|
||||
"POST to {}: {}",
|
||||
|
||||
@@ -1123,6 +1123,7 @@ mod tests {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
@@ -1187,6 +1188,7 @@ mod tests {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
@@ -1224,6 +1226,7 @@ mod tests {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
@@ -1263,6 +1266,7 @@ mod tests {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
@@ -1298,6 +1302,7 @@ mod tests {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
@@ -1333,6 +1338,7 @@ mod tests {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
@@ -1437,6 +1443,7 @@ mod tests {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
|
||||
@@ -2809,6 +2809,7 @@ model_verbosity = "high"
|
||||
request_max_retries: Some(4),
|
||||
stream_max_retries: Some(10),
|
||||
stream_idle_timeout_ms: Some(300_000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
let model_provider_map = {
|
||||
|
||||
@@ -87,6 +87,10 @@ pub struct ModelProviderInfo {
|
||||
/// the connection as lost.
|
||||
pub stream_idle_timeout_ms: Option<u64>,
|
||||
|
||||
/// Maximum number of tokens to generate in the response. If not specified, defaults to 8192.
|
||||
/// This is required by some providers (e.g., Anthropic via LiteLLM).
|
||||
pub max_tokens: Option<i64>,
|
||||
|
||||
/// Does this provider require an OpenAI API Key or ChatGPT login token? If true,
|
||||
/// user is presented with login screen on first run, and login preference and token/key
|
||||
/// are stored in auth.json. If false (which is the default), login screen is skipped,
|
||||
@@ -290,6 +294,7 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
},
|
||||
),
|
||||
@@ -330,6 +335,7 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
max_tokens: None,
|
||||
requires_openai_auth: true,
|
||||
},
|
||||
),
|
||||
@@ -375,6 +381,7 @@ pub fn create_oss_provider_with_base_url(base_url: &str) -> ModelProviderInfo {
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,6 +58,7 @@ async fn run_request(input: Vec<ResponseItem>) -> Value {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(5_000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
|
||||
@@ -58,6 +58,7 @@ async fn run_stream_with_bytes(sse_body: &[u8]) -> Vec<ResponseEvent> {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(5_000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@ async fn responses_stream_includes_subagent_header_on_review() {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(5_000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
@@ -135,6 +136,7 @@ async fn responses_stream_includes_subagent_header_on_other() {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(5_000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
|
||||
@@ -712,6 +712,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(5_000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
@@ -1195,6 +1196,7 @@ async fn azure_overrides_assign_properties_used_for_responses_url() {
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
@@ -1272,6 +1274,7 @@ async fn env_var_overrides_loaded_auth() {
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
|
||||
@@ -72,6 +72,7 @@ async fn continue_after_stream_error() {
|
||||
request_max_retries: Some(1),
|
||||
stream_max_retries: Some(1),
|
||||
stream_idle_timeout_ms: Some(2_000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
|
||||
@@ -80,6 +80,7 @@ async fn retries_on_early_close() {
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(1),
|
||||
stream_idle_timeout_ms: Some(2000),
|
||||
max_tokens: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
|
||||
@@ -144,7 +144,7 @@ impl McpProcess {
|
||||
let initialized = self.read_jsonrpc_message().await?;
|
||||
let os_info = os_info::get();
|
||||
let user_agent = format!(
|
||||
"llmx_cli_rs/0.1.5 ({} {}; {}) {} (elicitation test; 0.0.0)",
|
||||
"llmx_cli_rs/0.1.7 ({} {}; {}) {} (elicitation test; 0.0.0)",
|
||||
os_info.os_type(),
|
||||
os_info.version(),
|
||||
os_info.architecture().unwrap_or("unknown"),
|
||||
@@ -163,7 +163,7 @@ impl McpProcess {
|
||||
"serverInfo": {
|
||||
"name": "llmx-mcp-server",
|
||||
"title": "LLMX",
|
||||
"version": "0.1.5",
|
||||
"version": "0.1.7",
|
||||
"user_agent": user_agent
|
||||
},
|
||||
"protocolVersion": mcp_types::MCP_SCHEMA_VERSION
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭───────────────────────────────────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.5) │
|
||||
│ >_ LLMX (v0.1.7) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭─────────────────────────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.5) │
|
||||
│ >_ LLMX (v0.1.7) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭──────────────────────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.5) │
|
||||
│ >_ LLMX (v0.1.7) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭──────────────────────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.5) │
|
||||
│ >_ LLMX (v0.1.7) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭───────────────────────────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.5) │
|
||||
│ >_ LLMX (v0.1.7) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.5) │
|
||||
│ >_ LLMX (v0.1.7) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/ │
|
||||
│ usage for up-to-date │
|
||||
|
||||
10
llmx-rs/tui/tests/fixtures/binary-size-log.jsonl
vendored
10
llmx-rs/tui/tests/fixtures/binary-size-log.jsonl
vendored
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user