fix: Regenerate Cargo.lock for v0.1.7

- Regenerated lockfile with compatible dependency versions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
chore: Bump version to 0.1.7
2025-11-17 11:25:44 +01:00 · 2025-11-17 11:18:26 +01:00 · 2025-11-17 11:01:31 +01:00 · 2025-11-17 10:40:15 +01:00 · 2025-11-16 21:59:54 +01:00 · 2025-11-16 21:52:00 +01:00
22 changed files with 798 additions and 679 deletions
--- a/llmx-rs/Cargo.lock
+++ b/llmx-rs/Cargo.lock
--- a/llmx-rs/Cargo.toml
+++ b/llmx-rs/Cargo.toml
@@ -43,7 +43,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.1.5"
+version = "0.1.7"
 # Track the edition for all workspace crates in one place. Individual
 # crates can still override this value, but keeping it here means new
 # crates created with `cargo new -w ...` automatically inherit the 2024
@@ -191,7 +191,7 @@ tokio-util = "0.7.16"
 toml = "0.9.5"
 toml_edit = "0.23.4"
 tonic = "0.13.1"
-tracing = "0.1.51"
+tracing = "0.1.41"
 tracing-appender = "0.2.3"
 tracing-subscriber = "0.3.20"
 tracing-test = "0.2.5"
--- a/llmx-rs/app-server/tests/common/mcp_process.rs
+++ b/llmx-rs/app-server/tests/common/mcp_process.rs
@@ -138,7 +138,7 @@ impl McpProcess {
            client_info: ClientInfo {
                name: "llmx-app-server-tests".to_string(),
                title: None,
-                version: "0.1.5".to_string(),
+                version: "0.1.7".to_string(),
            },
        })?);
        let req_id = self.send_request("initialize", params).await?;
--- a/llmx-rs/app-server/tests/suite/user_agent.rs
+++ b/llmx-rs/app-server/tests/suite/user_agent.rs
@@ -26,7 +26,7 @@ async fn get_user_agent_returns_current_llmx_user_agent() -> Result<()> {

    let os_info = os_info::get();
    let user_agent = format!(
-        "llmx_cli_rs/0.1.5 ({} {}; {}) {} (llmx-app-server-tests; 0.1.5)",
+        "llmx_cli_rs/0.1.7 ({} {}; {}) {} (llmx-app-server-tests; 0.1.7)",
        os_info.os_type(),
        os_info.version(),
        os_info.architecture().unwrap_or("unknown"),
--- a/llmx-rs/core/src/chat_completions.rs
+++ b/llmx-rs/core/src/chat_completions.rs
@@ -56,7 +56,12 @@ pub(crate) async fn stream_chat_completions(
    let mut messages = Vec::<serde_json::Value>::new();

    let full_instructions = prompt.get_full_instructions(model_family);
-    messages.push(json!({"role": "system", "content": full_instructions}));
+    // Add cache_control to system instructions for Anthropic prompt caching
+    messages.push(json!({
+        "role": "system",
+        "content": full_instructions,
+        "cache_control": {"type": "ephemeral"}
+    }));

    let input = prompt.get_formatted_input();

@@ -413,6 +418,20 @@ pub(crate) async fn stream_chat_completions(
    }

    debug!("Built {} messages for API request", messages.len());
+
+    // Add cache_control to conversation history for Anthropic prompt caching
+    // Attach it to the fourth-from-last message (index len - 4), i.e. stable history
+    // This caches the earlier conversation while keeping recent turns uncached
+    if messages.len() > 4 {
+        let cache_idx = messages.len().saturating_sub(4);
+        if let Some(msg) = messages.get_mut(cache_idx) {
+            if let Some(obj) = msg.as_object_mut() {
+                obj.insert("cache_control".to_string(), json!({"type": "ephemeral"}));
+                debug!("Added cache_control to message at index {} (conversation history)", cache_idx);
+            }
+        }
+    }
+
    debug!("=== End Chat Completions Request Debug ===");

    let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;
@@ -424,10 +443,12 @@ pub(crate) async fn stream_chat_completions(
    });

    // Add max_tokens - required by Anthropic Messages API
-    // Use a sensible default of 8192 if not configured
+    // Use provider config value or default to 8192
+    let max_tokens = provider.max_tokens.unwrap_or(8192);
    if let Some(obj) = payload.as_object_mut() {
-        obj.insert("max_tokens".to_string(), json!(8192));
+        obj.insert("max_tokens".to_string(), json!(max_tokens));
    }
+    debug!("Using max_tokens: {}", max_tokens);

    debug!(
        "POST to {}: {}",
--- a/llmx-rs/core/src/client.rs
+++ b/llmx-rs/core/src/client.rs
@@ -1123,6 +1123,7 @@ mod tests {
            request_max_retries: Some(0),
            stream_max_retries: Some(0),
            stream_idle_timeout_ms: Some(1000),
+        max_tokens: None,
            requires_openai_auth: false,
        };

@@ -1187,6 +1188,7 @@ mod tests {
            request_max_retries: Some(0),
            stream_max_retries: Some(0),
            stream_idle_timeout_ms: Some(1000),
+        max_tokens: None,
            requires_openai_auth: false,
        };

@@ -1224,6 +1226,7 @@ mod tests {
            request_max_retries: Some(0),
            stream_max_retries: Some(0),
            stream_idle_timeout_ms: Some(1000),
+        max_tokens: None,
            requires_openai_auth: false,
        };

@@ -1263,6 +1266,7 @@ mod tests {
            request_max_retries: Some(0),
            stream_max_retries: Some(0),
            stream_idle_timeout_ms: Some(1000),
+        max_tokens: None,
            requires_openai_auth: false,
        };

@@ -1298,6 +1302,7 @@ mod tests {
            request_max_retries: Some(0),
            stream_max_retries: Some(0),
            stream_idle_timeout_ms: Some(1000),
+        max_tokens: None,
            requires_openai_auth: false,
        };

@@ -1333,6 +1338,7 @@ mod tests {
            request_max_retries: Some(0),
            stream_max_retries: Some(0),
            stream_idle_timeout_ms: Some(1000),
+        max_tokens: None,
            requires_openai_auth: false,
        };

@@ -1437,6 +1443,7 @@ mod tests {
                request_max_retries: Some(0),
                stream_max_retries: Some(0),
                stream_idle_timeout_ms: Some(1000),
+        max_tokens: None,
                requires_openai_auth: false,
            };

--- a/llmx-rs/core/src/config/mod.rs
+++ b/llmx-rs/core/src/config/mod.rs
@@ -2809,6 +2809,7 @@ model_verbosity = "high"
            request_max_retries: Some(4),
            stream_max_retries: Some(10),
            stream_idle_timeout_ms: Some(300_000),
+        max_tokens: None,
            requires_openai_auth: false,
        };
        let model_provider_map = {
--- a/llmx-rs/core/src/model_provider_info.rs
+++ b/llmx-rs/core/src/model_provider_info.rs
@@ -87,6 +87,10 @@ pub struct ModelProviderInfo {
    /// the connection as lost.
    pub stream_idle_timeout_ms: Option<u64>,

+    /// Maximum number of tokens to generate in the response. If not specified, defaults to 8192.
+    /// This is required by some providers (e.g., Anthropic via LiteLLM).
+    pub max_tokens: Option<i64>,
+
    /// Does this provider require an OpenAI API Key or ChatGPT login token? If true,
    /// user is presented with login screen on first run, and login preference and token/key
    /// are stored in auth.json. If false (which is the default), login screen is skipped,
@@ -290,6 +294,7 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
                request_max_retries: None,
                stream_max_retries: None,
                stream_idle_timeout_ms: None,
+                max_tokens: None,
                requires_openai_auth: false,
            },
        ),
@@ -330,6 +335,7 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
                request_max_retries: None,
                stream_max_retries: None,
                stream_idle_timeout_ms: None,
+                max_tokens: None,
                requires_openai_auth: true,
            },
        ),
@@ -375,6 +381,7 @@ pub fn create_oss_provider_with_base_url(base_url: &str) -> ModelProviderInfo {
        request_max_retries: None,
        stream_max_retries: None,
        stream_idle_timeout_ms: None,
+        max_tokens: None,
        requires_openai_auth: false,
    }
 }
--- a/llmx-rs/core/tests/chat_completions_payload.rs
+++ b/llmx-rs/core/tests/chat_completions_payload.rs
@@ -58,6 +58,7 @@ async fn run_request(input: Vec<ResponseItem>) -> Value {
        request_max_retries: Some(0),
        stream_max_retries: Some(0),
        stream_idle_timeout_ms: Some(5_000),
+        max_tokens: None,
        requires_openai_auth: false,
    };

--- a/llmx-rs/core/tests/chat_completions_sse.rs
+++ b/llmx-rs/core/tests/chat_completions_sse.rs
@@ -58,6 +58,7 @@ async fn run_stream_with_bytes(sse_body: &[u8]) -> Vec<ResponseEvent> {
        request_max_retries: Some(0),
        stream_max_retries: Some(0),
        stream_idle_timeout_ms: Some(5_000),
+        max_tokens: None,
        requires_openai_auth: false,
    };

--- a/llmx-rs/core/tests/responses_headers.rs
+++ b/llmx-rs/core/tests/responses_headers.rs
@@ -47,6 +47,7 @@ async fn responses_stream_includes_subagent_header_on_review() {
        request_max_retries: Some(0),
        stream_max_retries: Some(0),
        stream_idle_timeout_ms: Some(5_000),
+        max_tokens: None,
        requires_openai_auth: false,
    };

@@ -135,6 +136,7 @@ async fn responses_stream_includes_subagent_header_on_other() {
        request_max_retries: Some(0),
        stream_max_retries: Some(0),
        stream_idle_timeout_ms: Some(5_000),
+        max_tokens: None,
        requires_openai_auth: false,
    };

--- a/llmx-rs/core/tests/suite/client.rs
+++ b/llmx-rs/core/tests/suite/client.rs
@@ -712,6 +712,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
        request_max_retries: Some(0),
        stream_max_retries: Some(0),
        stream_idle_timeout_ms: Some(5_000),
+        max_tokens: None,
        requires_openai_auth: false,
    };

@@ -1195,6 +1196,7 @@ async fn azure_overrides_assign_properties_used_for_responses_url() {
        request_max_retries: None,
        stream_max_retries: None,
        stream_idle_timeout_ms: None,
+        max_tokens: None,
        requires_openai_auth: false,
    };

@@ -1272,6 +1274,7 @@ async fn env_var_overrides_loaded_auth() {
        request_max_retries: None,
        stream_max_retries: None,
        stream_idle_timeout_ms: None,
+        max_tokens: None,
        requires_openai_auth: false,
    };

--- a/llmx-rs/core/tests/suite/stream_error_allows_next_turn.rs
+++ b/llmx-rs/core/tests/suite/stream_error_allows_next_turn.rs
@@ -72,6 +72,7 @@ async fn continue_after_stream_error() {
        request_max_retries: Some(1),
        stream_max_retries: Some(1),
        stream_idle_timeout_ms: Some(2_000),
+        max_tokens: None,
        requires_openai_auth: false,
    };

--- a/llmx-rs/core/tests/suite/stream_no_completed.rs
+++ b/llmx-rs/core/tests/suite/stream_no_completed.rs
@@ -80,6 +80,7 @@ async fn retries_on_early_close() {
        request_max_retries: Some(0),
        stream_max_retries: Some(1),
        stream_idle_timeout_ms: Some(2000),
+        max_tokens: None,
        requires_openai_auth: false,
    };

--- a/llmx-rs/mcp-server/tests/common/mcp_process.rs
+++ b/llmx-rs/mcp-server/tests/common/mcp_process.rs
@@ -144,7 +144,7 @@ impl McpProcess {
        let initialized = self.read_jsonrpc_message().await?;
        let os_info = os_info::get();
        let user_agent = format!(
-            "llmx_cli_rs/0.1.5 ({} {}; {}) {} (elicitation test; 0.0.0)",
+            "llmx_cli_rs/0.1.7 ({} {}; {}) {} (elicitation test; 0.0.0)",
            os_info.os_type(),
            os_info.version(),
            os_info.architecture().unwrap_or("unknown"),
@@ -163,7 +163,7 @@ impl McpProcess {
                    "serverInfo": {
                        "name": "llmx-mcp-server",
                        "title": "LLMX",
-                        "version": "0.1.5",
+                        "version": "0.1.7",
                        "user_agent": user_agent
                    },
                    "protocolVersion": mcp_types::MCP_SCHEMA_VERSION
--- a/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_includes_monthly_limit.snap
+++ b/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_includes_monthly_limit.snap
@@ -5,7 +5,7 @@ expression: sanitized
 /status

 ╭───────────────────────────────────────────────────────────────────────────╮
-│  >_ LLMX (v0.1.5)                                                         │
+│  >_ LLMX (v0.1.7)                                                         │
 │                                                                           │
 │ Visit https://chatgpt.com/llmx/settings/usage for up-to-date              │
 │ information on rate limits and credits                                    │
--- a/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_includes_reasoning_details.snap
+++ b/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_includes_reasoning_details.snap
@@ -5,7 +5,7 @@ expression: sanitized
 /status

 ╭─────────────────────────────────────────────────────────────────╮
-│  >_ LLMX (v0.1.5)                                               │
+│  >_ LLMX (v0.1.7)                                               │
 │                                                                 │
 │ Visit https://chatgpt.com/llmx/settings/usage for up-to-date    │
 │ information on rate limits and credits                          │
--- a/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_shows_empty_limits_message.snap
+++ b/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_shows_empty_limits_message.snap
@@ -5,7 +5,7 @@ expression: sanitized
 /status

 ╭──────────────────────────────────────────────────────────────╮
-│  >_ LLMX (v0.1.5)                                            │
+│  >_ LLMX (v0.1.7)                                            │
 │                                                              │
 │ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
 │ information on rate limits and credits                       │
--- a/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_shows_missing_limits_message.snap
+++ b/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_shows_missing_limits_message.snap
@@ -5,7 +5,7 @@ expression: sanitized
 /status

 ╭──────────────────────────────────────────────────────────────╮
-│  >_ LLMX (v0.1.5)                                            │
+│  >_ LLMX (v0.1.7)                                            │
 │                                                              │
 │ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
 │ information on rate limits and credits                       │
--- a/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_shows_stale_limits_message.snap
+++ b/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_shows_stale_limits_message.snap
@@ -5,7 +5,7 @@ expression: sanitized
 /status

 ╭───────────────────────────────────────────────────────────────────╮
-│  >_ LLMX (v0.1.5)                                                 │
+│  >_ LLMX (v0.1.7)                                                 │
 │                                                                   │
 │ Visit https://chatgpt.com/llmx/settings/usage for up-to-date      │
 │ information on rate limits and credits                            │
--- a/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_truncates_in_narrow_terminal.snap
+++ b/llmx-rs/tui/src/status/snapshots/llmx_tuistatustests__status_snapshot_truncates_in_narrow_terminal.snap
@@ -5,7 +5,7 @@ expression: sanitized
 /status

 ╭────────────────────────────────────────────╮
-│  >_ LLMX (v0.1.5)                          │
+│  >_ LLMX (v0.1.7)                          │
 │                                            │
 │ Visit https://chatgpt.com/llmx/settings/   │
 │ usage for up-to-date                       │
--- a/llmx-rs/tui/tests/fixtures/binary-size-log.jsonl
+++ b/llmx-rs/tui/tests/fixtures/binary-size-log.jsonl