1 Commits

Author SHA1 Message Date
dependabot[bot]
a899aadea5 chore(deps): bump schemars from 0.8.22 to 1.0.4 in /llmx-rs
Bumps [schemars](https://github.com/GREsau/schemars) from 0.8.22 to 1.0.4.
- [Release notes](https://github.com/GREsau/schemars/releases)
- [Changelog](https://github.com/GREsau/schemars/blob/master/CHANGELOG.md)
- [Commits](https://github.com/GREsau/schemars/compare/v0.8.22...v1.0.4)

---
updated-dependencies:
- dependency-name: schemars
  dependency-version: 1.0.4
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-14 18:56:09 +00:00
23 changed files with 680 additions and 898 deletions

1404
llmx-rs/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -43,7 +43,7 @@ members = [
resolver = "2" resolver = "2"
[workspace.package] [workspace.package]
version = "0.1.7" version = "0.1.4"
# Track the edition for all workspace crates in one place. Individual # Track the edition for all workspace crates in one place. Individual
# crates can still override this value, but keeping it here means new # crates can still override this value, but keeping it here means new
# crates created with `cargo new -w ...` automatically inherit the 2024 # crates created with `cargo new -w ...` automatically inherit the 2024
@@ -162,7 +162,7 @@ ratatui-macros = "0.6.0"
regex-lite = "0.1.7" regex-lite = "0.1.7"
reqwest = "0.12" reqwest = "0.12"
rmcp = { version = "0.8.5", default-features = false } rmcp = { version = "0.8.5", default-features = false }
schemars = "0.8.22" schemars = "1.0.4"
seccompiler = "0.5.0" seccompiler = "0.5.0"
sentry = "0.34.0" sentry = "0.34.0"
serde = "1" serde = "1"

View File

@@ -138,7 +138,7 @@ impl McpProcess {
client_info: ClientInfo { client_info: ClientInfo {
name: "llmx-app-server-tests".to_string(), name: "llmx-app-server-tests".to_string(),
title: None, title: None,
version: "0.1.7".to_string(), version: "0.1.4".to_string(),
}, },
})?); })?);
let req_id = self.send_request("initialize", params).await?; let req_id = self.send_request("initialize", params).await?;

View File

@@ -26,7 +26,7 @@ async fn get_user_agent_returns_current_llmx_user_agent() -> Result<()> {
let os_info = os_info::get(); let os_info = os_info::get();
let user_agent = format!( let user_agent = format!(
"llmx_cli_rs/0.1.7 ({} {}; {}) {} (llmx-app-server-tests; 0.1.7)", "llmx_cli_rs/0.1.4 ({} {}; {}) {} (llmx-app-server-tests; 0.1.4)",
os_info.os_type(), os_info.os_type(),
os_info.version(), os_info.version(),
os_info.architecture().unwrap_or("unknown"), os_info.architecture().unwrap_or("unknown"),

View File

@@ -56,12 +56,7 @@ pub(crate) async fn stream_chat_completions(
let mut messages = Vec::<serde_json::Value>::new(); let mut messages = Vec::<serde_json::Value>::new();
let full_instructions = prompt.get_full_instructions(model_family); let full_instructions = prompt.get_full_instructions(model_family);
// Add cache_control to system instructions for Anthropic prompt caching messages.push(json!({"role": "system", "content": full_instructions}));
messages.push(json!({
"role": "system",
"content": full_instructions,
"cache_control": {"type": "ephemeral"}
}));
let input = prompt.get_formatted_input(); let input = prompt.get_formatted_input();
@@ -166,65 +161,10 @@ pub(crate) async fn stream_chat_completions(
// aggregated assistant message was recorded alongside an earlier partial). // aggregated assistant message was recorded alongside an earlier partial).
let mut last_assistant_text: Option<String> = None; let mut last_assistant_text: Option<String> = None;
// Build a map of which call_ids have outputs // Track call_ids of skipped function calls so we can also skip their outputs
// We'll use this to ensure we never send a FunctionCall without its corresponding output let mut skipped_call_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut call_ids_with_outputs: std::collections::HashSet<String> = std::collections::HashSet::new();
// First pass: collect all call_ids that have outputs
for item in input.iter() {
if let ResponseItem::FunctionCallOutput { call_id, .. } = item {
call_ids_with_outputs.insert(call_id.clone());
}
}
debug!("=== Chat Completions Request Debug ===");
debug!("Input items count: {}", input.len());
debug!("Call IDs with outputs: {:?}", call_ids_with_outputs);
// Second pass: find the first FunctionCall that doesn't have an output
let mut cutoff_at_idx: Option<usize> = None;
for (idx, item) in input.iter().enumerate() {
if let ResponseItem::FunctionCall { call_id, name, .. } = item {
if !call_ids_with_outputs.contains(call_id) {
debug!("Found unanswered function call '{}' (call_id: {}) at index {}", name, call_id, idx);
cutoff_at_idx = Some(idx);
break;
}
}
}
if let Some(cutoff) = cutoff_at_idx {
debug!("Cutting off at index {} to avoid orphaned tool calls", cutoff);
} else {
debug!("No unanswered function calls found, processing all items");
}
// Track whether the MOST RECENT FunctionCall with each call_id was skipped
// This allows the same call_id to be retried - we only skip outputs for the specific skipped calls
let mut call_id_skip_state: std::collections::HashMap<String, bool> = std::collections::HashMap::new();
for (idx, item) in input.iter().enumerate() { for (idx, item) in input.iter().enumerate() {
// Stop processing if we've reached an unanswered function call
if let Some(cutoff) = cutoff_at_idx {
if idx >= cutoff {
debug!("Stopping at index {} due to unanswered function call", idx);
break;
}
}
debug!("Processing item {} of type: {}", idx, match item {
ResponseItem::Message { role, .. } => format!("Message(role={})", role),
ResponseItem::FunctionCall { name, call_id, .. } => format!("FunctionCall(name={}, call_id={})", name, call_id),
ResponseItem::FunctionCallOutput { call_id, .. } => format!("FunctionCallOutput(call_id={})", call_id),
ResponseItem::LocalShellCall { .. } => "LocalShellCall".to_string(),
ResponseItem::CustomToolCall { .. } => "CustomToolCall".to_string(),
ResponseItem::CustomToolCallOutput { .. } => "CustomToolCallOutput".to_string(),
ResponseItem::Reasoning { .. } => "Reasoning".to_string(),
ResponseItem::WebSearchCall { .. } => "WebSearchCall".to_string(),
ResponseItem::GhostSnapshot { .. } => "GhostSnapshot".to_string(),
ResponseItem::Other => "Other".to_string(),
});
match item { match item {
ResponseItem::Message { role, content, .. } => { ResponseItem::Message { role, content, .. } => {
// Build content either as a plain string (typical for assistant text) // Build content either as a plain string (typical for assistant text)
@@ -294,14 +234,11 @@ pub(crate) async fn stream_chat_completions(
// If invalid, skip this function call to avoid API errors // If invalid, skip this function call to avoid API errors
if serde_json::from_str::<serde_json::Value>(arguments).is_err() { if serde_json::from_str::<serde_json::Value>(arguments).is_err() {
debug!("Skipping malformed function call with invalid JSON arguments: {}", arguments); debug!("Skipping malformed function call with invalid JSON arguments: {}", arguments);
// Mark this call_id's most recent state as skipped // Track this call_id so we can also skip its corresponding output
call_id_skip_state.insert(call_id.clone(), true); skipped_call_ids.insert(call_id.clone());
continue; continue;
} }
// Mark this call_id's most recent state as NOT skipped (valid call)
call_id_skip_state.insert(call_id.clone(), false);
let mut msg = json!({ let mut msg = json!({
"role": "assistant", "role": "assistant",
"content": null, "content": null,
@@ -346,9 +283,9 @@ pub(crate) async fn stream_chat_completions(
messages.push(msg); messages.push(msg);
} }
ResponseItem::FunctionCallOutput { call_id, output } => { ResponseItem::FunctionCallOutput { call_id, output } => {
// Skip outputs only if the MOST RECENT FunctionCall with this call_id was skipped // Skip outputs for function calls that were skipped due to malformed arguments
if call_id_skip_state.get(call_id) == Some(&true) { if skipped_call_ids.contains(call_id) {
debug!("Skipping function call output for most recent skipped call_id: {}", call_id); debug!("Skipping function call output for skipped call_id: {}", call_id);
continue; continue;
} }
@@ -417,39 +354,14 @@ pub(crate) async fn stream_chat_completions(
} }
} }
debug!("Built {} messages for API request", messages.len());
// Add cache_control to conversation history for Anthropic prompt caching
// Add it to a message that's at least 3 messages before the end (stable history)
// This caches the earlier conversation while keeping recent turns uncached
if messages.len() > 4 {
let cache_idx = messages.len().saturating_sub(4);
if let Some(msg) = messages.get_mut(cache_idx) {
if let Some(obj) = msg.as_object_mut() {
obj.insert("cache_control".to_string(), json!({"type": "ephemeral"}));
debug!("Added cache_control to message at index {} (conversation history)", cache_idx);
}
}
}
debug!("=== End Chat Completions Request Debug ===");
let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?; let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;
let mut payload = json!({ let payload = json!({
"model": model_family.slug, "model": model_family.slug,
"messages": messages, "messages": messages,
"stream": true, "stream": true,
"tools": tools_json, "tools": tools_json,
}); });
// Add max_tokens - required by Anthropic Messages API
// Use provider config value or default to 8192
let max_tokens = provider.max_tokens.unwrap_or(8192);
if let Some(obj) = payload.as_object_mut() {
obj.insert("max_tokens".to_string(), json!(max_tokens));
}
debug!("Using max_tokens: {}", max_tokens);
debug!( debug!(
"POST to {}: {}", "POST to {}: {}",
provider.get_full_url(&None), provider.get_full_url(&None),

View File

@@ -1123,7 +1123,6 @@ mod tests {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(1000), stream_idle_timeout_ms: Some(1000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };
@@ -1188,7 +1187,6 @@ mod tests {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(1000), stream_idle_timeout_ms: Some(1000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };
@@ -1226,7 +1224,6 @@ mod tests {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(1000), stream_idle_timeout_ms: Some(1000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };
@@ -1266,7 +1263,6 @@ mod tests {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(1000), stream_idle_timeout_ms: Some(1000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };
@@ -1302,7 +1298,6 @@ mod tests {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(1000), stream_idle_timeout_ms: Some(1000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };
@@ -1338,7 +1333,6 @@ mod tests {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(1000), stream_idle_timeout_ms: Some(1000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };
@@ -1443,7 +1437,6 @@ mod tests {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(1000), stream_idle_timeout_ms: Some(1000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };

View File

@@ -2809,7 +2809,6 @@ model_verbosity = "high"
request_max_retries: Some(4), request_max_retries: Some(4),
stream_max_retries: Some(10), stream_max_retries: Some(10),
stream_idle_timeout_ms: Some(300_000), stream_idle_timeout_ms: Some(300_000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };
let model_provider_map = { let model_provider_map = {

View File

@@ -87,10 +87,6 @@ pub struct ModelProviderInfo {
/// the connection as lost. /// the connection as lost.
pub stream_idle_timeout_ms: Option<u64>, pub stream_idle_timeout_ms: Option<u64>,
/// Maximum number of tokens to generate in the response. If not specified, defaults to 8192.
/// This is required by some providers (e.g., Anthropic via LiteLLM).
pub max_tokens: Option<i64>,
/// Does this provider require an OpenAI API Key or ChatGPT login token? If true, /// Does this provider require an OpenAI API Key or ChatGPT login token? If true,
/// user is presented with login screen on first run, and login preference and token/key /// user is presented with login screen on first run, and login preference and token/key
/// are stored in auth.json. If false (which is the default), login screen is skipped, /// are stored in auth.json. If false (which is the default), login screen is skipped,
@@ -294,7 +290,6 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
request_max_retries: None, request_max_retries: None,
stream_max_retries: None, stream_max_retries: None,
stream_idle_timeout_ms: None, stream_idle_timeout_ms: None,
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}, },
), ),
@@ -335,7 +330,6 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
request_max_retries: None, request_max_retries: None,
stream_max_retries: None, stream_max_retries: None,
stream_idle_timeout_ms: None, stream_idle_timeout_ms: None,
max_tokens: None,
requires_openai_auth: true, requires_openai_auth: true,
}, },
), ),
@@ -381,7 +375,6 @@ pub fn create_oss_provider_with_base_url(base_url: &str) -> ModelProviderInfo {
request_max_retries: None, request_max_retries: None,
stream_max_retries: None, stream_max_retries: None,
stream_idle_timeout_ms: None, stream_idle_timeout_ms: None,
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
} }
} }

View File

@@ -693,7 +693,7 @@ pub(crate) fn create_tools_json_for_chat_completions_api(
// We start with the JSON for the Responses API and then rewrite it to match // We start with the JSON for the Responses API and then rewrite it to match
// the chat completions tool call format. // the chat completions tool call format.
let responses_api_tools_json = create_tools_json_for_responses_api(tools)?; let responses_api_tools_json = create_tools_json_for_responses_api(tools)?;
let mut tools_json = responses_api_tools_json let tools_json = responses_api_tools_json
.into_iter() .into_iter()
.filter_map(|mut tool| { .filter_map(|mut tool| {
if tool.get("type") != Some(&serde_json::Value::String("function".to_string())) { if tool.get("type") != Some(&serde_json::Value::String("function".to_string())) {
@@ -712,14 +712,6 @@ pub(crate) fn create_tools_json_for_chat_completions_api(
} }
}) })
.collect::<Vec<serde_json::Value>>(); .collect::<Vec<serde_json::Value>>();
// Add cache_control to the last tool to enable Anthropic prompt caching
if let Some(last_tool) = tools_json.last_mut() {
if let Some(obj) = last_tool.as_object_mut() {
obj.insert("cache_control".to_string(), json!({"type": "ephemeral"}));
}
}
Ok(tools_json) Ok(tools_json)
} }

View File

@@ -58,7 +58,6 @@ async fn run_request(input: Vec<ResponseItem>) -> Value {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(5_000), stream_idle_timeout_ms: Some(5_000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };

View File

@@ -58,7 +58,6 @@ async fn run_stream_with_bytes(sse_body: &[u8]) -> Vec<ResponseEvent> {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(5_000), stream_idle_timeout_ms: Some(5_000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };

View File

@@ -47,7 +47,6 @@ async fn responses_stream_includes_subagent_header_on_review() {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(5_000), stream_idle_timeout_ms: Some(5_000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };
@@ -136,7 +135,6 @@ async fn responses_stream_includes_subagent_header_on_other() {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(5_000), stream_idle_timeout_ms: Some(5_000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };

View File

@@ -712,7 +712,6 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(0), stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(5_000), stream_idle_timeout_ms: Some(5_000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };
@@ -1196,7 +1195,6 @@ async fn azure_overrides_assign_properties_used_for_responses_url() {
request_max_retries: None, request_max_retries: None,
stream_max_retries: None, stream_max_retries: None,
stream_idle_timeout_ms: None, stream_idle_timeout_ms: None,
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };
@@ -1274,7 +1272,6 @@ async fn env_var_overrides_loaded_auth() {
request_max_retries: None, request_max_retries: None,
stream_max_retries: None, stream_max_retries: None,
stream_idle_timeout_ms: None, stream_idle_timeout_ms: None,
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };

View File

@@ -72,7 +72,6 @@ async fn continue_after_stream_error() {
request_max_retries: Some(1), request_max_retries: Some(1),
stream_max_retries: Some(1), stream_max_retries: Some(1),
stream_idle_timeout_ms: Some(2_000), stream_idle_timeout_ms: Some(2_000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };

View File

@@ -80,7 +80,6 @@ async fn retries_on_early_close() {
request_max_retries: Some(0), request_max_retries: Some(0),
stream_max_retries: Some(1), stream_max_retries: Some(1),
stream_idle_timeout_ms: Some(2000), stream_idle_timeout_ms: Some(2000),
max_tokens: None,
requires_openai_auth: false, requires_openai_auth: false,
}; };

View File

@@ -144,7 +144,7 @@ impl McpProcess {
let initialized = self.read_jsonrpc_message().await?; let initialized = self.read_jsonrpc_message().await?;
let os_info = os_info::get(); let os_info = os_info::get();
let user_agent = format!( let user_agent = format!(
"llmx_cli_rs/0.1.7 ({} {}; {}) {} (elicitation test; 0.0.0)", "llmx_cli_rs/0.1.4 ({} {}; {}) {} (elicitation test; 0.0.0)",
os_info.os_type(), os_info.os_type(),
os_info.version(), os_info.version(),
os_info.architecture().unwrap_or("unknown"), os_info.architecture().unwrap_or("unknown"),
@@ -163,7 +163,7 @@ impl McpProcess {
"serverInfo": { "serverInfo": {
"name": "llmx-mcp-server", "name": "llmx-mcp-server",
"title": "LLMX", "title": "LLMX",
"version": "0.1.7", "version": "0.1.4",
"user_agent": user_agent "user_agent": user_agent
}, },
"protocolVersion": mcp_types::MCP_SCHEMA_VERSION "protocolVersion": mcp_types::MCP_SCHEMA_VERSION

View File

@@ -5,7 +5,7 @@ expression: sanitized
/status /status
╭───────────────────────────────────────────────────────────────────────────╮ ╭───────────────────────────────────────────────────────────────────────────╮
│ >_ LLMX (v0.1.7) │ │ >_ LLMX (v0.1.4) │
│ │ │ │
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │ │ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
│ information on rate limits and credits │ │ information on rate limits and credits │

View File

@@ -5,7 +5,7 @@ expression: sanitized
/status /status
╭─────────────────────────────────────────────────────────────────╮ ╭─────────────────────────────────────────────────────────────────╮
│ >_ LLMX (v0.1.7) │ │ >_ LLMX (v0.1.4) │
│ │ │ │
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │ │ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
│ information on rate limits and credits │ │ information on rate limits and credits │

View File

@@ -5,7 +5,7 @@ expression: sanitized
/status /status
╭──────────────────────────────────────────────────────────────╮ ╭──────────────────────────────────────────────────────────────╮
│ >_ LLMX (v0.1.7) │ │ >_ LLMX (v0.1.4) │
│ │ │ │
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │ │ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
│ information on rate limits and credits │ │ information on rate limits and credits │

View File

@@ -5,7 +5,7 @@ expression: sanitized
/status /status
╭──────────────────────────────────────────────────────────────╮ ╭──────────────────────────────────────────────────────────────╮
│ >_ LLMX (v0.1.7) │ │ >_ LLMX (v0.1.4) │
│ │ │ │
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │ │ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
│ information on rate limits and credits │ │ information on rate limits and credits │

View File

@@ -5,7 +5,7 @@ expression: sanitized
/status /status
╭───────────────────────────────────────────────────────────────────╮ ╭───────────────────────────────────────────────────────────────────╮
│ >_ LLMX (v0.1.7) │ │ >_ LLMX (v0.1.4) │
│ │ │ │
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │ │ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
│ information on rate limits and credits │ │ information on rate limits and credits │

View File

@@ -5,7 +5,7 @@ expression: sanitized
/status /status
╭────────────────────────────────────────────╮ ╭────────────────────────────────────────────╮
│ >_ LLMX (v0.1.7) │ │ >_ LLMX (v0.1.4) │
│ │ │ │
│ Visit https://chatgpt.com/llmx/settings/ │ │ Visit https://chatgpt.com/llmx/settings/ │
│ usage for up-to-date │ │ usage for up-to-date │

File diff suppressed because one or more lines are too long