Compare commits
7 Commits
rust-v0.1.
...
rust-v0.1.
| Author | SHA1 | Date | |
|---|---|---|---|
| ee75cfaa7f | |||
| 085d8c9343 | |||
| 462b219d3f | |||
| 63de226119 | |||
| 7d2842885a | |||
| 67ff31104f | |||
| 866ca2a372 |
1380
llmx-rs/Cargo.lock
generated
1380
llmx-rs/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -43,7 +43,7 @@ members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.1.2"
|
||||
version = "0.1.6"
|
||||
# Track the edition for all workspace crates in one place. Individual
|
||||
# crates can still override this value, but keeping it here means new
|
||||
# crates created with `cargo new -w ...` automatically inherit the 2024
|
||||
|
||||
@@ -138,7 +138,7 @@ impl McpProcess {
|
||||
client_info: ClientInfo {
|
||||
name: "llmx-app-server-tests".to_string(),
|
||||
title: None,
|
||||
version: "0.1.2".to_string(),
|
||||
version: "0.1.6".to_string(),
|
||||
},
|
||||
})?);
|
||||
let req_id = self.send_request("initialize", params).await?;
|
||||
|
||||
@@ -26,7 +26,7 @@ async fn get_user_agent_returns_current_llmx_user_agent() -> Result<()> {
|
||||
|
||||
let os_info = os_info::get();
|
||||
let user_agent = format!(
|
||||
"llmx_cli_rs/0.1.2 ({} {}; {}) {} (llmx-app-server-tests; 0.1.2)",
|
||||
"llmx_cli_rs/0.1.6 ({} {}; {}) {} (llmx-app-server-tests; 0.1.6)",
|
||||
os_info.os_type(),
|
||||
os_info.version(),
|
||||
os_info.architecture().unwrap_or("unknown"),
|
||||
|
||||
@@ -56,7 +56,12 @@ pub(crate) async fn stream_chat_completions(
|
||||
let mut messages = Vec::<serde_json::Value>::new();
|
||||
|
||||
let full_instructions = prompt.get_full_instructions(model_family);
|
||||
messages.push(json!({"role": "system", "content": full_instructions}));
|
||||
// Add cache_control to system instructions for Anthropic prompt caching
|
||||
messages.push(json!({
|
||||
"role": "system",
|
||||
"content": full_instructions,
|
||||
"cache_control": {"type": "ephemeral"}
|
||||
}));
|
||||
|
||||
let input = prompt.get_formatted_input();
|
||||
|
||||
@@ -161,7 +166,65 @@ pub(crate) async fn stream_chat_completions(
|
||||
// aggregated assistant message was recorded alongside an earlier partial).
|
||||
let mut last_assistant_text: Option<String> = None;
|
||||
|
||||
// Build a map of which call_ids have outputs
|
||||
// We'll use this to ensure we never send a FunctionCall without its corresponding output
|
||||
let mut call_ids_with_outputs: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
|
||||
// First pass: collect all call_ids that have outputs
|
||||
for item in input.iter() {
|
||||
if let ResponseItem::FunctionCallOutput { call_id, .. } = item {
|
||||
call_ids_with_outputs.insert(call_id.clone());
|
||||
}
|
||||
}
|
||||
|
||||
debug!("=== Chat Completions Request Debug ===");
|
||||
debug!("Input items count: {}", input.len());
|
||||
debug!("Call IDs with outputs: {:?}", call_ids_with_outputs);
|
||||
|
||||
// Second pass: find the first FunctionCall that doesn't have an output
|
||||
let mut cutoff_at_idx: Option<usize> = None;
|
||||
for (idx, item) in input.iter().enumerate() {
|
||||
if let ResponseItem::FunctionCall { call_id, name, .. } = item {
|
||||
if !call_ids_with_outputs.contains(call_id) {
|
||||
debug!("Found unanswered function call '{}' (call_id: {}) at index {}", name, call_id, idx);
|
||||
cutoff_at_idx = Some(idx);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(cutoff) = cutoff_at_idx {
|
||||
debug!("Cutting off at index {} to avoid orphaned tool calls", cutoff);
|
||||
} else {
|
||||
debug!("No unanswered function calls found, processing all items");
|
||||
}
|
||||
|
||||
// Track whether the MOST RECENT FunctionCall with each call_id was skipped
|
||||
// This allows the same call_id to be retried - we only skip outputs for the specific skipped calls
|
||||
let mut call_id_skip_state: std::collections::HashMap<String, bool> = std::collections::HashMap::new();
|
||||
|
||||
for (idx, item) in input.iter().enumerate() {
|
||||
// Stop processing if we've reached an unanswered function call
|
||||
if let Some(cutoff) = cutoff_at_idx {
|
||||
if idx >= cutoff {
|
||||
debug!("Stopping at index {} due to unanswered function call", idx);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Processing item {} of type: {}", idx, match item {
|
||||
ResponseItem::Message { role, .. } => format!("Message(role={})", role),
|
||||
ResponseItem::FunctionCall { name, call_id, .. } => format!("FunctionCall(name={}, call_id={})", name, call_id),
|
||||
ResponseItem::FunctionCallOutput { call_id, .. } => format!("FunctionCallOutput(call_id={})", call_id),
|
||||
ResponseItem::LocalShellCall { .. } => "LocalShellCall".to_string(),
|
||||
ResponseItem::CustomToolCall { .. } => "CustomToolCall".to_string(),
|
||||
ResponseItem::CustomToolCallOutput { .. } => "CustomToolCallOutput".to_string(),
|
||||
ResponseItem::Reasoning { .. } => "Reasoning".to_string(),
|
||||
ResponseItem::WebSearchCall { .. } => "WebSearchCall".to_string(),
|
||||
ResponseItem::GhostSnapshot { .. } => "GhostSnapshot".to_string(),
|
||||
ResponseItem::Other => "Other".to_string(),
|
||||
});
|
||||
|
||||
match item {
|
||||
ResponseItem::Message { role, content, .. } => {
|
||||
// Build content either as a plain string (typical for assistant text)
|
||||
@@ -175,7 +238,10 @@ pub(crate) async fn stream_chat_completions(
|
||||
ContentItem::InputText { text: t }
|
||||
| ContentItem::OutputText { text: t } => {
|
||||
text.push_str(t);
|
||||
items.push(json!({"type":"text","text": t}));
|
||||
// Only add text content blocks that are non-empty
|
||||
if !t.trim().is_empty() {
|
||||
items.push(json!({"type":"text","text": t}));
|
||||
}
|
||||
}
|
||||
ContentItem::InputImage { image_url } => {
|
||||
saw_image = true;
|
||||
@@ -184,6 +250,11 @@ pub(crate) async fn stream_chat_completions(
|
||||
}
|
||||
}
|
||||
|
||||
// Skip messages with empty or whitespace-only text content (unless they contain images)
|
||||
if text.trim().is_empty() && !saw_image {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip exact-duplicate assistant messages.
|
||||
if role == "assistant" {
|
||||
if let Some(prev) = &last_assistant_text
|
||||
@@ -219,6 +290,18 @@ pub(crate) async fn stream_chat_completions(
|
||||
call_id,
|
||||
..
|
||||
} => {
|
||||
// Validate that arguments is valid JSON before sending to API
|
||||
// If invalid, skip this function call to avoid API errors
|
||||
if serde_json::from_str::<serde_json::Value>(arguments).is_err() {
|
||||
debug!("Skipping malformed function call with invalid JSON arguments: {}", arguments);
|
||||
// Mark this call_id's most recent state as skipped
|
||||
call_id_skip_state.insert(call_id.clone(), true);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Mark this call_id's most recent state as NOT skipped (valid call)
|
||||
call_id_skip_state.insert(call_id.clone(), false);
|
||||
|
||||
let mut msg = json!({
|
||||
"role": "assistant",
|
||||
"content": null,
|
||||
@@ -263,6 +346,12 @@ pub(crate) async fn stream_chat_completions(
|
||||
messages.push(msg);
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { call_id, output } => {
|
||||
// Skip outputs only if the MOST RECENT FunctionCall with this call_id was skipped
|
||||
if call_id_skip_state.get(call_id) == Some(&true) {
|
||||
debug!("Skipping function call output for most recent skipped call_id: {}", call_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Prefer structured content items when available (e.g., images)
|
||||
// otherwise fall back to the legacy plain-string content.
|
||||
let content_value = if let Some(items) = &output.content_items {
|
||||
@@ -328,14 +417,37 @@ pub(crate) async fn stream_chat_completions(
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Built {} messages for API request", messages.len());
|
||||
|
||||
// Add cache_control to conversation history for Anthropic prompt caching
|
||||
// Add it to a message that's at least 3 messages before the end (stable history)
|
||||
// This caches the earlier conversation while keeping recent turns uncached
|
||||
if messages.len() > 4 {
|
||||
let cache_idx = messages.len().saturating_sub(4);
|
||||
if let Some(msg) = messages.get_mut(cache_idx) {
|
||||
if let Some(obj) = msg.as_object_mut() {
|
||||
obj.insert("cache_control".to_string(), json!({"type": "ephemeral"}));
|
||||
debug!("Added cache_control to message at index {} (conversation history)", cache_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!("=== End Chat Completions Request Debug ===");
|
||||
|
||||
let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;
|
||||
let payload = json!({
|
||||
let mut payload = json!({
|
||||
"model": model_family.slug,
|
||||
"messages": messages,
|
||||
"stream": true,
|
||||
"tools": tools_json,
|
||||
});
|
||||
|
||||
// Add max_tokens - required by Anthropic Messages API
|
||||
// No configured value is consulted here; a fixed default of 8192 is always sent
|
||||
if let Some(obj) = payload.as_object_mut() {
|
||||
obj.insert("max_tokens".to_string(), json!(8192));
|
||||
}
|
||||
|
||||
debug!(
|
||||
"POST to {}: {}",
|
||||
provider.get_full_url(&None),
|
||||
|
||||
@@ -693,7 +693,7 @@ pub(crate) fn create_tools_json_for_chat_completions_api(
|
||||
// We start with the JSON for the Responses API and then rewrite it to match
|
||||
// the chat completions tool call format.
|
||||
let responses_api_tools_json = create_tools_json_for_responses_api(tools)?;
|
||||
let tools_json = responses_api_tools_json
|
||||
let mut tools_json = responses_api_tools_json
|
||||
.into_iter()
|
||||
.filter_map(|mut tool| {
|
||||
if tool.get("type") != Some(&serde_json::Value::String("function".to_string())) {
|
||||
@@ -712,6 +712,14 @@ pub(crate) fn create_tools_json_for_chat_completions_api(
|
||||
}
|
||||
})
|
||||
.collect::<Vec<serde_json::Value>>();
|
||||
|
||||
// Add cache_control to the last tool to enable Anthropic prompt caching
|
||||
if let Some(last_tool) = tools_json.last_mut() {
|
||||
if let Some(obj) = last_tool.as_object_mut() {
|
||||
obj.insert("cache_control".to_string(), json!({"type": "ephemeral"}));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(tools_json)
|
||||
}
|
||||
|
||||
|
||||
@@ -144,7 +144,7 @@ impl McpProcess {
|
||||
let initialized = self.read_jsonrpc_message().await?;
|
||||
let os_info = os_info::get();
|
||||
let user_agent = format!(
|
||||
"llmx_cli_rs/0.1.2 ({} {}; {}) {} (elicitation test; 0.0.0)",
|
||||
"llmx_cli_rs/0.1.6 ({} {}; {}) {} (elicitation test; 0.0.0)",
|
||||
os_info.os_type(),
|
||||
os_info.version(),
|
||||
os_info.architecture().unwrap_or("unknown"),
|
||||
@@ -163,7 +163,7 @@ impl McpProcess {
|
||||
"serverInfo": {
|
||||
"name": "llmx-mcp-server",
|
||||
"title": "LLMX",
|
||||
"version": "0.1.2",
|
||||
"version": "0.1.6",
|
||||
"user_agent": user_agent
|
||||
},
|
||||
"protocolVersion": mcp_types::MCP_SCHEMA_VERSION
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭───────────────────────────────────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.2) │
|
||||
│ >_ LLMX (v0.1.6) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭─────────────────────────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.2) │
|
||||
│ >_ LLMX (v0.1.6) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭──────────────────────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.2) │
|
||||
│ >_ LLMX (v0.1.6) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭──────────────────────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.2) │
|
||||
│ >_ LLMX (v0.1.6) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭───────────────────────────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.2) │
|
||||
│ >_ LLMX (v0.1.6) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
|
||||
@@ -5,7 +5,7 @@ expression: sanitized
|
||||
/status
|
||||
|
||||
╭────────────────────────────────────────────╮
|
||||
│ >_ LLMX (v0.1.2) │
|
||||
│ >_ LLMX (v0.1.6) │
|
||||
│ │
|
||||
│ Visit https://chatgpt.com/llmx/settings/ │
|
||||
│ usage for up-to-date │
|
||||
|
||||
10
llmx-rs/tui/tests/fixtures/binary-size-log.jsonl
vendored
10
llmx-rs/tui/tests/fixtures/binary-size-log.jsonl
vendored
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user