Truncate total tool calls text (#5979)
Put a cap on the aggregate size of the text content in a tool call's output: text items now share a single byte budget, and items that no longer fit are replaced with an "[omitted N text items ...]" marker.

---------

Co-authored-by: Gabriel Peal <gpeal@users.noreply.github.com>
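The change in a nutshell: rather than truncating every text item independently with the per-item formatter, all text items in one FunctionCallOutput now draw from a shared byte budget, and whatever does not fit is dropped and reported once. Below is a minimal, self-contained sketch of that budgeting idea; the constant, function name, and cap value are illustrative stand-ins, not the definitions from this diff.

```rust
// Illustrative sketch of a shared byte budget across text items.
// `MAX_TOTAL_BYTES` and `cap_total_text` are placeholders, not the real codex names.
const MAX_TOTAL_BYTES: usize = 16 * 1024;

fn cap_total_text(texts: &[String]) -> (Vec<String>, usize) {
    let mut kept = Vec::new();
    let mut remaining = MAX_TOTAL_BYTES;
    let mut omitted = 0usize;
    for t in texts {
        if remaining == 0 {
            // Budget exhausted: count the item instead of emitting it.
            omitted += 1;
            continue;
        }
        if t.len() <= remaining {
            remaining -= t.len();
            kept.push(t.clone()); // fits entirely
        } else {
            // Cut at a char boundary so the truncated slice stays valid UTF-8.
            let mut cut = remaining;
            while !t.is_char_boundary(cut) {
                cut -= 1;
            }
            if cut > 0 {
                kept.push(t[..cut].to_string());
            }
            remaining = 0;
        }
    }
    (kept, omitted)
}

fn main() {
    let texts = vec!["a".repeat(10_000), "b".repeat(10_000), "c".repeat(10_000)];
    let (kept, omitted) = cap_total_text(&texts);
    println!("kept {} items, omitted {omitted}", kept.len());
}
```

Images pass through untouched, which is why the real function only counts bytes for InputText items (with a todo in the diff about eventually resizing images).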
@@ -366,23 +366,10 @@ impl ConversationHistory {
         match item {
             ResponseItem::FunctionCallOutput { call_id, output } => {
                 let truncated = format_output_for_model_body(output.content.as_str());
-                let truncated_items = output.content_items.as_ref().map(|items| {
-                    items
-                        .iter()
-                        .map(|it| match it {
-                            FunctionCallOutputContentItem::InputText { text } => {
-                                FunctionCallOutputContentItem::InputText {
-                                    text: format_output_for_model_body(text),
-                                }
-                            }
-                            FunctionCallOutputContentItem::InputImage { image_url } => {
-                                FunctionCallOutputContentItem::InputImage {
-                                    image_url: image_url.clone(),
-                                }
-                            }
-                        })
-                        .collect()
-                });
+                let truncated_items = output
+                    .content_items
+                    .as_ref()
+                    .map(|items| globally_truncate_function_output_items(items));
                 ResponseItem::FunctionCallOutput {
                     call_id: call_id.clone(),
                     output: FunctionCallOutputPayload {
@@ -411,6 +398,53 @@ impl ConversationHistory {
     }
 }

+fn globally_truncate_function_output_items(
+    items: &[FunctionCallOutputContentItem],
+) -> Vec<FunctionCallOutputContentItem> {
+    let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
+    let mut remaining = MODEL_FORMAT_MAX_BYTES;
+    let mut omitted_text_items = 0usize;
+
+    for it in items {
+        match it {
+            FunctionCallOutputContentItem::InputText { text } => {
+                if remaining == 0 {
+                    omitted_text_items += 1;
+                    continue;
+                }
+
+                let len = text.len();
+                if len <= remaining {
+                    out.push(FunctionCallOutputContentItem::InputText { text: text.clone() });
+                    remaining -= len;
+                } else {
+                    let slice = take_bytes_at_char_boundary(text, remaining);
+                    if !slice.is_empty() {
+                        out.push(FunctionCallOutputContentItem::InputText {
+                            text: slice.to_string(),
+                        });
+                    }
+                    remaining = 0;
+                }
+            }
+            // todo(aibrahim): handle input images; resize
+            FunctionCallOutputContentItem::InputImage { image_url } => {
+                out.push(FunctionCallOutputContentItem::InputImage {
+                    image_url: image_url.clone(),
+                });
+            }
+        }
+    }
+
+    if omitted_text_items > 0 {
+        out.push(FunctionCallOutputContentItem::InputText {
+            text: format!("[omitted {omitted_text_items} text items ...]"),
+        });
+    }
+
+    out
+}
+
 pub(crate) fn format_output_for_model_body(content: &str) -> String {
     // Head+tail truncation for the model: show the beginning and end with an elision.
     // Clients still receive full streams; only this formatted summary is capped.
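For context on the surrounding helper: the comment on format_output_for_model_body above describes head+tail truncation, i.e. keep the beginning and the end of an oversized output and elide the middle. The sketch below shows that general shape with a caller-supplied cap and a placeholder elision marker; the real helper's cap and marker text are defined elsewhere in this file and are not reproduced here.

```rust
// Rough sketch of head+tail truncation; the cap and marker wording are
// placeholders, not the values used by format_output_for_model_body.
fn head_tail_truncate(content: &str, max_bytes: usize) -> String {
    if content.len() <= max_bytes {
        return content.to_string();
    }
    let half = max_bytes / 2;
    // Walk to char boundaries so both slices stay valid UTF-8.
    let mut head_end = half;
    while !content.is_char_boundary(head_end) {
        head_end -= 1;
    }
    let mut tail_start = content.len() - half;
    while !content.is_char_boundary(tail_start) {
        tail_start += 1;
    }
    format!(
        "{}\n[... elided {} bytes ...]\n{}",
        &content[..head_end],
        tail_start - head_end,
        &content[tail_start..]
    )
}
```

The boundary walk reflects the same concern the diff's take_bytes_at_char_boundary helper handles for the global budget: caps are counted in bytes, but slices may only be cut at valid UTF-8 boundaries.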
@@ -856,6 +890,81 @@ mod tests {
         );
     }

+    #[test]
+    fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
+        // Arrange: several text items, none exceeding per-item limit, but total exceeds budget.
+        let budget = MODEL_FORMAT_MAX_BYTES;
+        let t1_len = (budget / 2).saturating_sub(10);
+        let t2_len = (budget / 2).saturating_sub(10);
+        let remaining_after_t1_t2 = budget.saturating_sub(t1_len + t2_len);
+        let t3_len = 50; // gets truncated to remaining_after_t1_t2
+        let t4_len = 5; // omitted
+        let t5_len = 7; // omitted
+
+        let t1 = "a".repeat(t1_len);
+        let t2 = "b".repeat(t2_len);
+        let t3 = "c".repeat(t3_len);
+        let t4 = "d".repeat(t4_len);
+        let t5 = "e".repeat(t5_len);
+
+        let item = ResponseItem::FunctionCallOutput {
+            call_id: "call-omit".to_string(),
+            output: FunctionCallOutputPayload {
+                content: "irrelevant".to_string(),
+                content_items: Some(vec![
+                    FunctionCallOutputContentItem::InputText { text: t1 },
+                    FunctionCallOutputContentItem::InputText { text: t2 },
+                    FunctionCallOutputContentItem::InputImage {
+                        image_url: "img:mid".to_string(),
+                    },
+                    FunctionCallOutputContentItem::InputText { text: t3 },
+                    FunctionCallOutputContentItem::InputText { text: t4 },
+                    FunctionCallOutputContentItem::InputText { text: t5 },
+                ]),
+                success: Some(true),
+            },
+        };
+
+        let mut history = ConversationHistory::new();
+        history.record_items([&item]);
+        assert_eq!(history.items.len(), 1);
+        let json = serde_json::to_value(&history.items[0]).expect("serialize to json");
+
+        let output = json
+            .get("output")
+            .expect("output field")
+            .as_array()
+            .expect("array output");
+
+        // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted.
+        assert_eq!(output.len(), 5);
+
+        let first = output[0].as_object().expect("first obj");
+        assert_eq!(first.get("type").unwrap(), "input_text");
+        let first_text = first.get("text").unwrap().as_str().unwrap();
+        assert_eq!(first_text.len(), t1_len);
+
+        let second = output[1].as_object().expect("second obj");
+        assert_eq!(second.get("type").unwrap(), "input_text");
+        let second_text = second.get("text").unwrap().as_str().unwrap();
+        assert_eq!(second_text.len(), t2_len);
+
+        assert_eq!(
+            output[2],
+            serde_json::json!({"type": "input_image", "image_url": "img:mid"})
+        );
+
+        let fourth = output[3].as_object().expect("fourth obj");
+        assert_eq!(fourth.get("type").unwrap(), "input_text");
+        let fourth_text = fourth.get("text").unwrap().as_str().unwrap();
+        assert_eq!(fourth_text.len(), remaining_after_t1_t2);
+
+        let summary = output[4].as_object().expect("summary obj");
+        assert_eq!(summary.get("type").unwrap(), "input_text");
+        let summary_text = summary.get("text").unwrap().as_str().unwrap();
+        assert!(summary_text.contains("omitted 2 text items"));
+    }
+
     //TODO(aibrahim): run CI in release mode.
     #[cfg(not(debug_assertions))]
     #[test]
@@ -3,9 +3,16 @@

 use anyhow::Context;
 use anyhow::Result;
+use codex_core::config::types::McpServerConfig;
+use codex_core::config::types::McpServerTransportConfig;
 use codex_core::features::Feature;
 use codex_core::model_family::find_family_for_model;
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::Op;
 use codex_core::protocol::SandboxPolicy;
+use codex_protocol::config_types::ReasoningSummary;
+use codex_protocol::user_input::UserInput;
 use core_test_support::assert_regex_match;
 use core_test_support::responses;
 use core_test_support::responses::ev_assistant_message;
@@ -18,10 +25,13 @@ use core_test_support::responses::sse;
 use core_test_support::responses::start_mock_server;
 use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::test_codex;
+use core_test_support::wait_for_event;
 use escargot::CargoBuild;
 use regex_lite::Regex;
 use serde_json::Value;
 use serde_json::json;
+use std::collections::HashMap;
+use std::time::Duration;
 use wiremock::matchers::any;

 // Verifies byte-truncation formatting for function error output (RespondToModel errors)
@@ -268,3 +278,105 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>

     Ok(())
 }
+
+// Verifies that an MCP image tool output is serialized as content_items array with
+// the image preserved and no truncation summary appended (since there are no text items).
+#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
+async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let call_id = "rmcp-image-no-trunc";
+    let server_name = "rmcp";
+    let tool_name = format!("mcp__{server_name}__image");
+
+    mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, &tool_name, "{}"),
+            ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    let final_mock = mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    // Build the stdio rmcp server and pass a tiny PNG via data URL so it can construct ImageContent.
+    let rmcp_test_server_bin = CargoBuild::new()
+        .package("codex-rmcp-client")
+        .bin("test_stdio_server")
+        .run()?
+        .path()
+        .to_string_lossy()
+        .into_owned();
+
+    // 1x1 PNG data URL
+    let openai_png = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMB/ee9bQAAAABJRU5ErkJggg==";
+
+    let mut builder = test_codex().with_config(move |config| {
+        config.features.enable(Feature::RmcpClient);
+        config.mcp_servers.insert(
+            server_name.to_string(),
+            McpServerConfig {
+                transport: McpServerTransportConfig::Stdio {
+                    command: rmcp_test_server_bin,
+                    args: Vec::new(),
+                    env: Some(HashMap::from([(
+                        "MCP_TEST_IMAGE_DATA_URL".to_string(),
+                        openai_png.to_string(),
+                    )])),
+                    env_vars: Vec::new(),
+                    cwd: None,
+                },
+                enabled: true,
+                startup_timeout_sec: Some(Duration::from_secs(10)),
+                tool_timeout_sec: None,
+                enabled_tools: None,
+                disabled_tools: None,
+            },
+        );
+    });
+    let fixture = builder.build(&server).await?;
+    let session_model = fixture.session_configured.model.clone();
+
+    fixture
+        .codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "call the rmcp image tool".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: fixture.cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::ReadOnly,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    // Wait for completion to ensure the outbound request is captured.
+    wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+    let output_item = final_mock.single_request().function_call_output(call_id);
+    // Expect exactly one array element: the image item; and no trailing summary text.
+    let output = output_item.get("output").expect("output");
+    assert!(output.is_array(), "expected array output");
+    let arr = output.as_array().unwrap();
+    assert_eq!(arr.len(), 1, "no truncation summary should be appended");
+    assert_eq!(
+        arr[0],
+        json!({"type": "input_image", "image_url": openai_png})
+    );
+
+    Ok(())
+}