Centralize truncation in conversation history (#5652)

move the truncation logic to conversation history to use on any tool
output. This will help us in avoiding edge cases while truncating the
tool calls and mcp calls.
This commit is contained in:
Ahmed Ibrahim
2025-10-27 14:05:35 -07:00
committed by GitHub
parent 0fc295d958
commit 7226365397
6 changed files with 588 additions and 365 deletions

View File

@@ -33,6 +33,7 @@ mod stream_no_completed;
mod tool_harness;
mod tool_parallelism;
mod tools;
mod truncation;
mod unified_exec;
mod user_notification;
mod view_image;

View File

@@ -0,0 +1,270 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Context;
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::SandboxPolicy;
use core_test_support::assert_regex_match;
use core_test_support::responses;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
use escargot::CargoBuild;
use regex_lite::Regex;
use serde_json::Value;
use serde_json::json;
use wiremock::matchers::any;
// Verifies byte-truncation formatting for function error output (RespondToModel errors)
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
// Use the test model that wires function tools like grep_files
config.model = "test-gpt-5-codex".to_string();
config.model_family =
find_family_for_model("test-gpt-5-codex").expect("model family for test model");
});
let test = builder.build(&server).await?;
// Construct a very long, non-existent path to force a RespondToModel error with a large message
let long_path = "a".repeat(20_000);
let call_id = "grep-huge-error";
let args = json!({
"pattern": "alpha",
"path": long_path,
"limit": 10
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "grep_files", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
"trigger grep_files with long path to test truncation",
SandboxPolicy::DangerFullAccess,
)
.await?;
let output = mock
.function_call_output_text(call_id)
.context("function error output present")?;
tracing::debug!(output = %output, "truncated function error output");
// Expect plaintext with byte-truncation marker and no omitted-lines marker
assert!(
serde_json::from_str::<serde_json::Value>(&output).is_err(),
"expected error output to be plain text",
);
let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]\s*$"#;
assert_regex_match(truncated_pattern, &output);
assert!(
!output.contains("omitted"),
"line omission marker should not appear when no lines were dropped: {output}"
);
Ok(())
}
// Verifies that a standard tool call (shell) exceeding the model formatting
// limits is truncated before being sent back to the model.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// Use a model that exposes the generic shell tool.
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
});
let fixture = builder.build(&server).await?;
let call_id = "shell-too-large";
let args = serde_json::json!({
"command": ["/bin/sh", "-c", "seq 1 400"],
"timeout_ms": 5_000,
});
// First response: model tells us to run the tool; second: complete the turn.
mount_sse_once_match(
&server,
any(),
sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
responses::ev_completed("resp-1"),
]),
)
.await;
let mock2 = mount_sse_once_match(
&server,
any(),
sse(vec![
responses::ev_assistant_message("msg-1", "done"),
responses::ev_completed("resp-2"),
]),
)
.await;
fixture
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
.await?;
// Inspect what we sent back to the model; it should contain a truncated
// function_call_output for the shell call.
let output = mock2
.single_request()
.function_call_output_text(call_id)
.context("function_call_output present for shell call")?;
// Expect plain text (not JSON) with truncation markers and line elision.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated shell output to be plain text"
);
let truncated_pattern = r#"(?s)^Exit code: 0
Wall time: .* seconds
Total output lines: 400
Output:
1
2
3
4
5
6
.*
\[\.{3} omitted 144 of 400 lines \.{3}\]
.*
396
397
398
399
400
$"#;
assert_regex_match(truncated_pattern, &output);
Ok(())
}
// Verifies that an MCP tool call result exceeding the model formatting limits
// is truncated before being sent back to the model.
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let call_id = "rmcp-truncated";
let server_name = "rmcp";
let tool_name = format!("mcp__{server_name}__echo");
// Build a very large message to exceed 10KiB once serialized.
let large_msg = "long-message-with-newlines-".repeat(600);
let args_json = serde_json::json!({ "message": large_msg });
mount_sse_once_match(
&server,
any(),
sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call(call_id, &tool_name, &args_json.to_string()),
responses::ev_completed("resp-1"),
]),
)
.await;
let mock2 = mount_sse_once_match(
&server,
any(),
sse(vec![
responses::ev_assistant_message("msg-1", "rmcp echo tool completed."),
responses::ev_completed("resp-2"),
]),
)
.await;
// Compile the rmcp stdio test server and configure it.
let rmcp_test_server_bin = CargoBuild::new()
.package("codex-rmcp-client")
.bin("test_stdio_server")
.run()?
.path()
.to_string_lossy()
.into_owned();
let mut builder = test_codex().with_config(move |config| {
config.features.enable(Feature::RmcpClient);
config.mcp_servers.insert(
server_name.to_string(),
codex_core::config_types::McpServerConfig {
transport: codex_core::config_types::McpServerTransportConfig::Stdio {
command: rmcp_test_server_bin,
args: Vec::new(),
env: None,
env_vars: Vec::new(),
cwd: None,
},
enabled: true,
startup_timeout_sec: Some(std::time::Duration::from_secs(10)),
tool_timeout_sec: None,
enabled_tools: None,
disabled_tools: None,
},
);
});
let fixture = builder.build(&server).await?;
fixture
.submit_turn_with_policy(
"call the rmcp echo tool with a very large message",
SandboxPolicy::ReadOnly,
)
.await?;
// The MCP tool call output is converted to a function_call_output for the model.
let output = mock2
.single_request()
.function_call_output_text(call_id)
.context("function_call_output present for rmcp call")?;
// Expect plain text with byte-based truncation marker.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated MCP output to be plain text"
);
assert!(
output.starts_with("Total output lines: 1\n\n{"),
"expected total line header and JSON head, got: {output}"
);
let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]")
.expect("compile regex");
assert!(
byte_marker.is_match(&output),
"expected byte truncation marker, got: {output}"
);
Ok(())
}