Centralize truncation in conversation history (#5652)
move the truncation logic to conversation history to use on any tool output. This will help us in avoiding edge cases while truncating the tool calls and mcp calls.
This commit is contained in:
@@ -2366,10 +2366,6 @@ mod tests {
|
||||
use crate::state::TaskKind;
|
||||
use crate::tasks::SessionTask;
|
||||
use crate::tasks::SessionTaskContext;
|
||||
use crate::tools::MODEL_FORMAT_HEAD_LINES;
|
||||
use crate::tools::MODEL_FORMAT_MAX_BYTES;
|
||||
use crate::tools::MODEL_FORMAT_MAX_LINES;
|
||||
use crate::tools::MODEL_FORMAT_TAIL_LINES;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
@@ -2456,95 +2452,6 @@ mod tests {
|
||||
assert_eq!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn model_truncation_head_tail_by_lines() {
|
||||
// Build 400 short lines so line-count limit, not byte budget, triggers truncation
|
||||
let lines: Vec<String> = (1..=400).map(|i| format!("line{i}")).collect();
|
||||
let full = lines.join("\n");
|
||||
|
||||
let exec = ExecToolCallOutput {
|
||||
exit_code: 0,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(full),
|
||||
duration: StdDuration::from_secs(1),
|
||||
timed_out: false,
|
||||
};
|
||||
|
||||
let out = format_exec_output_str(&exec);
|
||||
|
||||
// Strip truncation header if present for subsequent assertions
|
||||
let body = out
|
||||
.strip_prefix("Total output lines: ")
|
||||
.and_then(|rest| rest.split_once("\n\n").map(|x| x.1))
|
||||
.unwrap_or(out.as_str());
|
||||
|
||||
// Expect elision marker with correct counts
|
||||
let omitted = 400 - MODEL_FORMAT_MAX_LINES; // 144
|
||||
let marker = format!("\n[... omitted {omitted} of 400 lines ...]\n\n");
|
||||
assert!(out.contains(&marker), "missing marker: {out}");
|
||||
|
||||
// Validate head and tail
|
||||
let parts: Vec<&str> = body.split(&marker).collect();
|
||||
assert_eq!(parts.len(), 2, "expected one marker split");
|
||||
let head = parts[0];
|
||||
let tail = parts[1];
|
||||
|
||||
let expected_head: String = (1..=MODEL_FORMAT_HEAD_LINES)
|
||||
.map(|i| format!("line{i}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
assert!(head.starts_with(&expected_head), "head mismatch");
|
||||
|
||||
let expected_tail: String = ((400 - MODEL_FORMAT_TAIL_LINES + 1)..=400)
|
||||
.map(|i| format!("line{i}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
assert!(tail.ends_with(&expected_tail), "tail mismatch");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn model_truncation_respects_byte_budget() {
|
||||
// Construct a large output (about 100kB) so byte budget dominates
|
||||
let big_line = "x".repeat(100);
|
||||
let full = std::iter::repeat_n(big_line, 1000)
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let exec = ExecToolCallOutput {
|
||||
exit_code: 0,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(full.clone()),
|
||||
duration: StdDuration::from_secs(1),
|
||||
timed_out: false,
|
||||
};
|
||||
|
||||
let out = format_exec_output_str(&exec);
|
||||
// Keep strict budget on the truncated body (excluding header)
|
||||
let body = out
|
||||
.strip_prefix("Total output lines: ")
|
||||
.and_then(|rest| rest.split_once("\n\n").map(|x| x.1))
|
||||
.unwrap_or(out.as_str());
|
||||
assert!(body.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget");
|
||||
assert!(out.contains("omitted"), "should contain elision marker");
|
||||
|
||||
// Ensure head and tail are drawn from the original
|
||||
assert!(full.starts_with(body.chars().take(8).collect::<String>().as_str()));
|
||||
assert!(
|
||||
full.ends_with(
|
||||
body.chars()
|
||||
.rev()
|
||||
.take(8)
|
||||
.collect::<String>()
|
||||
.chars()
|
||||
.rev()
|
||||
.collect::<String>()
|
||||
.as_str()
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn includes_timed_out_message() {
|
||||
let exec = ExecToolCallOutput {
|
||||
|
||||
@@ -2,9 +2,18 @@ use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
use codex_protocol::protocol::TokenUsageInfo;
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
use codex_utils_string::take_last_bytes_at_char_boundary;
|
||||
use std::ops::Deref;
|
||||
use tracing::error;
|
||||
|
||||
// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
|
||||
pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
|
||||
pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
|
||||
pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
|
||||
pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
|
||||
pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
|
||||
|
||||
/// Transcript of conversation history
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct ConversationHistory {
|
||||
@@ -47,7 +56,8 @@ impl ConversationHistory {
|
||||
continue;
|
||||
}
|
||||
|
||||
self.items.push(item.clone());
|
||||
let processed = Self::process_item(&item);
|
||||
self.items.push(processed);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,6 +78,22 @@ impl ConversationHistory {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
|
||||
self.items = items;
|
||||
}
|
||||
|
||||
pub(crate) fn update_token_info(
|
||||
&mut self,
|
||||
usage: &TokenUsage,
|
||||
model_context_window: Option<i64>,
|
||||
) {
|
||||
self.token_info = TokenUsageInfo::new_or_append(
|
||||
&self.token_info,
|
||||
&Some(usage.clone()),
|
||||
model_context_window,
|
||||
);
|
||||
}
|
||||
|
||||
/// This function enforces a couple of invariants on the in-memory history:
|
||||
/// 1. every call (function/custom) has a corresponding output entry
|
||||
/// 2. every output has a corresponding call entry
|
||||
@@ -253,10 +279,6 @@ impl ConversationHistory {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
|
||||
self.items = items;
|
||||
}
|
||||
|
||||
/// Removes the corresponding paired item for the provided `item`, if any.
|
||||
///
|
||||
/// Pairs:
|
||||
@@ -326,19 +348,108 @@ impl ConversationHistory {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn update_token_info(
|
||||
&mut self,
|
||||
usage: &TokenUsage,
|
||||
model_context_window: Option<i64>,
|
||||
) {
|
||||
self.token_info = TokenUsageInfo::new_or_append(
|
||||
&self.token_info,
|
||||
&Some(usage.clone()),
|
||||
model_context_window,
|
||||
);
|
||||
fn process_item(item: &ResponseItem) -> ResponseItem {
|
||||
match item {
|
||||
ResponseItem::FunctionCallOutput { call_id, output } => {
|
||||
let truncated = format_output_for_model_body(output.content.as_str());
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: truncated,
|
||||
success: output.success,
|
||||
},
|
||||
}
|
||||
}
|
||||
ResponseItem::CustomToolCallOutput { call_id, output } => {
|
||||
let truncated = format_output_for_model_body(output);
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: truncated,
|
||||
}
|
||||
}
|
||||
ResponseItem::Message { .. }
|
||||
| ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::WebSearchCall { .. }
|
||||
| ResponseItem::CustomToolCall { .. }
|
||||
| ResponseItem::GhostSnapshot { .. }
|
||||
| ResponseItem::Other => item.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn format_output_for_model_body(content: &str) -> String {
|
||||
// Head+tail truncation for the model: show the beginning and end with an elision.
|
||||
// Clients still receive full streams; only this formatted summary is capped.
|
||||
let total_lines = content.lines().count();
|
||||
if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
|
||||
return content.to_string();
|
||||
}
|
||||
let output = truncate_formatted_exec_output(content, total_lines);
|
||||
format!("Total output lines: {total_lines}\n\n{output}")
|
||||
}
|
||||
|
||||
fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
|
||||
let segments: Vec<&str> = content.split_inclusive('\n').collect();
|
||||
let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
|
||||
let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
|
||||
let omitted = segments.len().saturating_sub(head_take + tail_take);
|
||||
|
||||
let head_slice_end: usize = segments
|
||||
.iter()
|
||||
.take(head_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum();
|
||||
let tail_slice_start: usize = if tail_take == 0 {
|
||||
content.len()
|
||||
} else {
|
||||
content.len()
|
||||
- segments
|
||||
.iter()
|
||||
.rev()
|
||||
.take(tail_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum::<usize>()
|
||||
};
|
||||
let head_slice = &content[..head_slice_end];
|
||||
let tail_slice = &content[tail_slice_start..];
|
||||
let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
|
||||
// this is a bit wrong. We are counting metadata lines and not just shell output lines.
|
||||
let marker = if omitted > 0 {
|
||||
Some(format!(
|
||||
"\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
|
||||
))
|
||||
} else if truncated_by_bytes {
|
||||
Some(format!(
|
||||
"\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let marker_len = marker.as_ref().map_or(0, String::len);
|
||||
let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
|
||||
let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len));
|
||||
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
|
||||
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
|
||||
|
||||
result.push_str(head_part);
|
||||
if let Some(marker_text) = marker.as_ref() {
|
||||
result.push_str(marker_text);
|
||||
}
|
||||
|
||||
let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
|
||||
if remaining == 0 {
|
||||
return result;
|
||||
}
|
||||
|
||||
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
|
||||
result.push_str(tail_part);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn error_or_panic(message: String) {
|
||||
if cfg!(debug_assertions) || env!("CARGO_PKG_VERSION").contains("alpha") {
|
||||
@@ -533,6 +644,183 @@ mod tests {
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn record_items_truncates_function_call_output_content() {
|
||||
let mut history = ConversationHistory::new();
|
||||
let long_line = "a very long line to trigger truncation\n";
|
||||
let long_output = long_line.repeat(2_500);
|
||||
let item = ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-100".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: long_output.clone(),
|
||||
success: Some(true),
|
||||
},
|
||||
};
|
||||
|
||||
history.record_items([&item]);
|
||||
|
||||
assert_eq!(history.items.len(), 1);
|
||||
match &history.items[0] {
|
||||
ResponseItem::FunctionCallOutput { output, .. } => {
|
||||
assert_ne!(output.content, long_output);
|
||||
assert!(
|
||||
output.content.starts_with("Total output lines:"),
|
||||
"expected truncated summary, got {}",
|
||||
output.content
|
||||
);
|
||||
}
|
||||
other => panic!("unexpected history item: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn record_items_truncates_custom_tool_call_output_content() {
|
||||
let mut history = ConversationHistory::new();
|
||||
let line = "custom output that is very long\n";
|
||||
let long_output = line.repeat(2_500);
|
||||
let item = ResponseItem::CustomToolCallOutput {
|
||||
call_id: "tool-200".to_string(),
|
||||
output: long_output.clone(),
|
||||
};
|
||||
|
||||
history.record_items([&item]);
|
||||
|
||||
assert_eq!(history.items.len(), 1);
|
||||
match &history.items[0] {
|
||||
ResponseItem::CustomToolCallOutput { output, .. } => {
|
||||
assert_ne!(output, &long_output);
|
||||
assert!(
|
||||
output.starts_with("Total output lines:"),
|
||||
"expected truncated summary, got {output}"
|
||||
);
|
||||
}
|
||||
other => panic!("unexpected history item: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
// The following tests were adapted from tools::mod truncation tests to
|
||||
// target the new truncation functions in conversation_history.
|
||||
|
||||
use regex_lite::Regex;
|
||||
|
||||
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
|
||||
let pattern = truncated_message_pattern(line, total_lines);
|
||||
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
|
||||
panic!("failed to compile regex {pattern}: {err}");
|
||||
});
|
||||
let captures = regex
|
||||
.captures(message)
|
||||
.unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
|
||||
let body = captures
|
||||
.name("body")
|
||||
.expect("missing body capture")
|
||||
.as_str();
|
||||
assert!(
|
||||
body.len() <= MODEL_FORMAT_MAX_BYTES,
|
||||
"body exceeds byte limit: {} bytes",
|
||||
body.len()
|
||||
);
|
||||
}
|
||||
|
||||
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
|
||||
let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
|
||||
let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
|
||||
let omitted = total_lines.saturating_sub(head_take + tail_take);
|
||||
let escaped_line = regex_lite::escape(line);
|
||||
if omitted == 0 {
|
||||
return format!(
|
||||
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
|
||||
);
|
||||
}
|
||||
format!(
|
||||
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_truncates_large_error() {
|
||||
let line = "very long execution error line that should trigger truncation\n";
|
||||
let large_error = line.repeat(2_500); // way beyond both byte and line limits
|
||||
|
||||
let truncated = format_output_for_model_body(&large_error);
|
||||
|
||||
let total_lines = large_error.lines().count();
|
||||
assert_truncated_message_matches(&truncated, line, total_lines);
|
||||
assert_ne!(truncated, large_error);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
|
||||
let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
|
||||
let truncated = format_output_for_model_body(&long_line);
|
||||
|
||||
assert_ne!(truncated, long_line);
|
||||
let marker_line =
|
||||
format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
|
||||
assert!(
|
||||
truncated.contains(&marker_line),
|
||||
"missing byte truncation marker: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
!truncated.contains("omitted"),
|
||||
"line omission marker should not appear when no lines were dropped: {truncated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_returns_original_when_within_limits() {
|
||||
let content = "example output\n".repeat(10);
|
||||
|
||||
assert_eq!(format_output_for_model_body(&content), content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
|
||||
let total_lines = MODEL_FORMAT_MAX_LINES + 100;
|
||||
let content: String = (0..total_lines)
|
||||
.map(|idx| format!("line-{idx}\n"))
|
||||
.collect();
|
||||
|
||||
let truncated = format_output_for_model_body(&content);
|
||||
let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
|
||||
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
|
||||
|
||||
assert!(
|
||||
truncated.contains(&expected_marker),
|
||||
"missing omitted marker: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
truncated.contains("line-0\n"),
|
||||
"expected head line to remain: {truncated}"
|
||||
);
|
||||
|
||||
let last_line = format!("line-{}\n", total_lines - 1);
|
||||
assert!(
|
||||
truncated.contains(&last_line),
|
||||
"expected tail line to remain: {truncated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
|
||||
let total_lines = MODEL_FORMAT_MAX_LINES + 42;
|
||||
let long_line = "x".repeat(256);
|
||||
let content: String = (0..total_lines)
|
||||
.map(|idx| format!("line-{idx}-{long_line}\n"))
|
||||
.collect();
|
||||
|
||||
let truncated = format_output_for_model_body(&content);
|
||||
|
||||
assert!(
|
||||
truncated.contains("[... omitted 42 of 298 lines ...]"),
|
||||
"expected omitted marker when line count exceeds limit: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
!truncated.contains("output truncated to fit"),
|
||||
"line omission marker should take precedence over byte marker: {truncated}"
|
||||
);
|
||||
}
|
||||
|
||||
//TODO(aibrahim): run CI in release mode.
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
|
||||
@@ -19,7 +19,6 @@ use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::format_exec_output;
|
||||
use super::format_exec_output_str;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
@@ -146,7 +145,7 @@ impl ToolEmitter {
|
||||
(*message).to_string(),
|
||||
-1,
|
||||
Duration::ZERO,
|
||||
format_exec_output(&message),
|
||||
message.clone(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
@@ -241,7 +240,7 @@ impl ToolEmitter {
|
||||
(*message).to_string(),
|
||||
-1,
|
||||
Duration::ZERO,
|
||||
format_exec_output(&message),
|
||||
message.clone(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
@@ -277,7 +276,7 @@ impl ToolEmitter {
|
||||
}
|
||||
Err(ToolError::Codex(err)) => {
|
||||
let message = format!("execution error: {err:?}");
|
||||
let response = super::format_exec_output(&message);
|
||||
let response = message.clone();
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(message));
|
||||
Err(FunctionCallError::RespondToModel(response))
|
||||
}
|
||||
@@ -289,9 +288,9 @@ impl ToolEmitter {
|
||||
} else {
|
||||
msg
|
||||
};
|
||||
let response = super::format_exec_output(&normalized);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
|
||||
Err(FunctionCallError::RespondToModel(response))
|
||||
let response = &normalized;
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
|
||||
Err(FunctionCallError::RespondToModel(response.clone()))
|
||||
}
|
||||
};
|
||||
self.emit(ctx, event).await;
|
||||
|
||||
@@ -9,19 +9,11 @@ pub mod runtimes;
|
||||
pub mod sandboxing;
|
||||
pub mod spec;
|
||||
|
||||
use crate::conversation_history::format_output_for_model_body;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
use codex_utils_string::take_last_bytes_at_char_boundary;
|
||||
pub use router::ToolRouter;
|
||||
use serde::Serialize;
|
||||
|
||||
// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
|
||||
pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
|
||||
pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
|
||||
pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
|
||||
pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
|
||||
pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
|
||||
|
||||
// Telemetry preview limits: keep log events smaller than model budgets.
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
|
||||
@@ -73,249 +65,15 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
||||
|
||||
let content = aggregated_output.text.as_str();
|
||||
|
||||
if exec_output.timed_out {
|
||||
let prefixed = format!(
|
||||
let body = if exec_output.timed_out {
|
||||
format!(
|
||||
"command timed out after {} milliseconds\n{content}",
|
||||
exec_output.duration.as_millis()
|
||||
);
|
||||
return format_exec_output(&prefixed);
|
||||
}
|
||||
|
||||
format_exec_output(content)
|
||||
}
|
||||
|
||||
pub(super) fn format_exec_output(content: &str) -> String {
|
||||
// Head+tail truncation for the model: show the beginning and end with an elision.
|
||||
// Clients still receive full streams; only this formatted summary is capped.
|
||||
let total_lines = content.lines().count();
|
||||
if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
|
||||
return content.to_string();
|
||||
}
|
||||
let output = truncate_formatted_exec_output(content, total_lines);
|
||||
format!("Total output lines: {total_lines}\n\n{output}")
|
||||
}
|
||||
|
||||
fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
|
||||
let segments: Vec<&str> = content.split_inclusive('\n').collect();
|
||||
let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
|
||||
let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
|
||||
let omitted = segments.len().saturating_sub(head_take + tail_take);
|
||||
|
||||
let head_slice_end: usize = segments
|
||||
.iter()
|
||||
.take(head_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum();
|
||||
let tail_slice_start: usize = if tail_take == 0 {
|
||||
content.len()
|
||||
} else {
|
||||
content.len()
|
||||
- segments
|
||||
.iter()
|
||||
.rev()
|
||||
.take(tail_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum::<usize>()
|
||||
};
|
||||
let head_slice = &content[..head_slice_end];
|
||||
let tail_slice = &content[tail_slice_start..];
|
||||
let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
|
||||
let marker = if omitted > 0 {
|
||||
Some(format!(
|
||||
"\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
|
||||
))
|
||||
} else if truncated_by_bytes {
|
||||
Some(format!(
|
||||
"\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let marker_len = marker.as_ref().map_or(0, String::len);
|
||||
let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
|
||||
let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len));
|
||||
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
|
||||
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
|
||||
|
||||
result.push_str(head_part);
|
||||
if let Some(marker_text) = marker.as_ref() {
|
||||
result.push_str(marker_text);
|
||||
}
|
||||
|
||||
let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
|
||||
if remaining == 0 {
|
||||
return result;
|
||||
}
|
||||
|
||||
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
|
||||
result.push_str(tail_part);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use regex_lite::Regex;
|
||||
|
||||
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
|
||||
match err {
|
||||
FunctionCallError::RespondToModel(msg) => {
|
||||
FunctionCallError::RespondToModel(format_exec_output(&msg))
|
||||
}
|
||||
FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
|
||||
FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
|
||||
let pattern = truncated_message_pattern(line, total_lines);
|
||||
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
|
||||
panic!("failed to compile regex {pattern}: {err}");
|
||||
});
|
||||
let captures = regex
|
||||
.captures(message)
|
||||
.unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
|
||||
let body = captures
|
||||
.name("body")
|
||||
.expect("missing body capture")
|
||||
.as_str();
|
||||
assert!(
|
||||
body.len() <= MODEL_FORMAT_MAX_BYTES,
|
||||
"body exceeds byte limit: {} bytes",
|
||||
body.len()
|
||||
);
|
||||
}
|
||||
|
||||
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
|
||||
let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
|
||||
let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
|
||||
let omitted = total_lines.saturating_sub(head_take + tail_take);
|
||||
let escaped_line = regex_lite::escape(line);
|
||||
if omitted == 0 {
|
||||
return format!(
|
||||
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
|
||||
);
|
||||
}
|
||||
format!(
|
||||
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
|
||||
)
|
||||
}
|
||||
} else {
|
||||
content.to_string()
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn truncate_formatted_exec_output_truncates_large_error() {
|
||||
let line = "very long execution error line that should trigger truncation\n";
|
||||
let large_error = line.repeat(2_500); // way beyond both byte and line limits
|
||||
|
||||
let truncated = format_exec_output(&large_error);
|
||||
|
||||
let total_lines = large_error.lines().count();
|
||||
assert_truncated_message_matches(&truncated, line, total_lines);
|
||||
assert_ne!(truncated, large_error);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_function_error_trims_respond_to_model() {
|
||||
let line = "respond-to-model error that should be truncated\n";
|
||||
let huge = line.repeat(3_000);
|
||||
let total_lines = huge.lines().count();
|
||||
|
||||
let err = truncate_function_error(FunctionCallError::RespondToModel(huge));
|
||||
match err {
|
||||
FunctionCallError::RespondToModel(message) => {
|
||||
assert_truncated_message_matches(&message, line, total_lines);
|
||||
}
|
||||
other => panic!("unexpected error variant: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_function_error_trims_fatal() {
|
||||
let line = "fatal error output that should be truncated\n";
|
||||
let huge = line.repeat(3_000);
|
||||
let total_lines = huge.lines().count();
|
||||
|
||||
let err = truncate_function_error(FunctionCallError::Fatal(huge));
|
||||
match err {
|
||||
FunctionCallError::Fatal(message) => {
|
||||
assert_truncated_message_matches(&message, line, total_lines);
|
||||
}
|
||||
other => panic!("unexpected error variant: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_formatted_exec_output_marks_byte_truncation_without_omitted_lines() {
|
||||
let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
|
||||
let truncated = format_exec_output(&long_line);
|
||||
|
||||
assert_ne!(truncated, long_line);
|
||||
let marker_line =
|
||||
format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
|
||||
assert!(
|
||||
truncated.contains(&marker_line),
|
||||
"missing byte truncation marker: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
!truncated.contains("omitted"),
|
||||
"line omission marker should not appear when no lines were dropped: {truncated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_formatted_exec_output_returns_original_when_within_limits() {
|
||||
let content = "example output\n".repeat(10);
|
||||
|
||||
assert_eq!(format_exec_output(&content), content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_formatted_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
|
||||
let total_lines = MODEL_FORMAT_MAX_LINES + 100;
|
||||
let content: String = (0..total_lines)
|
||||
.map(|idx| format!("line-{idx}\n"))
|
||||
.collect();
|
||||
|
||||
let truncated = format_exec_output(&content);
|
||||
let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
|
||||
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
|
||||
|
||||
assert!(
|
||||
truncated.contains(&expected_marker),
|
||||
"missing omitted marker: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
truncated.contains("line-0\n"),
|
||||
"expected head line to remain: {truncated}"
|
||||
);
|
||||
|
||||
let last_line = format!("line-{}\n", total_lines - 1);
|
||||
assert!(
|
||||
truncated.contains(&last_line),
|
||||
"expected tail line to remain: {truncated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_formatted_exec_output_prefers_line_marker_when_both_limits_exceeded() {
|
||||
let total_lines = MODEL_FORMAT_MAX_LINES + 42;
|
||||
let long_line = "x".repeat(256);
|
||||
let content: String = (0..total_lines)
|
||||
.map(|idx| format!("line-{idx}-{long_line}\n"))
|
||||
.collect();
|
||||
|
||||
let truncated = format_exec_output(&content);
|
||||
|
||||
assert!(
|
||||
truncated.contains("[... omitted 42 of 298 lines ...]"),
|
||||
"expected omitted marker when line count exceeds limit: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
!truncated.contains("output truncated to fit"),
|
||||
"line omission marker should take precedence over byte marker: {truncated}"
|
||||
);
|
||||
}
|
||||
// Truncate for model consumption before serialization.
|
||||
format_output_for_model_body(&body)
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ mod stream_no_completed;
|
||||
mod tool_harness;
|
||||
mod tool_parallelism;
|
||||
mod tools;
|
||||
mod truncation;
|
||||
mod unified_exec;
|
||||
mod user_notification;
|
||||
mod view_image;
|
||||
|
||||
270
codex-rs/core/tests/suite/truncation.rs
Normal file
270
codex-rs/core/tests/suite/truncation.rs
Normal file
@@ -0,0 +1,270 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use escargot::CargoBuild;
|
||||
use regex_lite::Regex;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
// Verifies byte-truncation formatting for function error output (RespondToModel errors)
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
// Use the test model that wires function tools like grep_files
|
||||
config.model = "test-gpt-5-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("test-gpt-5-codex").expect("model family for test model");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
// Construct a very long, non-existent path to force a RespondToModel error with a large message
|
||||
let long_path = "a".repeat(20_000);
|
||||
let call_id = "grep-huge-error";
|
||||
let args = json!({
|
||||
"pattern": "alpha",
|
||||
"path": long_path,
|
||||
"limit": 10
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "grep_files", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"trigger grep_files with long path to test truncation",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = mock
|
||||
.function_call_output_text(call_id)
|
||||
.context("function error output present")?;
|
||||
|
||||
tracing::debug!(output = %output, "truncated function error output");
|
||||
|
||||
// Expect plaintext with byte-truncation marker and no omitted-lines marker
|
||||
assert!(
|
||||
serde_json::from_str::<serde_json::Value>(&output).is_err(),
|
||||
"expected error output to be plain text",
|
||||
);
|
||||
let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]\s*$"#;
|
||||
assert_regex_match(truncated_pattern, &output);
|
||||
assert!(
|
||||
!output.contains("omitted"),
|
||||
"line omission marker should not appear when no lines were dropped: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Verifies that a standard tool call (shell) exceeding the model formatting
|
||||
// limits is truncated before being sent back to the model.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
// Use a model that exposes the generic shell tool.
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-too-large";
|
||||
let args = serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 400"],
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
|
||||
// First response: model tells us to run the tool; second: complete the turn.
|
||||
mount_sse_once_match(
|
||||
&server,
|
||||
any(),
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let mock2 = mount_sse_once_match(
|
||||
&server,
|
||||
any(),
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("msg-1", "done"),
|
||||
responses::ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
fixture
|
||||
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
|
||||
.await?;
|
||||
|
||||
// Inspect what we sent back to the model; it should contain a truncated
|
||||
// function_call_output for the shell call.
|
||||
let output = mock2
|
||||
.single_request()
|
||||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for shell call")?;
|
||||
|
||||
// Expect plain text (not JSON) with truncation markers and line elision.
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(&output).is_err(),
|
||||
"expected truncated shell output to be plain text"
|
||||
);
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0
|
||||
Wall time: .* seconds
|
||||
Total output lines: 400
|
||||
Output:
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
.*
|
||||
\[\.{3} omitted 144 of 400 lines \.{3}\]
|
||||
|
||||
.*
|
||||
396
|
||||
397
|
||||
398
|
||||
399
|
||||
400
|
||||
$"#;
|
||||
assert_regex_match(truncated_pattern, &output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Verifies that an MCP tool call result exceeding the model formatting limits
|
||||
// is truncated before being sent back to the model.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let call_id = "rmcp-truncated";
|
||||
let server_name = "rmcp";
|
||||
let tool_name = format!("mcp__{server_name}__echo");
|
||||
|
||||
// Build a very large message to exceed 10KiB once serialized.
|
||||
let large_msg = "long-message-with-newlines-".repeat(600);
|
||||
let args_json = serde_json::json!({ "message": large_msg });
|
||||
|
||||
mount_sse_once_match(
|
||||
&server,
|
||||
any(),
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, &tool_name, &args_json.to_string()),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let mock2 = mount_sse_once_match(
|
||||
&server,
|
||||
any(),
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("msg-1", "rmcp echo tool completed."),
|
||||
responses::ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Compile the rmcp stdio test server and configure it.
|
||||
let rmcp_test_server_bin = CargoBuild::new()
|
||||
.package("codex-rmcp-client")
|
||||
.bin("test_stdio_server")
|
||||
.run()?
|
||||
.path()
|
||||
.to_string_lossy()
|
||||
.into_owned();
|
||||
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::RmcpClient);
|
||||
config.mcp_servers.insert(
|
||||
server_name.to_string(),
|
||||
codex_core::config_types::McpServerConfig {
|
||||
transport: codex_core::config_types::McpServerTransportConfig::Stdio {
|
||||
command: rmcp_test_server_bin,
|
||||
args: Vec::new(),
|
||||
env: None,
|
||||
env_vars: Vec::new(),
|
||||
cwd: None,
|
||||
},
|
||||
enabled: true,
|
||||
startup_timeout_sec: Some(std::time::Duration::from_secs(10)),
|
||||
tool_timeout_sec: None,
|
||||
enabled_tools: None,
|
||||
disabled_tools: None,
|
||||
},
|
||||
);
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
fixture
|
||||
.submit_turn_with_policy(
|
||||
"call the rmcp echo tool with a very large message",
|
||||
SandboxPolicy::ReadOnly,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// The MCP tool call output is converted to a function_call_output for the model.
|
||||
let output = mock2
|
||||
.single_request()
|
||||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for rmcp call")?;
|
||||
|
||||
// Expect plain text with byte-based truncation marker.
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(&output).is_err(),
|
||||
"expected truncated MCP output to be plain text"
|
||||
);
|
||||
assert!(
|
||||
output.starts_with("Total output lines: 1\n\n{"),
|
||||
"expected total line header and JSON head, got: {output}"
|
||||
);
|
||||
let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]")
|
||||
.expect("compile regex");
|
||||
assert!(
|
||||
byte_marker.is_match(&output),
|
||||
"expected byte truncation marker, got: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user