Centralize truncation in conversation history (#5652)

move the truncation logic to conversation history to use on any tool
output. This will help us in avoiding edge cases while truncating the
tool calls and mcp calls.
This commit is contained in:
Ahmed Ibrahim
2025-10-27 14:05:35 -07:00
committed by GitHub
parent 0fc295d958
commit 7226365397
6 changed files with 588 additions and 365 deletions

View File

@@ -2366,10 +2366,6 @@ mod tests {
use crate::state::TaskKind;
use crate::tasks::SessionTask;
use crate::tasks::SessionTaskContext;
use crate::tools::MODEL_FORMAT_HEAD_LINES;
use crate::tools::MODEL_FORMAT_MAX_BYTES;
use crate::tools::MODEL_FORMAT_MAX_LINES;
use crate::tools::MODEL_FORMAT_TAIL_LINES;
use crate::tools::ToolRouter;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
@@ -2456,95 +2452,6 @@ mod tests {
assert_eq!(expected, got);
}
#[test]
fn model_truncation_head_tail_by_lines() {
// Build 400 short lines so line-count limit, not byte budget, triggers truncation
let lines: Vec<String> = (1..=400).map(|i| format!("line{i}")).collect();
let full = lines.join("\n");
let exec = ExecToolCallOutput {
exit_code: 0,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(full),
duration: StdDuration::from_secs(1),
timed_out: false,
};
let out = format_exec_output_str(&exec);
// Strip truncation header if present for subsequent assertions
let body = out
.strip_prefix("Total output lines: ")
.and_then(|rest| rest.split_once("\n\n").map(|x| x.1))
.unwrap_or(out.as_str());
// Expect elision marker with correct counts
let omitted = 400 - MODEL_FORMAT_MAX_LINES; // 144
let marker = format!("\n[... omitted {omitted} of 400 lines ...]\n\n");
assert!(out.contains(&marker), "missing marker: {out}");
// Validate head and tail
let parts: Vec<&str> = body.split(&marker).collect();
assert_eq!(parts.len(), 2, "expected one marker split");
let head = parts[0];
let tail = parts[1];
let expected_head: String = (1..=MODEL_FORMAT_HEAD_LINES)
.map(|i| format!("line{i}"))
.collect::<Vec<_>>()
.join("\n");
assert!(head.starts_with(&expected_head), "head mismatch");
let expected_tail: String = ((400 - MODEL_FORMAT_TAIL_LINES + 1)..=400)
.map(|i| format!("line{i}"))
.collect::<Vec<_>>()
.join("\n");
assert!(tail.ends_with(&expected_tail), "tail mismatch");
}
#[test]
fn model_truncation_respects_byte_budget() {
// Construct a large output (about 100kB) so byte budget dominates
let big_line = "x".repeat(100);
let full = std::iter::repeat_n(big_line, 1000)
.collect::<Vec<_>>()
.join("\n");
let exec = ExecToolCallOutput {
exit_code: 0,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(full.clone()),
duration: StdDuration::from_secs(1),
timed_out: false,
};
let out = format_exec_output_str(&exec);
// Keep strict budget on the truncated body (excluding header)
let body = out
.strip_prefix("Total output lines: ")
.and_then(|rest| rest.split_once("\n\n").map(|x| x.1))
.unwrap_or(out.as_str());
assert!(body.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget");
assert!(out.contains("omitted"), "should contain elision marker");
// Ensure head and tail are drawn from the original
assert!(full.starts_with(body.chars().take(8).collect::<String>().as_str()));
assert!(
full.ends_with(
body.chars()
.rev()
.take(8)
.collect::<String>()
.chars()
.rev()
.collect::<String>()
.as_str()
)
);
}
#[test]
fn includes_timed_out_message() {
let exec = ExecToolCallOutput {

View File

@@ -2,9 +2,18 @@ use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::TokenUsage;
use codex_protocol::protocol::TokenUsageInfo;
use codex_utils_string::take_bytes_at_char_boundary;
use codex_utils_string::take_last_bytes_at_char_boundary;
use std::ops::Deref;
use tracing::error;
// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
/// Transcript of conversation history
#[derive(Debug, Clone, Default)]
pub(crate) struct ConversationHistory {
@@ -47,7 +56,8 @@ impl ConversationHistory {
continue;
}
self.items.push(item.clone());
let processed = Self::process_item(&item);
self.items.push(processed);
}
}
@@ -68,6 +78,22 @@ impl ConversationHistory {
}
}
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
self.items = items;
}
pub(crate) fn update_token_info(
&mut self,
usage: &TokenUsage,
model_context_window: Option<i64>,
) {
self.token_info = TokenUsageInfo::new_or_append(
&self.token_info,
&Some(usage.clone()),
model_context_window,
);
}
/// This function enforces a couple of invariants on the in-memory history:
/// 1. every call (function/custom) has a corresponding output entry
/// 2. every output has a corresponding call entry
@@ -253,10 +279,6 @@ impl ConversationHistory {
}
}
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
self.items = items;
}
/// Removes the corresponding paired item for the provided `item`, if any.
///
/// Pairs:
@@ -326,19 +348,108 @@ impl ConversationHistory {
}
}
pub(crate) fn update_token_info(
&mut self,
usage: &TokenUsage,
model_context_window: Option<i64>,
) {
self.token_info = TokenUsageInfo::new_or_append(
&self.token_info,
&Some(usage.clone()),
model_context_window,
);
fn process_item(item: &ResponseItem) -> ResponseItem {
match item {
ResponseItem::FunctionCallOutput { call_id, output } => {
let truncated = format_output_for_model_body(output.content.as_str());
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content: truncated,
success: output.success,
},
}
}
ResponseItem::CustomToolCallOutput { call_id, output } => {
let truncated = format_output_for_model_body(output);
ResponseItem::CustomToolCallOutput {
call_id: call_id.clone(),
output: truncated,
}
}
ResponseItem::Message { .. }
| ResponseItem::Reasoning { .. }
| ResponseItem::LocalShellCall { .. }
| ResponseItem::FunctionCall { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::CustomToolCall { .. }
| ResponseItem::GhostSnapshot { .. }
| ResponseItem::Other => item.clone(),
}
}
}
pub(crate) fn format_output_for_model_body(content: &str) -> String {
// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.
let total_lines = content.lines().count();
if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
return content.to_string();
}
let output = truncate_formatted_exec_output(content, total_lines);
format!("Total output lines: {total_lines}\n\n{output}")
}
fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
let segments: Vec<&str> = content.split_inclusive('\n').collect();
let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
let omitted = segments.len().saturating_sub(head_take + tail_take);
let head_slice_end: usize = segments
.iter()
.take(head_take)
.map(|segment| segment.len())
.sum();
let tail_slice_start: usize = if tail_take == 0 {
content.len()
} else {
content.len()
- segments
.iter()
.rev()
.take(tail_take)
.map(|segment| segment.len())
.sum::<usize>()
};
let head_slice = &content[..head_slice_end];
let tail_slice = &content[tail_slice_start..];
let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
// this is a bit wrong. We are counting metadata lines and not just shell output lines.
let marker = if omitted > 0 {
Some(format!(
"\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
))
} else if truncated_by_bytes {
Some(format!(
"\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
))
} else {
None
};
let marker_len = marker.as_ref().map_or(0, String::len);
let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len));
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
result.push_str(head_part);
if let Some(marker_text) = marker.as_ref() {
result.push_str(marker_text);
}
let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
if remaining == 0 {
return result;
}
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
result.push_str(tail_part);
result
}
#[inline]
fn error_or_panic(message: String) {
if cfg!(debug_assertions) || env!("CARGO_PKG_VERSION").contains("alpha") {
@@ -533,6 +644,183 @@ mod tests {
assert_eq!(h.contents(), vec![]);
}
#[test]
fn record_items_truncates_function_call_output_content() {
let mut history = ConversationHistory::new();
let long_line = "a very long line to trigger truncation\n";
let long_output = long_line.repeat(2_500);
let item = ResponseItem::FunctionCallOutput {
call_id: "call-100".to_string(),
output: FunctionCallOutputPayload {
content: long_output.clone(),
success: Some(true),
},
};
history.record_items([&item]);
assert_eq!(history.items.len(), 1);
match &history.items[0] {
ResponseItem::FunctionCallOutput { output, .. } => {
assert_ne!(output.content, long_output);
assert!(
output.content.starts_with("Total output lines:"),
"expected truncated summary, got {}",
output.content
);
}
other => panic!("unexpected history item: {other:?}"),
}
}
#[test]
fn record_items_truncates_custom_tool_call_output_content() {
let mut history = ConversationHistory::new();
let line = "custom output that is very long\n";
let long_output = line.repeat(2_500);
let item = ResponseItem::CustomToolCallOutput {
call_id: "tool-200".to_string(),
output: long_output.clone(),
};
history.record_items([&item]);
assert_eq!(history.items.len(), 1);
match &history.items[0] {
ResponseItem::CustomToolCallOutput { output, .. } => {
assert_ne!(output, &long_output);
assert!(
output.starts_with("Total output lines:"),
"expected truncated summary, got {output}"
);
}
other => panic!("unexpected history item: {other:?}"),
}
}
// The following tests were adapted from tools::mod truncation tests to
// target the new truncation functions in conversation_history.
use regex_lite::Regex;
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
let pattern = truncated_message_pattern(line, total_lines);
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
panic!("failed to compile regex {pattern}: {err}");
});
let captures = regex
.captures(message)
.unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
let body = captures
.name("body")
.expect("missing body capture")
.as_str();
assert!(
body.len() <= MODEL_FORMAT_MAX_BYTES,
"body exceeds byte limit: {} bytes",
body.len()
);
}
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
let omitted = total_lines.saturating_sub(head_take + tail_take);
let escaped_line = regex_lite::escape(line);
if omitted == 0 {
return format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
);
}
format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
)
}
#[test]
fn format_exec_output_truncates_large_error() {
let line = "very long execution error line that should trigger truncation\n";
let large_error = line.repeat(2_500); // way beyond both byte and line limits
let truncated = format_output_for_model_body(&large_error);
let total_lines = large_error.lines().count();
assert_truncated_message_matches(&truncated, line, total_lines);
assert_ne!(truncated, large_error);
}
#[test]
fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
let truncated = format_output_for_model_body(&long_line);
assert_ne!(truncated, long_line);
let marker_line =
format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
assert!(
truncated.contains(&marker_line),
"missing byte truncation marker: {truncated}"
);
assert!(
!truncated.contains("omitted"),
"line omission marker should not appear when no lines were dropped: {truncated}"
);
}
#[test]
fn format_exec_output_returns_original_when_within_limits() {
let content = "example output\n".repeat(10);
assert_eq!(format_output_for_model_body(&content), content);
}
#[test]
fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
let total_lines = MODEL_FORMAT_MAX_LINES + 100;
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}\n"))
.collect();
let truncated = format_output_for_model_body(&content);
let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
assert!(
truncated.contains(&expected_marker),
"missing omitted marker: {truncated}"
);
assert!(
truncated.contains("line-0\n"),
"expected head line to remain: {truncated}"
);
let last_line = format!("line-{}\n", total_lines - 1);
assert!(
truncated.contains(&last_line),
"expected tail line to remain: {truncated}"
);
}
#[test]
fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
let total_lines = MODEL_FORMAT_MAX_LINES + 42;
let long_line = "x".repeat(256);
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}-{long_line}\n"))
.collect();
let truncated = format_output_for_model_body(&content);
assert!(
truncated.contains("[... omitted 42 of 298 lines ...]"),
"expected omitted marker when line count exceeds limit: {truncated}"
);
assert!(
!truncated.contains("output truncated to fit"),
"line omission marker should take precedence over byte marker: {truncated}"
);
}
//TODO(aibrahim): run CI in release mode.
#[cfg(not(debug_assertions))]
#[test]

View File

@@ -19,7 +19,6 @@ use std::path::Path;
use std::path::PathBuf;
use std::time::Duration;
use super::format_exec_output;
use super::format_exec_output_str;
#[derive(Clone, Copy)]
@@ -146,7 +145,7 @@ impl ToolEmitter {
(*message).to_string(),
-1,
Duration::ZERO,
format_exec_output(&message),
message.clone(),
)
.await;
}
@@ -241,7 +240,7 @@ impl ToolEmitter {
(*message).to_string(),
-1,
Duration::ZERO,
format_exec_output(&message),
message.clone(),
)
.await;
}
@@ -277,7 +276,7 @@ impl ToolEmitter {
}
Err(ToolError::Codex(err)) => {
let message = format!("execution error: {err:?}");
let response = super::format_exec_output(&message);
let response = message.clone();
event = ToolEventStage::Failure(ToolEventFailure::Message(message));
Err(FunctionCallError::RespondToModel(response))
}
@@ -289,9 +288,9 @@ impl ToolEmitter {
} else {
msg
};
let response = super::format_exec_output(&normalized);
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
Err(FunctionCallError::RespondToModel(response))
let response = &normalized;
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
Err(FunctionCallError::RespondToModel(response.clone()))
}
};
self.emit(ctx, event).await;

View File

@@ -9,19 +9,11 @@ pub mod runtimes;
pub mod sandboxing;
pub mod spec;
use crate::conversation_history::format_output_for_model_body;
use crate::exec::ExecToolCallOutput;
use codex_utils_string::take_bytes_at_char_boundary;
use codex_utils_string::take_last_bytes_at_char_boundary;
pub use router::ToolRouter;
use serde::Serialize;
// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
// Telemetry preview limits: keep log events smaller than model budgets.
pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
@@ -73,249 +65,15 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
let content = aggregated_output.text.as_str();
if exec_output.timed_out {
let prefixed = format!(
let body = if exec_output.timed_out {
format!(
"command timed out after {} milliseconds\n{content}",
exec_output.duration.as_millis()
);
return format_exec_output(&prefixed);
}
format_exec_output(content)
}
pub(super) fn format_exec_output(content: &str) -> String {
// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.
let total_lines = content.lines().count();
if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
return content.to_string();
}
let output = truncate_formatted_exec_output(content, total_lines);
format!("Total output lines: {total_lines}\n\n{output}")
}
fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
let segments: Vec<&str> = content.split_inclusive('\n').collect();
let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
let omitted = segments.len().saturating_sub(head_take + tail_take);
let head_slice_end: usize = segments
.iter()
.take(head_take)
.map(|segment| segment.len())
.sum();
let tail_slice_start: usize = if tail_take == 0 {
content.len()
} else {
content.len()
- segments
.iter()
.rev()
.take(tail_take)
.map(|segment| segment.len())
.sum::<usize>()
};
let head_slice = &content[..head_slice_end];
let tail_slice = &content[tail_slice_start..];
let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
let marker = if omitted > 0 {
Some(format!(
"\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
))
} else if truncated_by_bytes {
Some(format!(
"\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
))
} else {
None
};
let marker_len = marker.as_ref().map_or(0, String::len);
let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len));
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
result.push_str(head_part);
if let Some(marker_text) = marker.as_ref() {
result.push_str(marker_text);
}
let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
if remaining == 0 {
return result;
}
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
result.push_str(tail_part);
result
}
#[cfg(test)]
mod tests {
use super::*;
use crate::function_tool::FunctionCallError;
use regex_lite::Regex;
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
match err {
FunctionCallError::RespondToModel(msg) => {
FunctionCallError::RespondToModel(format_exec_output(&msg))
}
FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
other => other,
}
}
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
let pattern = truncated_message_pattern(line, total_lines);
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
panic!("failed to compile regex {pattern}: {err}");
});
let captures = regex
.captures(message)
.unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
let body = captures
.name("body")
.expect("missing body capture")
.as_str();
assert!(
body.len() <= MODEL_FORMAT_MAX_BYTES,
"body exceeds byte limit: {} bytes",
body.len()
);
}
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
let omitted = total_lines.saturating_sub(head_take + tail_take);
let escaped_line = regex_lite::escape(line);
if omitted == 0 {
return format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
);
}
format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
)
}
} else {
content.to_string()
};
#[test]
fn truncate_formatted_exec_output_truncates_large_error() {
let line = "very long execution error line that should trigger truncation\n";
let large_error = line.repeat(2_500); // way beyond both byte and line limits
let truncated = format_exec_output(&large_error);
let total_lines = large_error.lines().count();
assert_truncated_message_matches(&truncated, line, total_lines);
assert_ne!(truncated, large_error);
}
#[test]
fn truncate_function_error_trims_respond_to_model() {
let line = "respond-to-model error that should be truncated\n";
let huge = line.repeat(3_000);
let total_lines = huge.lines().count();
let err = truncate_function_error(FunctionCallError::RespondToModel(huge));
match err {
FunctionCallError::RespondToModel(message) => {
assert_truncated_message_matches(&message, line, total_lines);
}
other => panic!("unexpected error variant: {other:?}"),
}
}
#[test]
fn truncate_function_error_trims_fatal() {
let line = "fatal error output that should be truncated\n";
let huge = line.repeat(3_000);
let total_lines = huge.lines().count();
let err = truncate_function_error(FunctionCallError::Fatal(huge));
match err {
FunctionCallError::Fatal(message) => {
assert_truncated_message_matches(&message, line, total_lines);
}
other => panic!("unexpected error variant: {other:?}"),
}
}
#[test]
fn truncate_formatted_exec_output_marks_byte_truncation_without_omitted_lines() {
let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
let truncated = format_exec_output(&long_line);
assert_ne!(truncated, long_line);
let marker_line =
format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
assert!(
truncated.contains(&marker_line),
"missing byte truncation marker: {truncated}"
);
assert!(
!truncated.contains("omitted"),
"line omission marker should not appear when no lines were dropped: {truncated}"
);
}
#[test]
fn truncate_formatted_exec_output_returns_original_when_within_limits() {
let content = "example output\n".repeat(10);
assert_eq!(format_exec_output(&content), content);
}
#[test]
fn truncate_formatted_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
let total_lines = MODEL_FORMAT_MAX_LINES + 100;
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}\n"))
.collect();
let truncated = format_exec_output(&content);
let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
assert!(
truncated.contains(&expected_marker),
"missing omitted marker: {truncated}"
);
assert!(
truncated.contains("line-0\n"),
"expected head line to remain: {truncated}"
);
let last_line = format!("line-{}\n", total_lines - 1);
assert!(
truncated.contains(&last_line),
"expected tail line to remain: {truncated}"
);
}
#[test]
fn truncate_formatted_exec_output_prefers_line_marker_when_both_limits_exceeded() {
let total_lines = MODEL_FORMAT_MAX_LINES + 42;
let long_line = "x".repeat(256);
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}-{long_line}\n"))
.collect();
let truncated = format_exec_output(&content);
assert!(
truncated.contains("[... omitted 42 of 298 lines ...]"),
"expected omitted marker when line count exceeds limit: {truncated}"
);
assert!(
!truncated.contains("output truncated to fit"),
"line omission marker should take precedence over byte marker: {truncated}"
);
}
// Truncate for model consumption before serialization.
format_output_for_model_body(&body)
}