Centralize truncation in conversation history (#5652)

move the truncation logic to conversation history to use on any tool output. This will help us in avoiding edge cases while truncating the tool calls and mcp calls.
2025-10-27 14:05:35 -07:00
parent 0fc295d958
commit 7226365397
6 changed files with 588 additions and 365 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -2366,10 +2366,6 @@ mod tests {
    use crate::state::TaskKind;
    use crate::tasks::SessionTask;
    use crate::tasks::SessionTaskContext;
-    use crate::tools::MODEL_FORMAT_HEAD_LINES;
-    use crate::tools::MODEL_FORMAT_MAX_BYTES;
-    use crate::tools::MODEL_FORMAT_MAX_LINES;
-    use crate::tools::MODEL_FORMAT_TAIL_LINES;
    use crate::tools::ToolRouter;
    use crate::tools::context::ToolInvocation;
    use crate::tools::context::ToolOutput;
@@ -2456,95 +2452,6 @@ mod tests {
        assert_eq!(expected, got);
    }

-    #[test]
-    fn model_truncation_head_tail_by_lines() {
-        // Build 400 short lines so line-count limit, not byte budget, triggers truncation
-        let lines: Vec<String> = (1..=400).map(|i| format!("line{i}")).collect();
-        let full = lines.join("\n");
-
-        let exec = ExecToolCallOutput {
-            exit_code: 0,
-            stdout: StreamOutput::new(String::new()),
-            stderr: StreamOutput::new(String::new()),
-            aggregated_output: StreamOutput::new(full),
-            duration: StdDuration::from_secs(1),
-            timed_out: false,
-        };
-
-        let out = format_exec_output_str(&exec);
-
-        // Strip truncation header if present for subsequent assertions
-        let body = out
-            .strip_prefix("Total output lines: ")
-            .and_then(|rest| rest.split_once("\n\n").map(|x| x.1))
-            .unwrap_or(out.as_str());
-
-        // Expect elision marker with correct counts
-        let omitted = 400 - MODEL_FORMAT_MAX_LINES; // 144
-        let marker = format!("\n[... omitted {omitted} of 400 lines ...]\n\n");
-        assert!(out.contains(&marker), "missing marker: {out}");
-
-        // Validate head and tail
-        let parts: Vec<&str> = body.split(&marker).collect();
-        assert_eq!(parts.len(), 2, "expected one marker split");
-        let head = parts[0];
-        let tail = parts[1];
-
-        let expected_head: String = (1..=MODEL_FORMAT_HEAD_LINES)
-            .map(|i| format!("line{i}"))
-            .collect::<Vec<_>>()
-            .join("\n");
-        assert!(head.starts_with(&expected_head), "head mismatch");
-
-        let expected_tail: String = ((400 - MODEL_FORMAT_TAIL_LINES + 1)..=400)
-            .map(|i| format!("line{i}"))
-            .collect::<Vec<_>>()
-            .join("\n");
-        assert!(tail.ends_with(&expected_tail), "tail mismatch");
-    }
-
-    #[test]
-    fn model_truncation_respects_byte_budget() {
-        // Construct a large output (about 100kB) so byte budget dominates
-        let big_line = "x".repeat(100);
-        let full = std::iter::repeat_n(big_line, 1000)
-            .collect::<Vec<_>>()
-            .join("\n");
-
-        let exec = ExecToolCallOutput {
-            exit_code: 0,
-            stdout: StreamOutput::new(String::new()),
-            stderr: StreamOutput::new(String::new()),
-            aggregated_output: StreamOutput::new(full.clone()),
-            duration: StdDuration::from_secs(1),
-            timed_out: false,
-        };
-
-        let out = format_exec_output_str(&exec);
-        // Keep strict budget on the truncated body (excluding header)
-        let body = out
-            .strip_prefix("Total output lines: ")
-            .and_then(|rest| rest.split_once("\n\n").map(|x| x.1))
-            .unwrap_or(out.as_str());
-        assert!(body.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget");
-        assert!(out.contains("omitted"), "should contain elision marker");
-
-        // Ensure head and tail are drawn from the original
-        assert!(full.starts_with(body.chars().take(8).collect::<String>().as_str()));
-        assert!(
-            full.ends_with(
-                body.chars()
-                    .rev()
-                    .take(8)
-                    .collect::<String>()
-                    .chars()
-                    .rev()
-                    .collect::<String>()
-                    .as_str()
-            )
-        );
-    }
-
    #[test]
    fn includes_timed_out_message() {
        let exec = ExecToolCallOutput {
--- a/codex-rs/core/src/conversation_history.rs
+++ b/codex-rs/core/src/conversation_history.rs
@@ -2,9 +2,18 @@ use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;
 use codex_protocol::protocol::TokenUsage;
 use codex_protocol::protocol::TokenUsageInfo;
+use codex_utils_string::take_bytes_at_char_boundary;
+use codex_utils_string::take_last_bytes_at_char_boundary;
 use std::ops::Deref;
 use tracing::error;

+// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
+pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
+pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
+pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
+pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
+pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
+
 /// Transcript of conversation history
 #[derive(Debug, Clone, Default)]
 pub(crate) struct ConversationHistory {
@@ -47,7 +56,8 @@ impl ConversationHistory {
                continue;
            }

-            self.items.push(item.clone());
+            let processed = Self::process_item(&item);
+            self.items.push(processed);
        }
    }

@@ -68,6 +78,22 @@ impl ConversationHistory {
        }
    }

+    pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
+        self.items = items;
+    }
+
+    pub(crate) fn update_token_info(
+        &mut self,
+        usage: &TokenUsage,
+        model_context_window: Option<i64>,
+    ) {
+        self.token_info = TokenUsageInfo::new_or_append(
+            &self.token_info,
+            &Some(usage.clone()),
+            model_context_window,
+        );
+    }
+
    /// This function enforces a couple of invariants on the in-memory history:
    /// 1. every call (function/custom) has a corresponding output entry
    /// 2. every output has a corresponding call entry
@@ -253,10 +279,6 @@ impl ConversationHistory {
        }
    }

-    pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
-        self.items = items;
-    }
-
    /// Removes the corresponding paired item for the provided `item`, if any.
    ///
    /// Pairs:
@@ -326,19 +348,108 @@ impl ConversationHistory {
        }
    }

-    pub(crate) fn update_token_info(
-        &mut self,
-        usage: &TokenUsage,
-        model_context_window: Option<i64>,
-    ) {
-        self.token_info = TokenUsageInfo::new_or_append(
-            &self.token_info,
-            &Some(usage.clone()),
-            model_context_window,
-        );
+    fn process_item(item: &ResponseItem) -> ResponseItem {
+        match item {
+            ResponseItem::FunctionCallOutput { call_id, output } => {
+                let truncated = format_output_for_model_body(output.content.as_str());
+                ResponseItem::FunctionCallOutput {
+                    call_id: call_id.clone(),
+                    output: FunctionCallOutputPayload {
+                        content: truncated,
+                        success: output.success,
+                    },
+                }
+            }
+            ResponseItem::CustomToolCallOutput { call_id, output } => {
+                let truncated = format_output_for_model_body(output);
+                ResponseItem::CustomToolCallOutput {
+                    call_id: call_id.clone(),
+                    output: truncated,
+                }
+            }
+            ResponseItem::Message { .. }
+            | ResponseItem::Reasoning { .. }
+            | ResponseItem::LocalShellCall { .. }
+            | ResponseItem::FunctionCall { .. }
+            | ResponseItem::WebSearchCall { .. }
+            | ResponseItem::CustomToolCall { .. }
+            | ResponseItem::GhostSnapshot { .. }
+            | ResponseItem::Other => item.clone(),
+        }
    }
 }

+pub(crate) fn format_output_for_model_body(content: &str) -> String {
+    // Head+tail truncation for the model: show the beginning and end with an elision.
+    // Clients still receive full streams; only this formatted summary is capped.
+    let total_lines = content.lines().count();
+    if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
+        return content.to_string();
+    }
+    let output = truncate_formatted_exec_output(content, total_lines);
+    format!("Total output lines: {total_lines}\n\n{output}")
+}
+
+fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
+    let segments: Vec<&str> = content.split_inclusive('\n').collect();
+    let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
+    let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
+    let omitted = segments.len().saturating_sub(head_take + tail_take);
+
+    let head_slice_end: usize = segments
+        .iter()
+        .take(head_take)
+        .map(|segment| segment.len())
+        .sum();
+    let tail_slice_start: usize = if tail_take == 0 {
+        content.len()
+    } else {
+        content.len()
+            - segments
+                .iter()
+                .rev()
+                .take(tail_take)
+                .map(|segment| segment.len())
+                .sum::<usize>()
+    };
+    let head_slice = &content[..head_slice_end];
+    let tail_slice = &content[tail_slice_start..];
+    let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
+    // this is a bit wrong. We are counting metadata lines and not just shell output lines.
+    let marker = if omitted > 0 {
+        Some(format!(
+            "\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
+        ))
+    } else if truncated_by_bytes {
+        Some(format!(
+            "\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
+        ))
+    } else {
+        None
+    };
+
+    let marker_len = marker.as_ref().map_or(0, String::len);
+    let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
+    let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len));
+    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
+    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
+
+    result.push_str(head_part);
+    if let Some(marker_text) = marker.as_ref() {
+        result.push_str(marker_text);
+    }
+
+    let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
+    if remaining == 0 {
+        return result;
+    }
+
+    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
+    result.push_str(tail_part);
+
+    result
+}
+
 #[inline]
 fn error_or_panic(message: String) {
    if cfg!(debug_assertions) || env!("CARGO_PKG_VERSION").contains("alpha") {
@@ -533,6 +644,183 @@ mod tests {
        assert_eq!(h.contents(), vec![]);
    }

+    #[test]
+    fn record_items_truncates_function_call_output_content() {
+        let mut history = ConversationHistory::new();
+        let long_line = "a very long line to trigger truncation\n";
+        let long_output = long_line.repeat(2_500);
+        let item = ResponseItem::FunctionCallOutput {
+            call_id: "call-100".to_string(),
+            output: FunctionCallOutputPayload {
+                content: long_output.clone(),
+                success: Some(true),
+            },
+        };
+
+        history.record_items([&item]);
+
+        assert_eq!(history.items.len(), 1);
+        match &history.items[0] {
+            ResponseItem::FunctionCallOutput { output, .. } => {
+                assert_ne!(output.content, long_output);
+                assert!(
+                    output.content.starts_with("Total output lines:"),
+                    "expected truncated summary, got {}",
+                    output.content
+                );
+            }
+            other => panic!("unexpected history item: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn record_items_truncates_custom_tool_call_output_content() {
+        let mut history = ConversationHistory::new();
+        let line = "custom output that is very long\n";
+        let long_output = line.repeat(2_500);
+        let item = ResponseItem::CustomToolCallOutput {
+            call_id: "tool-200".to_string(),
+            output: long_output.clone(),
+        };
+
+        history.record_items([&item]);
+
+        assert_eq!(history.items.len(), 1);
+        match &history.items[0] {
+            ResponseItem::CustomToolCallOutput { output, .. } => {
+                assert_ne!(output, &long_output);
+                assert!(
+                    output.starts_with("Total output lines:"),
+                    "expected truncated summary, got {output}"
+                );
+            }
+            other => panic!("unexpected history item: {other:?}"),
+        }
+    }
+
+    // The following tests were adapted from tools::mod truncation tests to
+    // target the new truncation functions in conversation_history.
+
+    use regex_lite::Regex;
+
+    fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
+        let pattern = truncated_message_pattern(line, total_lines);
+        let regex = Regex::new(&pattern).unwrap_or_else(|err| {
+            panic!("failed to compile regex {pattern}: {err}");
+        });
+        let captures = regex
+            .captures(message)
+            .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
+        let body = captures
+            .name("body")
+            .expect("missing body capture")
+            .as_str();
+        assert!(
+            body.len() <= MODEL_FORMAT_MAX_BYTES,
+            "body exceeds byte limit: {} bytes",
+            body.len()
+        );
+    }
+
+    fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
+        let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
+        let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
+        let omitted = total_lines.saturating_sub(head_take + tail_take);
+        let escaped_line = regex_lite::escape(line);
+        if omitted == 0 {
+            return format!(
+                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
+            );
+        }
+        format!(
+            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
+        )
+    }
+
+    #[test]
+    fn format_exec_output_truncates_large_error() {
+        let line = "very long execution error line that should trigger truncation\n";
+        let large_error = line.repeat(2_500); // way beyond both byte and line limits
+
+        let truncated = format_output_for_model_body(&large_error);
+
+        let total_lines = large_error.lines().count();
+        assert_truncated_message_matches(&truncated, line, total_lines);
+        assert_ne!(truncated, large_error);
+    }
+
+    #[test]
+    fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
+        let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
+        let truncated = format_output_for_model_body(&long_line);
+
+        assert_ne!(truncated, long_line);
+        let marker_line =
+            format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
+        assert!(
+            truncated.contains(&marker_line),
+            "missing byte truncation marker: {truncated}"
+        );
+        assert!(
+            !truncated.contains("omitted"),
+            "line omission marker should not appear when no lines were dropped: {truncated}"
+        );
+    }
+
+    #[test]
+    fn format_exec_output_returns_original_when_within_limits() {
+        let content = "example output\n".repeat(10);
+
+        assert_eq!(format_output_for_model_body(&content), content);
+    }
+
+    #[test]
+    fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
+        let total_lines = MODEL_FORMAT_MAX_LINES + 100;
+        let content: String = (0..total_lines)
+            .map(|idx| format!("line-{idx}\n"))
+            .collect();
+
+        let truncated = format_output_for_model_body(&content);
+        let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
+        let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
+
+        assert!(
+            truncated.contains(&expected_marker),
+            "missing omitted marker: {truncated}"
+        );
+        assert!(
+            truncated.contains("line-0\n"),
+            "expected head line to remain: {truncated}"
+        );
+
+        let last_line = format!("line-{}\n", total_lines - 1);
+        assert!(
+            truncated.contains(&last_line),
+            "expected tail line to remain: {truncated}"
+        );
+    }
+
+    #[test]
+    fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
+        let total_lines = MODEL_FORMAT_MAX_LINES + 42;
+        let long_line = "x".repeat(256);
+        let content: String = (0..total_lines)
+            .map(|idx| format!("line-{idx}-{long_line}\n"))
+            .collect();
+
+        let truncated = format_output_for_model_body(&content);
+
+        assert!(
+            truncated.contains("[... omitted 42 of 298 lines ...]"),
+            "expected omitted marker when line count exceeds limit: {truncated}"
+        );
+        assert!(
+            !truncated.contains("output truncated to fit"),
+            "line omission marker should take precedence over byte marker: {truncated}"
+        );
+    }
+
    //TODO(aibrahim): run CI in release mode.
    #[cfg(not(debug_assertions))]
    #[test]
--- a/codex-rs/core/src/tools/events.rs
+++ b/codex-rs/core/src/tools/events.rs
@@ -19,7 +19,6 @@ use std::path::Path;
 use std::path::PathBuf;
 use std::time::Duration;

-use super::format_exec_output;
 use super::format_exec_output_str;

 #[derive(Clone, Copy)]
@@ -146,7 +145,7 @@ impl ToolEmitter {
                    (*message).to_string(),
                    -1,
                    Duration::ZERO,
-                    format_exec_output(&message),
+                    message.clone(),
                )
                .await;
            }
@@ -241,7 +240,7 @@ impl ToolEmitter {
                    (*message).to_string(),
                    -1,
                    Duration::ZERO,
-                    format_exec_output(&message),
+                    message.clone(),
                )
                .await;
            }
@@ -277,7 +276,7 @@ impl ToolEmitter {
            }
            Err(ToolError::Codex(err)) => {
                let message = format!("execution error: {err:?}");
-                let response = super::format_exec_output(&message);
+                let response = message.clone();
                event = ToolEventStage::Failure(ToolEventFailure::Message(message));
                Err(FunctionCallError::RespondToModel(response))
            }
@@ -289,9 +288,9 @@ impl ToolEmitter {
                } else {
                    msg
                };
-                let response = super::format_exec_output(&normalized);
-                event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
-                Err(FunctionCallError::RespondToModel(response))
+                let response = &normalized;
+                event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
+                Err(FunctionCallError::RespondToModel(response.clone()))
            }
        };
        self.emit(ctx, event).await;
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -9,19 +9,11 @@ pub mod runtimes;
 pub mod sandboxing;
 pub mod spec;

+use crate::conversation_history::format_output_for_model_body;
 use crate::exec::ExecToolCallOutput;
-use codex_utils_string::take_bytes_at_char_boundary;
-use codex_utils_string::take_last_bytes_at_char_boundary;
 pub use router::ToolRouter;
 use serde::Serialize;

-// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
-pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
-pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
-pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
-pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
-pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
-
 // Telemetry preview limits: keep log events smaller than model budgets.
 pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
 pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
@@ -73,249 +65,15 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {

    let content = aggregated_output.text.as_str();

-    if exec_output.timed_out {
-        let prefixed = format!(
+    let body = if exec_output.timed_out {
+        format!(
            "command timed out after {} milliseconds\n{content}",
            exec_output.duration.as_millis()
-        );
-        return format_exec_output(&prefixed);
-    }
-
-    format_exec_output(content)
-}
-
-pub(super) fn format_exec_output(content: &str) -> String {
-    // Head+tail truncation for the model: show the beginning and end with an elision.
-    // Clients still receive full streams; only this formatted summary is capped.
-    let total_lines = content.lines().count();
-    if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
-        return content.to_string();
-    }
-    let output = truncate_formatted_exec_output(content, total_lines);
-    format!("Total output lines: {total_lines}\n\n{output}")
-}
-
-fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
-    let segments: Vec<&str> = content.split_inclusive('\n').collect();
-    let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
-    let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
-    let omitted = segments.len().saturating_sub(head_take + tail_take);
-
-    let head_slice_end: usize = segments
-        .iter()
-        .take(head_take)
-        .map(|segment| segment.len())
-        .sum();
-    let tail_slice_start: usize = if tail_take == 0 {
-        content.len()
-    } else {
-        content.len()
-            - segments
-                .iter()
-                .rev()
-                .take(tail_take)
-                .map(|segment| segment.len())
-                .sum::<usize>()
-    };
-    let head_slice = &content[..head_slice_end];
-    let tail_slice = &content[tail_slice_start..];
-    let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
-    let marker = if omitted > 0 {
-        Some(format!(
-            "\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
-        ))
-    } else if truncated_by_bytes {
-        Some(format!(
-            "\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
-        ))
-    } else {
-        None
-    };
-
-    let marker_len = marker.as_ref().map_or(0, String::len);
-    let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
-    let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len));
-    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
-    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
-
-    result.push_str(head_part);
-    if let Some(marker_text) = marker.as_ref() {
-        result.push_str(marker_text);
-    }
-
-    let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
-    if remaining == 0 {
-        return result;
-    }
-
-    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
-    result.push_str(tail_part);
-
-    result
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::function_tool::FunctionCallError;
-    use regex_lite::Regex;
-
-    fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
-        match err {
-            FunctionCallError::RespondToModel(msg) => {
-                FunctionCallError::RespondToModel(format_exec_output(&msg))
-            }
-            FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
-            FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
-            other => other,
-        }
-    }
-
-    fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
-        let pattern = truncated_message_pattern(line, total_lines);
-        let regex = Regex::new(&pattern).unwrap_or_else(|err| {
-            panic!("failed to compile regex {pattern}: {err}");
-        });
-        let captures = regex
-            .captures(message)
-            .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
-        let body = captures
-            .name("body")
-            .expect("missing body capture")
-            .as_str();
-        assert!(
-            body.len() <= MODEL_FORMAT_MAX_BYTES,
-            "body exceeds byte limit: {} bytes",
-            body.len()
-        );
-    }
-
-    fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
-        let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
-        let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
-        let omitted = total_lines.saturating_sub(head_take + tail_take);
-        let escaped_line = regex_lite::escape(line);
-        if omitted == 0 {
-            return format!(
-                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
-            );
-        }
-        format!(
-            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
        )
-    }
+    } else {
+        content.to_string()
+    };

-    #[test]
-    fn truncate_formatted_exec_output_truncates_large_error() {
-        let line = "very long execution error line that should trigger truncation\n";
-        let large_error = line.repeat(2_500); // way beyond both byte and line limits
-
-        let truncated = format_exec_output(&large_error);
-
-        let total_lines = large_error.lines().count();
-        assert_truncated_message_matches(&truncated, line, total_lines);
-        assert_ne!(truncated, large_error);
-    }
-
-    #[test]
-    fn truncate_function_error_trims_respond_to_model() {
-        let line = "respond-to-model error that should be truncated\n";
-        let huge = line.repeat(3_000);
-        let total_lines = huge.lines().count();
-
-        let err = truncate_function_error(FunctionCallError::RespondToModel(huge));
-        match err {
-            FunctionCallError::RespondToModel(message) => {
-                assert_truncated_message_matches(&message, line, total_lines);
-            }
-            other => panic!("unexpected error variant: {other:?}"),
-        }
-    }
-
-    #[test]
-    fn truncate_function_error_trims_fatal() {
-        let line = "fatal error output that should be truncated\n";
-        let huge = line.repeat(3_000);
-        let total_lines = huge.lines().count();
-
-        let err = truncate_function_error(FunctionCallError::Fatal(huge));
-        match err {
-            FunctionCallError::Fatal(message) => {
-                assert_truncated_message_matches(&message, line, total_lines);
-            }
-            other => panic!("unexpected error variant: {other:?}"),
-        }
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_marks_byte_truncation_without_omitted_lines() {
-        let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
-        let truncated = format_exec_output(&long_line);
-
-        assert_ne!(truncated, long_line);
-        let marker_line =
-            format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
-        assert!(
-            truncated.contains(&marker_line),
-            "missing byte truncation marker: {truncated}"
-        );
-        assert!(
-            !truncated.contains("omitted"),
-            "line omission marker should not appear when no lines were dropped: {truncated}"
-        );
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_returns_original_when_within_limits() {
-        let content = "example output\n".repeat(10);
-
-        assert_eq!(format_exec_output(&content), content);
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
-        let total_lines = MODEL_FORMAT_MAX_LINES + 100;
-        let content: String = (0..total_lines)
-            .map(|idx| format!("line-{idx}\n"))
-            .collect();
-
-        let truncated = format_exec_output(&content);
-        let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
-        let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
-
-        assert!(
-            truncated.contains(&expected_marker),
-            "missing omitted marker: {truncated}"
-        );
-        assert!(
-            truncated.contains("line-0\n"),
-            "expected head line to remain: {truncated}"
-        );
-
-        let last_line = format!("line-{}\n", total_lines - 1);
-        assert!(
-            truncated.contains(&last_line),
-            "expected tail line to remain: {truncated}"
-        );
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_prefers_line_marker_when_both_limits_exceeded() {
-        let total_lines = MODEL_FORMAT_MAX_LINES + 42;
-        let long_line = "x".repeat(256);
-        let content: String = (0..total_lines)
-            .map(|idx| format!("line-{idx}-{long_line}\n"))
-            .collect();
-
-        let truncated = format_exec_output(&content);
-
-        assert!(
-            truncated.contains("[... omitted 42 of 298 lines ...]"),
-            "expected omitted marker when line count exceeds limit: {truncated}"
-        );
-        assert!(
-            !truncated.contains("output truncated to fit"),
-            "line omission marker should take precedence over byte marker: {truncated}"
-        );
-    }
+    // Truncate for model consumption before serialization.
+    format_output_for_model_body(&body)
 }
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -33,6 +33,7 @@ mod stream_no_completed;
 mod tool_harness;
 mod tool_parallelism;
 mod tools;
+mod truncation;
 mod unified_exec;
 mod user_notification;
 mod view_image;
--- a/codex-rs/core/tests/suite/truncation.rs
+++ b/codex-rs/core/tests/suite/truncation.rs
@@ -0,0 +1,270 @@
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::unwrap_used, clippy::expect_used)]
+
+use anyhow::Context;
+use anyhow::Result;
+use codex_core::features::Feature;
+use codex_core::model_family::find_family_for_model;
+use codex_core::protocol::SandboxPolicy;
+use core_test_support::assert_regex_match;
+use core_test_support::responses;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_function_call;
+use core_test_support::responses::ev_response_created;
+use core_test_support::responses::mount_sse_once_match;
+use core_test_support::responses::mount_sse_sequence;
+use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::test_codex;
+use escargot::CargoBuild;
+use regex_lite::Regex;
+use serde_json::Value;
+use serde_json::json;
+use wiremock::matchers::any;
+
+// Verifies byte-truncation formatting for function error output (RespondToModel errors)
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        // Use the test model that wires function tools like grep_files
+        config.model = "test-gpt-5-codex".to_string();
+        config.model_family =
+            find_family_for_model("test-gpt-5-codex").expect("model family for test model");
+    });
+    let test = builder.build(&server).await?;
+
+    // Construct a very long, non-existent path to force a RespondToModel error with a large message
+    let long_path = "a".repeat(20_000);
+    let call_id = "grep-huge-error";
+    let args = json!({
+        "pattern": "alpha",
+        "path": long_path,
+        "limit": 10
+    });
+    let responses = vec![
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "grep_files", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    let mock = mount_sse_sequence(&server, responses).await;
+
+    test.submit_turn_with_policy(
+        "trigger grep_files with long path to test truncation",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let output = mock
+        .function_call_output_text(call_id)
+        .context("function error output present")?;
+
+    tracing::debug!(output = %output, "truncated function error output");
+
+    // Expect plaintext with byte-truncation marker and no omitted-lines marker
+    assert!(
+        serde_json::from_str::<serde_json::Value>(&output).is_err(),
+        "expected error output to be plain text",
+    );
+    let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]\s*$"#;
+    assert_regex_match(truncated_pattern, &output);
+    assert!(
+        !output.contains("omitted"),
+        "line omission marker should not appear when no lines were dropped: {output}"
+    );
+
+    Ok(())
+}
+
+// Verifies that a standard tool call (shell) exceeding the model formatting
+// limits is truncated before being sent back to the model.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    // Use a model that exposes the generic shell tool.
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5-codex".to_string();
+        config.model_family =
+            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
+    });
+    let fixture = builder.build(&server).await?;
+
+    let call_id = "shell-too-large";
+    let args = serde_json::json!({
+        "command": ["/bin/sh", "-c", "seq 1 400"],
+        "timeout_ms": 5_000,
+    });
+
+    // First response: model tells us to run the tool; second: complete the turn.
+    mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            responses::ev_response_created("resp-1"),
+            responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            responses::ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    let mock2 = mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            responses::ev_assistant_message("msg-1", "done"),
+            responses::ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    fixture
+        .submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
+        .await?;
+
+    // Inspect what we sent back to the model; it should contain a truncated
+    // function_call_output for the shell call.
+    let output = mock2
+        .single_request()
+        .function_call_output_text(call_id)
+        .context("function_call_output present for shell call")?;
+
+    // Expect plain text (not JSON) with truncation markers and line elision.
+    assert!(
+        serde_json::from_str::<Value>(&output).is_err(),
+        "expected truncated shell output to be plain text"
+    );
+    let truncated_pattern = r#"(?s)^Exit code: 0
+Wall time: .* seconds
+Total output lines: 400
+Output:
+1
+2
+3
+4
+5
+6
+.*
+\[\.{3} omitted 144 of 400 lines \.{3}\]
+
+.*
+396
+397
+398
+399
+400
+$"#;
+    assert_regex_match(truncated_pattern, &output);
+
+    Ok(())
+}
+
+// Verifies that an MCP tool call result exceeding the model formatting limits
+// is truncated before being sent back to the model.
+#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
+async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let call_id = "rmcp-truncated";
+    let server_name = "rmcp";
+    let tool_name = format!("mcp__{server_name}__echo");
+
+    // Build a very large message to exceed 10KiB once serialized.
+    let large_msg = "long-message-with-newlines-".repeat(600);
+    let args_json = serde_json::json!({ "message": large_msg });
+
+    mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            responses::ev_response_created("resp-1"),
+            responses::ev_function_call(call_id, &tool_name, &args_json.to_string()),
+            responses::ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    let mock2 = mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            responses::ev_assistant_message("msg-1", "rmcp echo tool completed."),
+            responses::ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    // Compile the rmcp stdio test server and configure it.
+    let rmcp_test_server_bin = CargoBuild::new()
+        .package("codex-rmcp-client")
+        .bin("test_stdio_server")
+        .run()?
+        .path()
+        .to_string_lossy()
+        .into_owned();
+
+    let mut builder = test_codex().with_config(move |config| {
+        config.features.enable(Feature::RmcpClient);
+        config.mcp_servers.insert(
+            server_name.to_string(),
+            codex_core::config_types::McpServerConfig {
+                transport: codex_core::config_types::McpServerTransportConfig::Stdio {
+                    command: rmcp_test_server_bin,
+                    args: Vec::new(),
+                    env: None,
+                    env_vars: Vec::new(),
+                    cwd: None,
+                },
+                enabled: true,
+                startup_timeout_sec: Some(std::time::Duration::from_secs(10)),
+                tool_timeout_sec: None,
+                enabled_tools: None,
+                disabled_tools: None,
+            },
+        );
+    });
+    let fixture = builder.build(&server).await?;
+
+    fixture
+        .submit_turn_with_policy(
+            "call the rmcp echo tool with a very large message",
+            SandboxPolicy::ReadOnly,
+        )
+        .await?;
+
+    // The MCP tool call output is converted to a function_call_output for the model.
+    let output = mock2
+        .single_request()
+        .function_call_output_text(call_id)
+        .context("function_call_output present for rmcp call")?;
+
+    // Expect plain text with byte-based truncation marker.
+    assert!(
+        serde_json::from_str::<Value>(&output).is_err(),
+        "expected truncated MCP output to be plain text"
+    );
+    assert!(
+        output.starts_with("Total output lines: 1\n\n{"),
+        "expected total line header and JSON head, got: {output}"
+    );
+    let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]")
+        .expect("compile regex");
+    assert!(
+        byte_marker.is_match(&output),
+        "expected byte truncation marker, got: {output}"
+    );
+
+    Ok(())
+}