Files
llmx/codex-rs/core/src/context_manager/truncate.rs
Ahmed Ibrahim 1a89f70015 refactor Conversation history file into its own directory (#6229)
This is just a refactor of `conversation_history` file by breaking it up
into multiple smaller ones with helper. This refactor will help us move
more functionality related to context management here. in a clean way.
2025-11-05 10:49:35 -08:00

129 lines
4.9 KiB
Rust

use codex_protocol::models::FunctionCallOutputContentItem;
use codex_utils_string::take_bytes_at_char_boundary;
use codex_utils_string::take_last_bytes_at_char_boundary;
// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
pub(crate) fn globally_truncate_function_output_items(
items: &[FunctionCallOutputContentItem],
) -> Vec<FunctionCallOutputContentItem> {
let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
let mut remaining = MODEL_FORMAT_MAX_BYTES;
let mut omitted_text_items = 0usize;
for it in items {
match it {
FunctionCallOutputContentItem::InputText { text } => {
if remaining == 0 {
omitted_text_items += 1;
continue;
}
let len = text.len();
if len <= remaining {
out.push(FunctionCallOutputContentItem::InputText { text: text.clone() });
remaining -= len;
} else {
let slice = take_bytes_at_char_boundary(text, remaining);
if !slice.is_empty() {
out.push(FunctionCallOutputContentItem::InputText {
text: slice.to_string(),
});
}
remaining = 0;
}
}
// todo(aibrahim): handle input images; resize
FunctionCallOutputContentItem::InputImage { image_url } => {
out.push(FunctionCallOutputContentItem::InputImage {
image_url: image_url.clone(),
});
}
}
}
if omitted_text_items > 0 {
out.push(FunctionCallOutputContentItem::InputText {
text: format!("[omitted {omitted_text_items} text items ...]"),
});
}
out
}
pub(crate) fn format_output_for_model_body(content: &str) -> String {
// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.
let total_lines = content.lines().count();
if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
return content.to_string();
}
let output = truncate_formatted_exec_output(content, total_lines);
format!("Total output lines: {total_lines}\n\n{output}")
}
fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
let segments: Vec<&str> = content.split_inclusive('\n').collect();
let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
let omitted = segments.len().saturating_sub(head_take + tail_take);
let head_slice_end: usize = segments
.iter()
.take(head_take)
.map(|segment| segment.len())
.sum();
let tail_slice_start: usize = if tail_take == 0 {
content.len()
} else {
content.len()
- segments
.iter()
.rev()
.take(tail_take)
.map(|segment| segment.len())
.sum::<usize>()
};
let head_slice = &content[..head_slice_end];
let tail_slice = &content[tail_slice_start..];
let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
// this is a bit wrong. We are counting metadata lines and not just shell output lines.
let marker = if omitted > 0 {
Some(format!(
"\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
))
} else if truncated_by_bytes {
Some(format!(
"\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
))
} else {
None
};
let marker_len = marker.as_ref().map_or(0, String::len);
let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len));
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
result.push_str(head_part);
if let Some(marker_text) = marker.as_ref() {
result.push_str(marker_text);
}
let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
if remaining == 0 {
return result;
}
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
result.push_str(tail_part);
result
}