use codex_protocol::models::FunctionCallOutputContentItem; use codex_utils_string::take_bytes_at_char_boundary; use codex_utils_string::take_last_bytes_at_char_boundary; // Model-formatting limits: clients get full streams; only content sent to the model is truncated. pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2; pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128 pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2; pub(crate) fn globally_truncate_function_output_items( items: &[FunctionCallOutputContentItem], ) -> Vec { let mut out: Vec = Vec::with_capacity(items.len()); let mut remaining = MODEL_FORMAT_MAX_BYTES; let mut omitted_text_items = 0usize; for it in items { match it { FunctionCallOutputContentItem::InputText { text } => { if remaining == 0 { omitted_text_items += 1; continue; } let len = text.len(); if len <= remaining { out.push(FunctionCallOutputContentItem::InputText { text: text.clone() }); remaining -= len; } else { let slice = take_bytes_at_char_boundary(text, remaining); if !slice.is_empty() { out.push(FunctionCallOutputContentItem::InputText { text: slice.to_string(), }); } remaining = 0; } } // todo(aibrahim): handle input images; resize FunctionCallOutputContentItem::InputImage { image_url } => { out.push(FunctionCallOutputContentItem::InputImage { image_url: image_url.clone(), }); } } } if omitted_text_items > 0 { out.push(FunctionCallOutputContentItem::InputText { text: format!("[omitted {omitted_text_items} text items ...]"), }); } out } pub(crate) fn format_output_for_model_body(content: &str) -> String { // Head+tail truncation for the model: show the beginning and end with an elision. // Clients still receive full streams; only this formatted summary is capped. let total_lines = content.lines().count(); if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES { return content.to_string(); } let output = truncate_formatted_exec_output(content, total_lines); format!("Total output lines: {total_lines}\n\n{output}") } fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String { let segments: Vec<&str> = content.split_inclusive('\n').collect(); let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len()); let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take)); let omitted = segments.len().saturating_sub(head_take + tail_take); let head_slice_end: usize = segments .iter() .take(head_take) .map(|segment| segment.len()) .sum(); let tail_slice_start: usize = if tail_take == 0 { content.len() } else { content.len() - segments .iter() .rev() .take(tail_take) .map(|segment| segment.len()) .sum::() }; let head_slice = &content[..head_slice_end]; let tail_slice = &content[tail_slice_start..]; let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES; // this is a bit wrong. We are counting metadata lines and not just shell output lines. let marker = if omitted > 0 { Some(format!( "\n[... omitted {omitted} of {total_lines} lines ...]\n\n" )) } else if truncated_by_bytes { Some(format!( "\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n" )) } else { None }; let marker_len = marker.as_ref().map_or(0, String::len); let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES); let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len)); let head_part = take_bytes_at_char_boundary(head_slice, head_budget); let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len())); result.push_str(head_part); if let Some(marker_text) = marker.as_ref() { result.push_str(marker_text); } let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len()); if remaining == 0 { return result; } let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining); result.push_str(tail_part); result }