feat: truncate on compact (#4942)

Truncate the message during compaction if it is just too large
Do it iteratively as tokenization is basically free on server-side
This commit is contained in:
jif-oai
2025-10-08 18:11:08 +01:00
committed by GitHub
parent fe8122e514
commit 687a13bbe5
2 changed files with 148 additions and 6 deletions

View File

@@ -70,14 +70,10 @@ async fn run_compact_task_inner(
input: Vec<InputItem>,
) {
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
let turn_input = sess
let mut turn_input = sess
.turn_input_with_history(vec![initial_input_for_turn.clone().into()])
.await;
let prompt = Prompt {
input: turn_input,
..Default::default()
};
let mut truncated_count = 0usize;
let max_retries = turn_context.client.get_provider().stream_max_retries();
let mut retries = 0;
@@ -93,17 +89,36 @@ async fn run_compact_task_inner(
sess.persist_rollout_items(&[rollout_item]).await;
loop {
let prompt = Prompt {
input: turn_input.clone(),
..Default::default()
};
let attempt_result =
drain_to_completed(&sess, turn_context.as_ref(), &sub_id, &prompt).await;
match attempt_result {
Ok(()) => {
if truncated_count > 0 {
sess.notify_background_event(
&sub_id,
format!(
"Trimmed {truncated_count} older conversation item(s) before compacting so the prompt fits the model context window."
),
)
.await;
}
break;
}
Err(CodexErr::Interrupted) => {
return;
}
Err(e @ CodexErr::ContextWindowExceeded) => {
if turn_input.len() > 1 {
turn_input.remove(0);
truncated_count += 1;
retries = 0;
continue;
}
sess.set_total_tokens_full(&sub_id, turn_context.as_ref())
.await;
let event = Event {