feat: context compaction (#3446)

## Compact feature: 1. Stop the model when the context window becomes too large. 2. Add a user turn asking the model to summarize. 3. Build a bridge, rendered from a template, that contains all previous user messages plus the summary. 4. Start sampling again from a clean conversation that contains only that bridge.
2025-09-12 13:07:10 -07:00
parent d4848e558b
commit ea225df22e
14 changed files with 1243 additions and 326 deletions
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -104,6 +104,12 @@ impl ModelClient {
            .or_else(|| get_model_info(&self.config.model_family).map(|info| info.context_window))
    }

+    /// Returns the token-usage threshold that triggers auto-compaction, if one is known.
+    ///
+    /// An explicit `model_auto_compact_token_limit` in the config takes precedence;
+    /// otherwise the limit from `get_model_info` for the configured model family
+    /// is used. Returns `None` when neither source provides a value.
+    pub fn get_auto_compact_token_limit(&self) -> Option<i64> {
+        if let Some(configured) = self.config.model_auto_compact_token_limit {
+            return Some(configured);
+        }
+        get_model_info(&self.config.model_family)?.auto_compact_token_limit
+    }
+
    /// Dispatches to either the Responses or Chat implementation depending on
    /// the provider config.  Public callers always invoke `stream()` – the
    /// specialised helpers are private to avoid accidental misuse.
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -16,7 +16,6 @@ use codex_apply_patch::ApplyPatchAction;
 use codex_apply_patch::MaybeApplyPatchVerified;
 use codex_apply_patch::maybe_parse_apply_patch_verified;
 use codex_protocol::mcp_protocol::ConversationId;
-use codex_protocol::protocol::CompactedItem;
 use codex_protocol::protocol::ConversationPathResponseEvent;
 use codex_protocol::protocol::RolloutItem;
 use codex_protocol::protocol::TaskStartedEvent;
@@ -77,7 +76,6 @@ use crate::parse_command::parse_command;
 use crate::plan_tool::handle_update_plan;
 use crate::project_doc::get_user_instructions;
 use crate::protocol::AgentMessageDeltaEvent;
-use crate::protocol::AgentMessageEvent;
 use crate::protocol::AgentReasoningDeltaEvent;
 use crate::protocol::AgentReasoningRawContentDeltaEvent;
 use crate::protocol::AgentReasoningSectionBreakEvent;
@@ -102,6 +100,7 @@ use crate::protocol::SessionConfiguredEvent;
 use crate::protocol::StreamErrorEvent;
 use crate::protocol::Submission;
 use crate::protocol::TaskCompleteEvent;
+use crate::protocol::TokenUsage;
 use crate::protocol::TokenUsageInfo;
 use crate::protocol::TurnDiffEvent;
 use crate::protocol::WebSearchBeginEvent;
@@ -127,6 +126,8 @@ use codex_protocol::models::ResponseItem;
 use codex_protocol::models::ShellToolCallParams;
 use codex_protocol::protocol::InitialHistory;

+mod compact;
+
 // A convenience extension trait for acquiring mutex locks where poisoning is
 // unrecoverable and should abort the program. This avoids scattered `.unwrap()`
 // calls on `lock()` while still surfacing a clear panic message when a lock is
@@ -264,6 +265,7 @@ struct State {
    pending_input: Vec<ResponseInputItem>,
    history: ConversationHistory,
    token_info: Option<TokenUsageInfo>,
+    next_internal_sub_id: u64,
 }

 /// Context for an initialized model agent
@@ -534,6 +536,13 @@ impl Session {
        }
    }

+    /// Hands out the next internal submission id, formatted as `auto-compact-{n}`.
+    ///
+    /// The counter lives in the session state; it is read and incremented while
+    /// the state lock is held, and the string is built after the lock is released.
+    fn next_internal_sub_id(&self) -> String {
+        let id = {
+            let mut state = self.state.lock_unchecked();
+            let current = state.next_internal_sub_id;
+            state.next_internal_sub_id = current + 1;
+            current
+        };
+        format!("auto-compact-{id}")
+    }
+
    async fn record_initial_history(
        &self,
        turn_context: &TurnContext,
@@ -707,6 +716,21 @@ impl Session {
        }
    }

+    /// Merges `token_usage` into the session's stored token accounting and
+    /// returns the updated value.
+    ///
+    /// The stored `token_info` is replaced by the result of
+    /// `TokenUsageInfo::new_or_append`, which is also given the client's model
+    /// context window.
+    fn update_token_usage_info(
+        &self,
+        turn_context: &TurnContext,
+        token_usage: &Option<TokenUsage>,
+    ) -> Option<TokenUsageInfo> {
+        let context_window = turn_context.client.get_model_context_window();
+        let mut state = self.state.lock_unchecked();
+        let updated =
+            TokenUsageInfo::new_or_append(&state.token_info, token_usage, context_window);
+        state.token_info = updated.clone();
+        updated
+    }
+
    /// Record a user input item to conversation history and also persist a
    /// corresponding UserMessage EventMsg to rollout.
    async fn record_input_and_rollout_usermsg(&self, response_input: &ResponseInputItem) {
@@ -1026,8 +1050,7 @@ impl AgentTask {
            let sess = sess.clone();
            let sub_id = sub_id.clone();
            let tc = Arc::clone(&turn_context);
-            tokio::spawn(async move { run_task(sess, tc.as_ref(), sub_id, input).await })
-                .abort_handle()
+            tokio::spawn(async move { run_task(sess, tc, sub_id, input).await }).abort_handle()
        };
        Self {
            sess,
@@ -1048,7 +1071,7 @@ impl AgentTask {
            let sub_id = sub_id.clone();
            let tc = Arc::clone(&turn_context);
            tokio::spawn(async move {
-                run_compact_task(sess, tc.as_ref(), sub_id, input, compact_instructions).await
+                compact::run_compact_task(sess, tc, sub_id, input, compact_instructions).await
            })
            .abort_handle()
        };
@@ -1342,21 +1365,16 @@ async fn submission_loop(
                sess.send_event(event).await;
            }
            Op::Compact => {
-                // Create a summarization request as user input
-                const SUMMARIZATION_PROMPT: &str = include_str!("prompt_for_compact_command.md");
-
                // Attempt to inject input into current task
                if let Err(items) = sess.inject_input(vec![InputItem::Text {
-                    text: "Start Summarization".to_string(),
+                    text: compact::COMPACT_TRIGGER_TEXT.to_string(),
                }]) {
-                    let task = AgentTask::compact(
+                    compact::spawn_compact_task(
                        sess.clone(),
                        Arc::clone(&turn_context),
                        sub.id,
                        items,
-                        SUMMARIZATION_PROMPT.to_string(),
                    );
-                    sess.set_task(task);
                }
            }
            Op::Shutdown => {
@@ -1435,7 +1453,7 @@ async fn submission_loop(
 ///   conversation history and consider the task complete.
 async fn run_task(
    sess: Arc<Session>,
-    turn_context: &TurnContext,
+    turn_context: Arc<TurnContext>,
    sub_id: String,
    input: Vec<InputItem>,
 ) {
@@ -1458,6 +1476,7 @@ async fn run_task(
    // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
    // many turns, from the perspective of the user, it is a single turn.
    let mut turn_diff_tracker = TurnDiffTracker::new();
+    let mut auto_compact_recently_attempted = false;

    loop {
        // Note that pending_input would be something like a message the user
@@ -1492,7 +1511,7 @@ async fn run_task(
            .collect();
        match run_turn(
            &sess,
-            turn_context,
+            turn_context.as_ref(),
            &mut turn_diff_tracker,
            sub_id.clone(),
            turn_input,
@@ -1500,9 +1519,23 @@ async fn run_task(
        .await
        {
            Ok(turn_output) => {
+                let TurnRunResult {
+                    processed_items,
+                    total_token_usage,
+                } = turn_output;
+                let limit = turn_context
+                    .client
+                    .get_auto_compact_token_limit()
+                    .unwrap_or(i64::MAX);
+                let total_usage_tokens = total_token_usage
+                    .as_ref()
+                    .map(|usage| usage.tokens_in_context_window());
+                let token_limit_reached = total_usage_tokens
+                    .map(|tokens| (tokens as i64) >= limit)
+                    .unwrap_or(false);
                let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
                let mut responses = Vec::<ResponseInputItem>::new();
-                for processed_response_item in turn_output {
+                for processed_response_item in processed_items {
                    let ProcessedResponseItem { item, response } = processed_response_item;
                    match (&item, &response) {
                        (ResponseItem::Message { role, .. }, None) if role == "assistant" => {
@@ -1599,8 +1632,31 @@ async fn run_task(
                        .await;
                }

+                if token_limit_reached {
+                    if auto_compact_recently_attempted {
+                        let limit_str = limit.to_string();
+                        let current_tokens = total_usage_tokens
+                            .map(|tokens| tokens.to_string())
+                            .unwrap_or_else(|| "unknown".to_string());
+                        let event = Event {
+                            id: sub_id.clone(),
+                            msg: EventMsg::Error(ErrorEvent {
+                                message: format!(
+                                    "Conversation is still above the token limit after automatic summarization (limit {limit_str}, current {current_tokens}). Please start a new session or trim your input."
+                                ),
+                            }),
+                        };
+                        sess.send_event(event).await;
+                        break;
+                    }
+                    auto_compact_recently_attempted = true;
+                    compact::run_inline_auto_compact_task(sess.clone(), turn_context.clone()).await;
+                    continue;
+                }
+
+                auto_compact_recently_attempted = false;
+
                if responses.is_empty() {
-                    debug!("Turn completed");
                    last_agent_message = get_last_assistant_message_from_turn(
                        &items_to_record_in_conversation_history,
                    );
@@ -1611,6 +1667,7 @@ async fn run_task(
                    });
                    break;
                }
+                continue;
            }
            Err(e) => {
                info!("Turn error: {e:#}");
@@ -1640,7 +1697,7 @@ async fn run_turn(
    turn_diff_tracker: &mut TurnDiffTracker,
    sub_id: String,
    input: Vec<ResponseItem>,
-) -> CodexResult<Vec<ProcessedResponseItem>> {
+) -> CodexResult<TurnRunResult> {
    let tools = get_openai_tools(
        &turn_context.tools_config,
        Some(sess.mcp_connection_manager.list_all_tools()),
@@ -1704,13 +1761,19 @@ struct ProcessedResponseItem {
    response: Option<ResponseInputItem>,
 }

+/// Result of a single model turn, as returned by `run_turn` and `try_run_turn`.
+#[derive(Debug)]
+struct TurnRunResult {
+    /// Items produced during the turn, each paired with its optional response.
+    processed_items: Vec<ProcessedResponseItem>,
+    /// Token usage carried by the stream's `Completed` event, if it reported any.
+    total_token_usage: Option<TokenUsage>,
+}
+
 async fn try_run_turn(
    sess: &Session,
    turn_context: &TurnContext,
    turn_diff_tracker: &mut TurnDiffTracker,
    sub_id: &str,
    prompt: &Prompt,
-) -> CodexResult<Vec<ProcessedResponseItem>> {
+) -> CodexResult<TurnRunResult> {
    // call_ids that are part of this response.
    let completed_call_ids = prompt
        .input
@@ -1828,16 +1891,7 @@ async fn try_run_turn(
                response_id: _,
                token_usage,
            } => {
-                let info = {
-                    let mut st = sess.state.lock_unchecked();
-                    let info = TokenUsageInfo::new_or_append(
-                        &st.token_info,
-                        &token_usage,
-                        turn_context.client.get_model_context_window(),
-                    );
-                    st.token_info = info.clone();
-                    info
-                };
+                let info = sess.update_token_usage_info(turn_context, &token_usage);
                let _ = sess
                    .send_event(Event {
                        id: sub_id.to_string(),
@@ -1855,7 +1909,12 @@ async fn try_run_turn(
                    sess.send_event(event).await;
                }

-                return Ok(output);
+                let result = TurnRunResult {
+                    processed_items: output,
+                    total_token_usage: token_usage.clone(),
+                };
+
+                return Ok(result);
            }
            ResponseEvent::OutputTextDelta(delta) => {
                let event = Event {
@@ -1893,95 +1952,6 @@ async fn try_run_turn(
    }
 }

-async fn run_compact_task(
-    sess: Arc<Session>,
-    turn_context: &TurnContext,
-    sub_id: String,
-    input: Vec<InputItem>,
-    compact_instructions: String,
-) {
-    let model_context_window = turn_context.client.get_model_context_window();
-    let start_event = Event {
-        id: sub_id.clone(),
-        msg: EventMsg::TaskStarted(TaskStartedEvent {
-            model_context_window,
-        }),
-    };
-    sess.send_event(start_event).await;
-
-    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
-    let turn_input: Vec<ResponseItem> =
-        sess.turn_input_with_history(vec![initial_input_for_turn.clone().into()]);
-
-    let prompt = Prompt {
-        input: turn_input,
-        tools: Vec::new(),
-        base_instructions_override: Some(compact_instructions.clone()),
-    };
-
-    let max_retries = turn_context.client.get_provider().stream_max_retries();
-    let mut retries = 0;
-
-    loop {
-        let attempt_result = drain_to_completed(&sess, turn_context, &sub_id, &prompt).await;
-
-        match attempt_result {
-            Ok(()) => break,
-            Err(CodexErr::Interrupted) => return,
-            Err(e) => {
-                if retries < max_retries {
-                    retries += 1;
-                    let delay = backoff(retries);
-                    sess.notify_stream_error(
-                        &sub_id,
-                        format!(
-                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
-                        ),
-                    )
-                    .await;
-                    tokio::time::sleep(delay).await;
-                    continue;
-                } else {
-                    let event = Event {
-                        id: sub_id.clone(),
-                        msg: EventMsg::Error(ErrorEvent {
-                            message: e.to_string(),
-                        }),
-                    };
-                    sess.send_event(event).await;
-                    return;
-                }
-            }
-        }
-    }
-
-    sess.remove_task(&sub_id);
-
-    let rollout_item = {
-        let mut state = sess.state.lock_unchecked();
-        state.history.keep_last_messages(1);
-        RolloutItem::Compacted(CompactedItem {
-            message: state.history.last_agent_message(),
-        })
-    };
-    sess.persist_rollout_items(&[rollout_item]).await;
-
-    let event = Event {
-        id: sub_id.clone(),
-        msg: EventMsg::AgentMessage(AgentMessageEvent {
-            message: "Compact task completed".to_string(),
-        }),
-    };
-    sess.send_event(event).await;
-    let event = Event {
-        id: sub_id.clone(),
-        msg: EventMsg::TaskComplete(TaskCompleteEvent {
-            last_agent_message: None,
-        }),
-    };
-    sess.send_event(event).await;
-}
-
 async fn handle_response_item(
    sess: &Session,
    turn_context: &TurnContext,
@@ -2964,7 +2934,7 @@ fn format_exec_output(exec_output: &ExecToolCallOutput) -> String {
    serde_json::to_string(&payload).expect("serialize ExecOutput")
 }

-fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option<String> {
+pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option<String> {
    responses.iter().rev().find_map(|item| {
        if let ResponseItem::Message { role, content, .. } = item {
            if role == "assistant" {
@@ -2983,68 +2953,6 @@ fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option<St
        }
    })
 }
-
-async fn drain_to_completed(
-    sess: &Session,
-    turn_context: &TurnContext,
-    sub_id: &str,
-    prompt: &Prompt,
-) -> CodexResult<()> {
-    let rollout_item = RolloutItem::TurnContext(TurnContextItem {
-        cwd: turn_context.cwd.clone(),
-        approval_policy: turn_context.approval_policy,
-        sandbox_policy: turn_context.sandbox_policy.clone(),
-        model: turn_context.client.get_model(),
-        effort: turn_context.client.get_reasoning_effort(),
-        summary: turn_context.client.get_reasoning_summary(),
-    });
-    sess.persist_rollout_items(&[rollout_item]).await;
-    let mut stream = turn_context.client.clone().stream(prompt).await?;
-    loop {
-        let maybe_event = stream.next().await;
-        let Some(event) = maybe_event else {
-            return Err(CodexErr::Stream(
-                "stream closed before response.completed".into(),
-                None,
-            ));
-        };
-        match event {
-            Ok(ResponseEvent::OutputItemDone(item)) => {
-                // Record only to in-memory conversation history; avoid state snapshot.
-                let mut state = sess.state.lock_unchecked();
-                state.history.record_items(std::slice::from_ref(&item));
-            }
-            Ok(ResponseEvent::Completed {
-                response_id: _,
-                token_usage,
-            }) => {
-                let info = {
-                    let mut st = sess.state.lock_unchecked();
-                    let info = TokenUsageInfo::new_or_append(
-                        &st.token_info,
-                        &token_usage,
-                        turn_context.client.get_model_context_window(),
-                    );
-                    st.token_info = info.clone();
-                    info
-                };
-
-                sess.tx_event
-                    .send(Event {
-                        id: sub_id.to_string(),
-                        msg: EventMsg::TokenCount(crate::protocol::TokenCountEvent { info }),
-                    })
-                    .await
-                    .ok();
-
-                return Ok(());
-            }
-            Ok(_) => continue,
-            Err(e) => return Err(e),
-        }
-    }
-}
-
 fn convert_call_tool_result_to_function_call_output_payload(
    call_tool_result: &CallToolResult,
 ) -> FunctionCallOutputPayload {
--- a/codex-rs/core/src/codex/compact.rs
+++ b/codex-rs/core/src/codex/compact.rs
@@ -0,0 +1,401 @@
+use std::sync::Arc;
+
+use super::AgentTask;
+use super::MutexExt;
+use super::Session;
+use super::TurnContext;
+use super::get_last_assistant_message_from_turn;
+use crate::Prompt;
+use crate::client_common::ResponseEvent;
+use crate::error::CodexErr;
+use crate::error::Result as CodexResult;
+use crate::protocol::AgentMessageEvent;
+use crate::protocol::CompactedItem;
+use crate::protocol::ErrorEvent;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+use crate::protocol::InputItem;
+use crate::protocol::InputMessageKind;
+use crate::protocol::TaskCompleteEvent;
+use crate::protocol::TaskStartedEvent;
+use crate::protocol::TurnContextItem;
+use crate::util::backoff;
+use askama::Template;
+use codex_protocol::models::ContentItem;
+use codex_protocol::models::ResponseInputItem;
+use codex_protocol::models::ResponseItem;
+use codex_protocol::protocol::RolloutItem;
+use futures::prelude::*;
+
+/// Text of the synthetic user input that is submitted to start a compaction.
+pub(super) const COMPACT_TRIGGER_TEXT: &str = "Start Summarization";
+/// Instructions for the summarization request, embedded at compile time; used
+/// as the prompt's base-instructions override.
+const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
+
+/// Askama template for the history "bridge" message that replaces the
+/// conversation after compaction.
+///
+/// `escape = "none"` turns off the template engine's escaping, so both fields
+/// are inserted verbatim.
+#[derive(Template)]
+#[template(path = "compact/history_bridge.md", escape = "none")]
+struct HistoryBridgeTemplate<'a> {
+    /// Text of the earlier user messages, already joined into one string.
+    user_messages_text: &'a str,
+    /// Summary text to embed in the bridge.
+    summary_text: &'a str,
+}
+
+/// Creates a compact `AgentTask` that uses the built-in summarization prompt
+/// and installs it on the session via `set_task`.
+pub(super) fn spawn_compact_task(
+    sess: Arc<Session>,
+    turn_context: Arc<TurnContext>,
+    sub_id: String,
+    input: Vec<InputItem>,
+) {
+    let instructions = SUMMARIZATION_PROMPT.to_string();
+    let compact_task = AgentTask::compact(sess.clone(), turn_context, sub_id, input, instructions);
+    sess.set_task(compact_task);
+}
+
+/// Runs a compaction inline (awaited by the caller, not spawned as a task).
+///
+/// A fresh internal submission id is allocated, the trigger text is used as
+/// the only input, and the built-in summarization prompt is applied. The task
+/// is not removed from the session on completion.
+pub(super) async fn run_inline_auto_compact_task(
+    sess: Arc<Session>,
+    turn_context: Arc<TurnContext>,
+) {
+    let sub_id = sess.next_internal_sub_id();
+    let trigger = InputItem::Text {
+        text: COMPACT_TRIGGER_TEXT.to_string(),
+    };
+    let instructions = SUMMARIZATION_PROMPT.to_string();
+    let remove_task_on_completion = false;
+    run_compact_task_inner(
+        sess,
+        turn_context,
+        sub_id,
+        vec![trigger],
+        instructions,
+        remove_task_on_completion,
+    )
+    .await;
+}
+
+/// Entry point for a spawned compact task.
+///
+/// Delegates to `run_compact_task_inner` and asks it to remove the task from
+/// the session once the summarization stream has completed.
+pub(super) async fn run_compact_task(
+    sess: Arc<Session>,
+    turn_context: Arc<TurnContext>,
+    sub_id: String,
+    input: Vec<InputItem>,
+    compact_instructions: String,
+) {
+    let remove_task_on_completion = true;
+    run_compact_task_inner(
+        sess,
+        turn_context,
+        sub_id,
+        input,
+        compact_instructions,
+        remove_task_on_completion,
+    )
+    .await;
+}
+
+/// Shared implementation behind the spawned and the inline compact tasks.
+///
+/// Steps, in order:
+/// 1. Emit `TaskStarted` for `sub_id`.
+/// 2. Build a tool-less prompt from the current history plus `input`, with
+///    `compact_instructions` as the base-instructions override.
+/// 3. Persist a `TurnContext` rollout item, then stream the model response,
+///    retrying failed attempts with backoff up to the provider's
+///    `stream_max_retries`.
+/// 4. Replace the in-memory history with the compacted history built from the
+///    collected user messages and the last assistant message (the summary).
+/// 5. Persist a `Compacted` rollout item and emit `AgentMessage` followed by
+///    `TaskComplete`.
+///
+/// Returns early, without replacing history or emitting `TaskComplete`, when
+/// the stream is interrupted or when retries are exhausted; in the latter case
+/// an `Error` event is sent first.
+///
+/// `remove_task_on_completion` controls whether `sess.remove_task(&sub_id)` is
+/// called after a successful stream.
+async fn run_compact_task_inner(
+    sess: Arc<Session>,
+    turn_context: Arc<TurnContext>,
+    sub_id: String,
+    input: Vec<InputItem>,
+    compact_instructions: String,
+    remove_task_on_completion: bool,
+) {
+    let model_context_window = turn_context.client.get_model_context_window();
+    let start_event = Event {
+        id: sub_id.clone(),
+        msg: EventMsg::TaskStarted(TaskStartedEvent {
+            model_context_window,
+        }),
+    };
+    sess.send_event(start_event).await;
+
+    // The input item is consumed here and never used again, so it is moved
+    // into the turn input rather than cloned.
+    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
+    let turn_input = sess.turn_input_with_history(vec![initial_input_for_turn.into()]);
+
+    // No tools are offered for the summarization request.
+    let prompt = Prompt {
+        input: turn_input,
+        tools: Vec::new(),
+        base_instructions_override: Some(compact_instructions),
+    };
+
+    let max_retries = turn_context.client.get_provider().stream_max_retries();
+    let mut retries = 0;
+
+    // Persisted once, before the retry loop, so retries do not repeat it.
+    let rollout_item = RolloutItem::TurnContext(TurnContextItem {
+        cwd: turn_context.cwd.clone(),
+        approval_policy: turn_context.approval_policy,
+        sandbox_policy: turn_context.sandbox_policy.clone(),
+        model: turn_context.client.get_model(),
+        effort: turn_context.client.get_reasoning_effort(),
+        summary: turn_context.client.get_reasoning_summary(),
+    });
+    sess.persist_rollout_items(&[rollout_item]).await;
+
+    loop {
+        let attempt_result = drain_to_completed(&sess, turn_context.as_ref(), &prompt).await;
+
+        match attempt_result {
+            Ok(()) => {
+                break;
+            }
+            // An interruption ends the task silently: no error event, no retry.
+            Err(CodexErr::Interrupted) => {
+                return;
+            }
+            Err(e) => {
+                if retries < max_retries {
+                    retries += 1;
+                    let delay = backoff(retries);
+                    sess.notify_stream_error(
+                        &sub_id,
+                        format!(
+                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
+                        ),
+                    )
+                    .await;
+                    tokio::time::sleep(delay).await;
+                    continue;
+                } else {
+                    // Retries exhausted: report the error and leave history untouched.
+                    let event = Event {
+                        id: sub_id.clone(),
+                        msg: EventMsg::Error(ErrorEvent {
+                            message: e.to_string(),
+                        }),
+                    };
+                    sess.send_event(event).await;
+                    return;
+                }
+            }
+        }
+    }
+
+    if remove_task_on_completion {
+        sess.remove_task(&sub_id);
+    }
+    // Snapshot the history inside a short scope so the lock is released before
+    // the compacted history is built.
+    let history_snapshot = {
+        let state = sess.state.lock_unchecked();
+        state.history.contents()
+    };
+    let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
+    let user_messages = collect_user_messages(&history_snapshot);
+    let new_history =
+        build_compacted_history(&sess, turn_context.as_ref(), &user_messages, &summary_text);
+    {
+        let mut state = sess.state.lock_unchecked();
+        state.history.replace(new_history);
+    }
+
+    let rollout_item = RolloutItem::Compacted(CompactedItem {
+        message: summary_text.clone(),
+    });
+    sess.persist_rollout_items(&[rollout_item]).await;
+
+    let event = Event {
+        id: sub_id.clone(),
+        msg: EventMsg::AgentMessage(AgentMessageEvent {
+            message: "Compact task completed".to_string(),
+        }),
+    };
+    sess.send_event(event).await;
+    let event = Event {
+        id: sub_id.clone(),
+        msg: EventMsg::TaskComplete(TaskCompleteEvent {
+            last_agent_message: None,
+        }),
+    };
+    sess.send_event(event).await;
+}
+
+/// Joins the non-empty text segments of `content` with newlines.
+///
+/// Both input and output text items contribute; image items are skipped.
+/// Returns `None` when no non-empty text segment is present.
+fn content_items_to_text(content: &[ContentItem]) -> Option<String> {
+    let segments: Vec<&str> = content
+        .iter()
+        .filter_map(|item| match item {
+            ContentItem::InputText { text } | ContentItem::OutputText { text } => {
+                (!text.is_empty()).then_some(text.as_str())
+            }
+            ContentItem::InputImage { .. } => None,
+        })
+        .collect();
+    (!segments.is_empty()).then(|| segments.join("\n"))
+}
+
+/// Extracts the text of every `user` message in `items`, in order.
+///
+/// Messages with no text content are dropped, as are session-prefix messages
+/// (see `is_session_prefix_message`).
+fn collect_user_messages(items: &[ResponseItem]) -> Vec<String> {
+    let mut messages = Vec::new();
+    for item in items {
+        let ResponseItem::Message { role, content, .. } = item else {
+            continue;
+        };
+        if role != "user" {
+            continue;
+        }
+        let Some(text) = content_items_to_text(content) else {
+            continue;
+        };
+        if !is_session_prefix_message(&text) {
+            messages.push(text);
+        }
+    }
+    messages
+}
+
+/// Returns `true` when `text`, classified as a `user` message, is either a
+/// user-instructions or an environment-context message.
+fn is_session_prefix_message(text: &str) -> bool {
+    let kind = InputMessageKind::from(("user", text));
+    matches!(
+        kind,
+        InputMessageKind::UserInstructions | InputMessageKind::EnvironmentContext
+    )
+}
+
+/// Builds the history that replaces the conversation after compaction.
+///
+/// The result is the session's initial context for `turn_context` followed by
+/// a single `user` message (the "bridge") that carries the earlier user
+/// messages and the summary.
+///
+/// * `user_messages` - earlier user messages; joined with blank lines, or
+///   rendered as `(none)` when empty.
+/// * `summary_text` - the model's summary; rendered as
+///   `(no summary available)` when empty.
+///
+/// If the bridge template fails to render, a plain-text bridge with the same
+/// information is used instead, so the initial context and the summary are
+/// never silently dropped.
+fn build_compacted_history(
+    sess: &Session,
+    turn_context: &TurnContext,
+    user_messages: &[String],
+    summary_text: &str,
+) -> Vec<ResponseItem> {
+    let mut history = sess.build_initial_context(turn_context);
+    let user_messages_text = if user_messages.is_empty() {
+        "(none)".to_string()
+    } else {
+        user_messages.join("\n\n")
+    };
+    // Both branches are borrowed strings, so no allocation is needed here.
+    let summary_text = if summary_text.is_empty() {
+        "(no summary available)"
+    } else {
+        summary_text
+    };
+    let bridge = HistoryBridgeTemplate {
+        user_messages_text: &user_messages_text,
+        summary_text,
+    }
+    .render()
+    .unwrap_or_else(|_| {
+        // Returning an empty history here would erase the initial context and
+        // the summary; degrade to an untemplated bridge instead.
+        format!(
+            "Earlier user messages:\n{user_messages_text}\n\nSummary of the conversation so far:\n{summary_text}"
+        )
+    });
+    history.push(ResponseItem::Message {
+        id: None,
+        role: "user".to_string(),
+        content: vec![ContentItem::InputText { text: bridge }],
+    });
+    history
+}
+
+/// Streams one model response for `prompt` until its `Completed` event.
+///
+/// Every `OutputItemDone` item is recorded into the session's in-memory
+/// history; all other events are ignored.
+///
+/// # Errors
+///
+/// Propagates errors from opening or reading the stream, and returns
+/// `CodexErr::Stream` if the stream ends before a `Completed` event arrives.
+async fn drain_to_completed(
+    sess: &Session,
+    turn_context: &TurnContext,
+    prompt: &Prompt,
+) -> CodexResult<()> {
+    let mut stream = turn_context.client.clone().stream(prompt).await?;
+    while let Some(event) = stream.next().await {
+        match event? {
+            ResponseEvent::OutputItemDone(item) => {
+                let mut state = sess.state.lock_unchecked();
+                state.history.record_items(std::slice::from_ref(&item));
+            }
+            ResponseEvent::Completed { .. } => return Ok(()),
+            _ => {}
+        }
+    }
+    Err(CodexErr::Stream(
+        "stream closed before response.completed".into(),
+        None,
+    ))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    /// Builds an id-less `user` message holding a single input-text segment.
+    fn user_text_message(text: &str) -> ResponseItem {
+        ResponseItem::Message {
+            id: None,
+            role: "user".to_string(),
+            content: vec![ContentItem::InputText {
+                text: text.to_string(),
+            }],
+        }
+    }
+
+    /// Empty segments are skipped and the remaining ones are newline-joined.
+    #[test]
+    fn content_items_to_text_joins_non_empty_segments() {
+        let content = [
+            ContentItem::InputText {
+                text: String::from("hello"),
+            },
+            ContentItem::OutputText {
+                text: String::new(),
+            },
+            ContentItem::OutputText {
+                text: String::from("world"),
+            },
+        ];
+
+        let text = content_items_to_text(&content);
+
+        assert_eq!(Some(String::from("hello\nworld")), text);
+    }
+
+    /// Content made only of images yields no text at all.
+    #[test]
+    fn content_items_to_text_ignores_image_only_content() {
+        let content = [ContentItem::InputImage {
+            image_url: String::from("file://image.png"),
+        }];
+
+        let text = content_items_to_text(&content);
+
+        assert_eq!(None, text);
+    }
+
+    /// Only `user` messages contribute; assistant messages and other items are skipped.
+    #[test]
+    fn collect_user_messages_extracts_user_text_only() {
+        let assistant_message = ResponseItem::Message {
+            id: Some(String::from("assistant")),
+            role: String::from("assistant"),
+            content: vec![ContentItem::OutputText {
+                text: String::from("ignored"),
+            }],
+        };
+        let user_message = ResponseItem::Message {
+            id: Some(String::from("user")),
+            role: String::from("user"),
+            content: vec![
+                ContentItem::InputText {
+                    text: String::from("first"),
+                },
+                ContentItem::OutputText {
+                    text: String::from("second"),
+                },
+            ],
+        };
+        let history = [assistant_message, user_message, ResponseItem::Other];
+
+        let messages = collect_user_messages(&history);
+
+        assert_eq!(vec![String::from("first\nsecond")], messages);
+    }
+
+    /// User-instructions and environment-context messages are filtered out.
+    #[test]
+    fn collect_user_messages_filters_session_prefix_entries() {
+        let history = [
+            user_text_message("<user_instructions>do things</user_instructions>"),
+            user_text_message("<ENVIRONMENT_CONTEXT>cwd=/tmp</ENVIRONMENT_CONTEXT>"),
+            user_text_message("real user message"),
+        ];
+
+        let messages = collect_user_messages(&history);
+
+        assert_eq!(vec![String::from("real user message")], messages);
+    }
+}
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -55,6 +55,9 @@ pub struct Config {
    /// Maximum number of output tokens.
    pub model_max_output_tokens: Option<u64>,

+    /// Token usage threshold triggering auto-compaction of conversation history.
+    pub model_auto_compact_token_limit: Option<i64>,
+
    /// Key into the model_providers map that specifies which provider to use.
    pub model_provider_id: String,

@@ -519,6 +522,9 @@ pub struct ConfigToml {
    /// Maximum number of output tokens.
    pub model_max_output_tokens: Option<u64>,

+    /// Token usage threshold triggering auto-compaction of conversation history.
+    pub model_auto_compact_token_limit: Option<i64>,
+
    /// Default approval policy for executing commands.
    pub approval_policy: Option<AskForApproval>,

@@ -877,6 +883,11 @@ impl Config {
                .as_ref()
                .map(|info| info.max_output_tokens)
        });
+        let model_auto_compact_token_limit = cfg.model_auto_compact_token_limit.or_else(|| {
+            openai_model_info
+                .as_ref()
+                .and_then(|info| info.auto_compact_token_limit)
+        });

        let experimental_resume = cfg.experimental_resume;

@@ -896,6 +907,7 @@ impl Config {
            model_family,
            model_context_window,
            model_max_output_tokens,
+            model_auto_compact_token_limit,
            model_provider_id,
            model_provider,
            cwd: resolved_cwd,
@@ -1430,6 +1442,7 @@ model_verbosity = "high"
                model_family: find_family_for_model("o3").expect("known model slug"),
                model_context_window: Some(200_000),
                model_max_output_tokens: Some(100_000),
+                model_auto_compact_token_limit: None,
                model_provider_id: "openai".to_string(),
                model_provider: fixture.openai_provider.clone(),
                approval_policy: AskForApproval::Never,
@@ -1486,6 +1499,7 @@ model_verbosity = "high"
            model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
            model_context_window: Some(16_385),
            model_max_output_tokens: Some(4_096),
+            model_auto_compact_token_limit: None,
            model_provider_id: "openai-chat-completions".to_string(),
            model_provider: fixture.openai_chat_completions_provider.clone(),
            approval_policy: AskForApproval::UnlessTrusted,
@@ -1557,6 +1571,7 @@ model_verbosity = "high"
            model_family: find_family_for_model("o3").expect("known model slug"),
            model_context_window: Some(200_000),
            model_max_output_tokens: Some(100_000),
+            model_auto_compact_token_limit: None,
            model_provider_id: "openai".to_string(),
            model_provider: fixture.openai_provider.clone(),
            approval_policy: AskForApproval::OnFailure,
@@ -1614,6 +1629,7 @@ model_verbosity = "high"
            model_family: find_family_for_model("gpt-5").expect("known model slug"),
            model_context_window: Some(272_000),
            model_max_output_tokens: Some(128_000),
+            model_auto_compact_token_limit: None,
            model_provider_id: "openai".to_string(),
            model_provider: fixture.openai_provider.clone(),
            approval_policy: AskForApproval::OnFailure,
--- a/codex-rs/core/src/conversation_history.rs
+++ b/codex-rs/core/src/conversation_history.rs
@@ -1,4 +1,3 @@
-use codex_protocol::models::ContentItem;
 use codex_protocol::models::ResponseItem;

 /// Transcript of conversation history
@@ -33,52 +32,8 @@ impl ConversationHistory {
        }
    }

-    pub(crate) fn keep_last_messages(&mut self, n: usize) {
-        if n == 0 {
-            self.items.clear();
-            return;
-        }
-
-        // Collect the last N message items (assistant/user), newest to oldest.
-        let mut kept: Vec<ResponseItem> = Vec::with_capacity(n);
-        for item in self.items.iter().rev() {
-            if let ResponseItem::Message { role, content, .. } = item {
-                kept.push(ResponseItem::Message {
-                    // we need to remove the id or the model will complain that messages are sent without
-                    // their reasonings
-                    id: None,
-                    role: role.clone(),
-                    content: content.clone(),
-                });
-                if kept.len() == n {
-                    break;
-                }
-            }
-        }
-
-        // Preserve chronological order (oldest to newest) within the kept slice.
-        kept.reverse();
-        self.items = kept;
-    }
-
-    pub(crate) fn last_agent_message(&self) -> String {
-        for item in self.items.iter().rev() {
-            if let ResponseItem::Message { role, content, .. } = item
-                && role == "assistant"
-            {
-                return content
-                    .iter()
-                    .find_map(|ci| {
-                        if let ContentItem::OutputText { text } = ci {
-                            Some(text.clone())
-                        } else {
-                            None
-                        }
-                    })
-                    .unwrap_or_default();
-            }
-        }
-        String::new()
+    pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) { // Takes ownership of `items`.
+        self.items = items; // Overwrite the stored transcript wholesale; the old items are dropped, not merged.
     }
 }

--- a/codex-rs/core/src/openai_model_info.rs
+++ b/codex-rs/core/src/openai_model_info.rs
@@ -12,6 +12,19 @@ pub(crate) struct ModelInfo {

    /// Maximum number of output tokens that can be generated for the model.
    pub(crate) max_output_tokens: u64,
+
+    /// Token threshold where we should automatically compact conversation history.
+    pub(crate) auto_compact_token_limit: Option<i64>,
+}
+
+impl ModelInfo { // Private constructor shared by the per-model table entries.
+    const fn new(context_window: u64, max_output_tokens: u64) -> Self { // `const fn`: callable in constant contexts.
+        Self {
+            context_window,
+            max_output_tokens,
+            auto_compact_token_limit: None, // This constructor never sets a threshold; it always starts as `None`.
+        }
+    }
 }

 pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
@@ -20,73 +33,37 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
        // OSS models have a 128k shared token pool.
        // Arbitrarily splitting it: 3/4 input context, 1/4 output.
        // https://openai.com/index/gpt-oss-model-card/
-        "gpt-oss-20b" => Some(ModelInfo {
-            context_window: 96_000,
-            max_output_tokens: 32_000,
-        }),
-        "gpt-oss-120b" => Some(ModelInfo {
-            context_window: 96_000,
-            max_output_tokens: 32_000,
-        }),
+        "gpt-oss-20b" => Some(ModelInfo::new(96_000, 32_000)),
+        "gpt-oss-120b" => Some(ModelInfo::new(96_000, 32_000)),
        // https://platform.openai.com/docs/models/o3
-        "o3" => Some(ModelInfo {
-            context_window: 200_000,
-            max_output_tokens: 100_000,
-        }),
+        "o3" => Some(ModelInfo::new(200_000, 100_000)),

        // https://platform.openai.com/docs/models/o4-mini
-        "o4-mini" => Some(ModelInfo {
-            context_window: 200_000,
-            max_output_tokens: 100_000,
-        }),
+        "o4-mini" => Some(ModelInfo::new(200_000, 100_000)),

        // https://platform.openai.com/docs/models/codex-mini-latest
-        "codex-mini-latest" => Some(ModelInfo {
-            context_window: 200_000,
-            max_output_tokens: 100_000,
-        }),
+        "codex-mini-latest" => Some(ModelInfo::new(200_000, 100_000)),

        // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
        // https://platform.openai.com/docs/models/gpt-4.1
-        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
-            context_window: 1_047_576,
-            max_output_tokens: 32_768,
-        }),
+        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo::new(1_047_576, 32_768)),

        // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
        // https://platform.openai.com/docs/models/gpt-4o
-        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
-            context_window: 128_000,
-            max_output_tokens: 16_384,
-        }),
+        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo::new(128_000, 16_384)),

        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
-        "gpt-4o-2024-05-13" => Some(ModelInfo {
-            context_window: 128_000,
-            max_output_tokens: 4_096,
-        }),
+        "gpt-4o-2024-05-13" => Some(ModelInfo::new(128_000, 4_096)),

        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
-        "gpt-4o-2024-11-20" => Some(ModelInfo {
-            context_window: 128_000,
-            max_output_tokens: 16_384,
-        }),
+        "gpt-4o-2024-11-20" => Some(ModelInfo::new(128_000, 16_384)),

        // https://platform.openai.com/docs/models/gpt-3.5-turbo
-        "gpt-3.5-turbo" => Some(ModelInfo {
-            context_window: 16_385,
-            max_output_tokens: 4_096,
-        }),
+        "gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),

-        _ if slug.starts_with("gpt-5") => Some(ModelInfo {
-            context_window: 272_000,
-            max_output_tokens: 128_000,
-        }),
+        _ if slug.starts_with("gpt-5") => Some(ModelInfo::new(272_000, 128_000)),

-        _ if slug.starts_with("codex-") => Some(ModelInfo {
-            context_window: 272_000,
-            max_output_tokens: 128_000,
-        }),
+        _ if slug.starts_with("codex-") => Some(ModelInfo::new(272_000, 128_000)),

        _ => None,
    }
--- a/codex-rs/core/src/prompt_for_compact_command.md
+++ b/codex-rs/core/src/prompt_for_compact_command.md
@@ -1,21 +0,0 @@
-You are a summarization assistant. A conversation follows between a user and a coding-focused AI (Codex). Your task is to generate a clear summary capturing:
-
-• High-level objective or problem being solved  
-• Key instructions or design decisions given by the user  
-• Main code actions or behaviors from the AI  
-• Important variables, functions, modules, or outputs discussed  
-• Any unresolved questions or next steps
-
-Produce the summary in a structured format like:
-
-**Objective:** …
-
-**User instructions:** … (bulleted)
-
-**AI actions / code behavior:** … (bulleted)
-
-**Important entities:** … (e.g. function names, variables, files)
-
-**Open issues / next steps:** … (if any)
-
-**Summary (concise):** (one or two sentences)
--- a/codex-rs/core/src/unified_exec/mod.rs
+++ b/codex-rs/core/src/unified_exec/mod.rs
@@ -421,7 +421,7 @@ mod tests {
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["bash".to_string(), "-i".to_string()],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;
        let session_id = open_shell.session_id.expect("expected session_id");
@@ -441,7 +441,7 @@ mod tests {
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;
        assert!(out_2.output.contains("codex"));
@@ -458,7 +458,7 @@ mod tests {
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["/bin/bash".to_string(), "-i".to_string()],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;
        let session_a = shell_a.session_id.expect("expected session id");
@@ -467,7 +467,7 @@ mod tests {
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_a),
                input_chunks: &["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;

@@ -478,7 +478,7 @@ mod tests {
                    "echo".to_string(),
                    "$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(),
                ],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;
        assert!(!out_2.output.contains("codex"));
@@ -487,7 +487,7 @@ mod tests {
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_a),
                input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;
        assert!(out_3.output.contains("codex"));
@@ -504,7 +504,7 @@ mod tests {
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["bash".to_string(), "-i".to_string()],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;
        let session_id = open_shell.session_id.expect("expected session id");
@@ -516,7 +516,7 @@ mod tests {
                    "export".to_string(),
                    "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
                ],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;

@@ -574,7 +574,7 @@ mod tests {
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["/bin/echo".to_string(), "codex".to_string()],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;

@@ -595,7 +595,7 @@ mod tests {
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["/bin/bash".to_string(), "-i".to_string()],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;
        let session_id = open_shell.session_id.expect("expected session id");
@@ -604,7 +604,7 @@ mod tests {
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &["exit\n".to_string()],
-                timeout_ms: Some(1_500),
+                timeout_ms: Some(2_500),
            })
            .await?;