Add /compact (#1527)

- Add operation to summarize the context so far. - The operation runs a compact task that summarizes the context. - The operation clear the previous context to free the context window - The operation didn't use `run_task` to avoid corrupting the session - Add /compact in the tui https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
2025-07-31 21:34:32 -07:00
parent ad0295b893
commit e2c994e32a
8 changed files with 474 additions and 3 deletions
--- a/SUMMARY.md
+++ b/SUMMARY.md
@@ -0,0 +1,21 @@
 You are a summarization assistant. A conversation follows between a user and a coding-focused AI (Codex). Your task is to generate a clear summary capturing:
 • High-level objective or problem being solved  
 • Key instructions or design decisions given by the user  
 • Main code actions or behaviors from the AI  
 • Important variables, functions, modules, or outputs discussed  
 • Any unresolved questions or next steps
 Produce the summary in a structured format like:
 **Objective:** …
 **User instructions:** … (bulleted)
 **AI actions / code behavior:** … (bulleted)
 **Important entities:** … (e.g. function names, variables, files)
 **Open issues / next steps:** … (if any)
 **Summary (concise):** (one or two sentences)
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -442,6 +442,12 @@ impl Session {
        let _ = self.tx_event.send(event).await;
    }
    /// Build the full turn input by concatenating the current conversation
    /// history with additional items for this turn.
    pub fn turn_input_with_history(&self, extra: Vec<ResponseItem>) -> Vec<ResponseItem> {
        [self.state.lock().unwrap().history.contents(), extra].concat()
    }
    /// Returns the input if there was no task running to inject into
    pub fn inject_input(&self, input: Vec<InputItem>) -> Result<(), Vec<InputItem>> {
        let mut state = self.state.lock().unwrap();
@@ -564,6 +570,25 @@ impl AgentTask {
            handle,
        }
    }
    fn compact(
        sess: Arc<Session>,
        sub_id: String,
        input: Vec<InputItem>,
        compact_instructions: String,
    ) -> Self {
        let handle = tokio::spawn(run_compact_task(
            Arc::clone(&sess),
            sub_id.clone(),
            input,
            compact_instructions,
        ))
        .abort_handle();
        Self {
            sess,
            sub_id,
            handle,
        }
    }
    fn abort(self) {
        if !self.handle.is_finished() {
@@ -884,6 +909,31 @@ async fn submission_loop(
                    }
                });
            }
            Op::Compact => {
                let sess = match sess.as_ref() {
                    Some(sess) => sess,
                    None => {
                        send_no_session_event(sub.id).await;
                        continue;
                    }
                };
                // Create a summarization request as user input
                const SUMMARIZATION_PROMPT: &str = include_str!("../../../SUMMARY.md");
                // Attempt to inject input into current task
                if let Err(items) = sess.inject_input(vec![InputItem::Text {
                    text: "Start Summarization".to_string(),
                }]) {
                    let task = AgentTask::compact(
                        sess.clone(),
                        sub.id,
                        items,
                        SUMMARIZATION_PROMPT.to_string(),
                    );
                    sess.set_task(task);
                }
            }
            Op::Shutdown => {
                info!("Shutting down Codex instance");
@@ -945,7 +995,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
        return;
    }
-    let initial_input_for_turn = ResponseInputItem::from(input);
+    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
    sess.record_conversation_items(&[initial_input_for_turn.clone().into()])
        .await;
@@ -966,8 +1016,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
        // conversation history on each turn. The rollout file, however, should
        // only record the new items that originated in this turn so that it
        // represents an append-only log without duplicates.
-        let turn_input: Vec<ResponseItem> =
+        let turn_input: Vec<ResponseItem> = sess.turn_input_with_history(pending_input);
            [sess.state.lock().unwrap().history.contents(), pending_input].concat();
        let turn_input_messages: Vec<String> = turn_input
            .iter()
@@ -1293,6 +1342,88 @@ async fn try_run_turn(
    }
 }
 async fn run_compact_task(
    sess: Arc<Session>,
    sub_id: String,
    input: Vec<InputItem>,
    compact_instructions: String,
 ) {
    let start_event = Event {
        id: sub_id.clone(),
        msg: EventMsg::TaskStarted,
    };
    if sess.tx_event.send(start_event).await.is_err() {
        return;
    }
    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
    let turn_input: Vec<ResponseItem> =
        sess.turn_input_with_history(vec![initial_input_for_turn.clone().into()]);
    let prompt = Prompt {
        input: turn_input,
        user_instructions: None,
        store: !sess.disable_response_storage,
        extra_tools: HashMap::new(),
        base_instructions_override: Some(compact_instructions.clone()),
    };
    let max_retries = sess.client.get_provider().stream_max_retries();
    let mut retries = 0;
    loop {
        let attempt_result = drain_to_completed(&sess, &prompt).await;
        match attempt_result {
            Ok(()) => break,
            Err(CodexErr::Interrupted) => return,
            Err(e) => {
                if retries < max_retries {
                    retries += 1;
                    let delay = backoff(retries);
                    sess.notify_background_event(
                        &sub_id,
                        format!(
                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
                        ),
                    )
                    .await;
                    tokio::time::sleep(delay).await;
                    continue;
                } else {
                    let event = Event {
                        id: sub_id.clone(),
                        msg: EventMsg::Error(ErrorEvent {
                            message: e.to_string(),
                        }),
                    };
                    sess.send_event(event).await;
                    return;
                }
            }
        }
    }
    sess.remove_task(&sub_id);
    let event = Event {
        id: sub_id.clone(),
        msg: EventMsg::AgentMessage(AgentMessageEvent {
            message: "Compact task completed".to_string(),
        }),
    };
    sess.send_event(event).await;
    let event = Event {
        id: sub_id.clone(),
        msg: EventMsg::TaskComplete(TaskCompleteEvent {
            last_agent_message: None,
        }),
    };
    sess.send_event(event).await;
    let mut state = sess.state.lock().unwrap();
    state.history.keep_last_messages(1);
 }
 async fn handle_response_item(
    sess: &Session,
    sub_id: &str,
@@ -1858,3 +1989,20 @@ fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option<St
        }
    })
 }
 async fn drain_to_completed(sess: &Session, prompt: &Prompt) -> CodexResult<()> {
    let mut stream = sess.client.clone().stream(prompt).await?;
    loop {
        let maybe_event = stream.next().await;
        let Some(event) = maybe_event else {
            return Err(CodexErr::Stream(
                "stream closed before response.completed".into(),
            ));
        };
        match event {
            Ok(ResponseEvent::Completed { .. }) => return Ok(()),
            Ok(_) => continue,
            Err(e) => return Err(e),
        }
    }
 }
--- a/codex-rs/core/src/conversation_history.rs
+++ b/codex-rs/core/src/conversation_history.rs
@@ -30,6 +30,34 @@ impl ConversationHistory {
            }
        }
    }
    pub(crate) fn keep_last_messages(&mut self, n: usize) {
        if n == 0 {
            self.items.clear();
            return;
        }
        // Collect the last N message items (assistant/user), newest to oldest.
        let mut kept: Vec<ResponseItem> = Vec::with_capacity(n);
        for item in self.items.iter().rev() {
            if let ResponseItem::Message { role, content, .. } = item {
                kept.push(ResponseItem::Message {
                    // we need to remove the id or the model will complain that messages are sent without
                    // their reasonings
                    id: None,
                    role: role.clone(),
                    content: content.clone(),
                });
                if kept.len() == n {
                    break;
                }
            }
        }
        // Preserve chronological order (oldest to newest) within the kept slice.
        kept.reverse();
        self.items = kept;
    }
 }
 /// Anything that is not a system message or "reasoning" message is considered
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -121,6 +121,10 @@ pub enum Op {
    /// Request a single history entry identified by `log_id` + `offset`.
    GetHistoryEntryRequest { offset: usize, log_id: u64 },
    /// Request the agent to summarize the current conversation context.
    /// The agent will use its existing context (either conversation history or previous response id)
    /// to generate a summary which will be returned as an AgentMessage event.
    Compact,
    /// Request to shut down codex instance.
    Shutdown,
 }
--- a/codex-rs/core/tests/compact.rs
+++ b/codex-rs/core/tests/compact.rs
@@ -0,0 +1,254 @@
 #![expect(clippy::unwrap_used)]
 use codex_core::Codex;
 use codex_core::CodexSpawnOk;
 use codex_core::ModelProviderInfo;
 use codex_core::built_in_model_providers;
 use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
 use codex_login::CodexAuth;
 use core_test_support::load_default_config_for_test;
 use core_test_support::wait_for_event;
 use serde_json::Value;
 use tempfile::TempDir;
 use wiremock::Mock;
 use wiremock::MockServer;
 use wiremock::ResponseTemplate;
 use wiremock::matchers::method;
 use wiremock::matchers::path;
 use pretty_assertions::assert_eq;
 // --- Test helpers -----------------------------------------------------------
 /// Build an SSE stream body from a list of JSON events.
 fn sse(events: Vec<Value>) -> String {
    use std::fmt::Write as _;
    let mut out = String::new();
    for ev in events {
        let kind = ev.get("type").and_then(|v| v.as_str()).unwrap();
        writeln!(&mut out, "event: {kind}").unwrap();
        if !ev.as_object().map(|o| o.len() == 1).unwrap_or(false) {
            write!(&mut out, "data: {ev}\n\n").unwrap();
        } else {
            out.push('\n');
        }
    }
    out
 }
 /// Convenience: SSE event for a completed response with a specific id.
 fn ev_completed(id: &str) -> Value {
    serde_json::json!({
        "type": "response.completed",
        "response": {
            "id": id,
            "usage": {"input_tokens":0,"input_tokens_details":null,"output_tokens":0,"output_tokens_details":null,"total_tokens":0}
        }
    })
 }
 /// Convenience: SSE event for a single assistant message output item.
 fn ev_assistant_message(id: &str, text: &str) -> Value {
    serde_json::json!({
        "type": "response.output_item.done",
        "item": {
            "type": "message",
            "role": "assistant",
            "id": id,
            "content": [{"type": "output_text", "text": text}]
        }
    })
 }
 fn sse_response(body: String) -> ResponseTemplate {
    ResponseTemplate::new(200)
        .insert_header("content-type", "text/event-stream")
        .set_body_raw(body, "text/event-stream")
 }
 async fn mount_sse_once<M>(server: &MockServer, matcher: M, body: String)
 where
    M: wiremock::Match + Send + Sync + 'static,
 {
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .and(matcher)
        .respond_with(sse_response(body))
        .expect(1)
        .mount(server)
        .await;
 }
 const FIRST_REPLY: &str = "FIRST_REPLY";
 const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT";
 const SUMMARIZE_TRIGGER: &str = "Start Summarization";
 const THIRD_USER_MSG: &str = "next turn";
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn summarize_context_three_requests_and_instructions() {
    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
        return;
    }
    // Set up a mock server that we can inspect after the run.
    let server = MockServer::start().await;
    // SSE 1: assistant replies normally so it is recorded in history.
    let sse1 = sse(vec![
        ev_assistant_message("m1", FIRST_REPLY),
        ev_completed("r1"),
    ]);
    // SSE 2: summarizer returns a summary message.
    let sse2 = sse(vec![
        ev_assistant_message("m2", SUMMARY_TEXT),
        ev_completed("r2"),
    ]);
    // SSE 3: minimal completed; we only need to capture the request body.
    let sse3 = sse(vec![ev_completed("r3")]);
    // Mount three expectations, one per request, matched by body content.
    let first_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("\"text\":\"hello world\"")
            && !body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
    };
    mount_sse_once(&server, first_matcher, sse1).await;
    let second_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
    };
    mount_sse_once(&server, second_matcher, sse2).await;
    let third_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains(&format!("\"text\":\"{THIRD_USER_MSG}\""))
    };
    mount_sse_once(&server, third_matcher, sse3).await;
    // Build config pointing to the mock server and spawn Codex.
    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers()["openai"].clone()
    };
    let home = TempDir::new().unwrap();
    let mut config = load_default_config_for_test(&home);
    config.model_provider = model_provider;
    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
    let CodexSpawnOk { codex, .. } = Codex::spawn(
        config,
        Some(CodexAuth::from_api_key("dummy".to_string())),
        ctrl_c.clone(),
    )
    .await
    .unwrap();
    // 1) Normal user input – should hit server once.
    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
                text: "hello world".into(),
            }],
        })
        .await
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
    // 2) Summarize – second hit with summarization instructions.
    codex.submit(Op::Compact).await.unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
    // 3) Next user input – third hit; history should include only the summary.
    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
                text: THIRD_USER_MSG.into(),
            }],
        })
        .await
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
    // Inspect the three captured requests.
    let requests = server.received_requests().await.unwrap();
    assert_eq!(requests.len(), 3, "expected exactly three requests");
    let req1 = &requests[0];
    let req2 = &requests[1];
    let req3 = &requests[2];
    let body1 = req1.body_json::<serde_json::Value>().unwrap();
    let body2 = req2.body_json::<serde_json::Value>().unwrap();
    let body3 = req3.body_json::<serde_json::Value>().unwrap();
    // System instructions should change for the summarization turn.
    let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();
    let instr2 = body2.get("instructions").and_then(|v| v.as_str()).unwrap();
    assert_ne!(
        instr1, instr2,
        "summarization should override base instructions"
    );
    assert!(
        instr2.contains("You are a summarization assistant"),
        "summarization instructions not applied"
    );
    // The summarization request should include the injected user input marker.
    let input2 = body2.get("input").and_then(|v| v.as_array()).unwrap();
    // The last item is the user message created from the injected input.
    let last2 = input2.last().unwrap();
    assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
    assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
    let text2 = last2["content"][0]["text"].as_str().unwrap();
    assert!(text2.contains(SUMMARIZE_TRIGGER));
    // Third request must contain only the summary from step 2 as prior history plus new user msg.
    let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
    println!("third request body: {body3}");
    assert!(
        input3.len() >= 2,
        "expected summary + new user message in third request"
    );
    // Collect all (role, text) message tuples.
    let mut messages: Vec<(String, String)> = Vec::new();
    for item in input3 {
        if item["type"].as_str() == Some("message") {
            let role = item["role"].as_str().unwrap_or_default().to_string();
            let text = item["content"][0]["text"]
                .as_str()
                .unwrap_or_default()
                .to_string();
            messages.push((role, text));
        }
    }
    // Exactly one assistant message should remain after compaction and the new user message is present.
    let assistant_count = messages.iter().filter(|(r, _)| r == "assistant").count();
    assert_eq!(
        assistant_count, 1,
        "exactly one assistant message should remain after compaction"
    );
    assert!(
        messages
            .iter()
            .any(|(r, t)| r == "user" && t == THIRD_USER_MSG),
        "third request should include the new user message"
    );
    assert!(
        !messages.iter().any(|(_, t)| t.contains("hello world")),
        "third request should not include the original user input"
    );
    assert!(
        !messages.iter().any(|(_, t)| t.contains(SUMMARIZE_TRIGGER)),
        "third request should not include the summarize trigger"
    );
 }
--- a/codex-rs/tui/src/app.rs
+++ b/codex-rs/tui/src/app.rs
@@ -10,6 +10,8 @@ use crate::tui;
 use codex_core::config::Config;
 use codex_core::protocol::Event;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::ExecApprovalRequestEvent;
 use codex_core::protocol::Op;
 use color_eyre::eyre::Result;
 use crossterm::SynchronizedUpdate;
 use crossterm::event::KeyCode;
@@ -298,6 +300,12 @@ impl App<'_> {
                        self.app_state = AppState::Chat { widget: new_widget };
                        self.app_event_tx.send(AppEvent::RequestRedraw);
                    }
                    SlashCommand::Compact => {
                        if let AppState::Chat { widget } = &mut self.app_state {
                            widget.clear_token_usage();
                            self.app_event_tx.send(AppEvent::CodexOp(Op::Compact));
                        }
                    }
                    SlashCommand::Quit => {
                        break;
                    }
--- a/codex-rs/tui/src/chatwidget.rs
+++ b/codex-rs/tui/src/chatwidget.rs
@@ -502,6 +502,12 @@ impl ChatWidget<'_> {
    pub(crate) fn token_usage(&self) -> &TokenUsage {
        &self.token_usage
    }
    pub(crate) fn clear_token_usage(&mut self) {
        self.token_usage = TokenUsage::default();
        self.bottom_pane
            .set_token_usage(self.token_usage.clone(), self.config.model_context_window);
    }
 }
 impl WidgetRef for &ChatWidget<'_> {
--- a/codex-rs/tui/src/slash_command.rs
+++ b/codex-rs/tui/src/slash_command.rs
@@ -13,6 +13,7 @@ pub enum SlashCommand {
    // DO NOT ALPHA-SORT! Enum order is presentation order in the popup, so
    // more frequently used commands should be listed first.
    New,
    Compact,
    Diff,
    Quit,
    #[cfg(debug_assertions)]
@@ -24,6 +25,7 @@ impl SlashCommand {
    pub fn description(self) -> &'static str {
        match self {
            SlashCommand::New => "Start a new chat.",
            SlashCommand::Compact => "Compact the chat history.",
            SlashCommand::Quit => "Exit the application.",
            SlashCommand::Diff => {
                "Show git diff of the working directory (including untracked files)"