From b84a920067aa87aea2cb0a7002050aa64648b324 Mon Sep 17 00:00:00 2001
From: jif-oai <jif@openai.com>
Date: Tue, 23 Sep 2025 17:59:17 +0100
Subject: [PATCH] chore: compact do not modify instructions  (#4088)

Keep the developer instruction and insert the summarisation message as a
user message instead
---
 codex-rs/core/src/codex.rs                    |  9 +-
 codex-rs/core/src/codex/compact.rs            | 31 ++-----
 codex-rs/core/tests/suite/compact.rs          | 86 +++++++++++++------
 .../core/tests/suite/compact_resume_fork.rs   | 24 +++---
 4 files changed, 81 insertions(+), 69 deletions(-)
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index b89ebc73..49ab6940 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1117,16 +1117,13 @@ impl AgentTask {
         turn_context: Arc<TurnContext>,
         sub_id: String,
         input: Vec<InputItem>,
-        compact_instructions: String,
     ) -> Self {
         let handle = {
             let sess = sess.clone();
             let sub_id = sub_id.clone();
             let tc = Arc::clone(&turn_context);
-            tokio::spawn(async move {
-                compact::run_compact_task(sess, tc, sub_id, input, compact_instructions).await
-            })
-            .abort_handle()
+            tokio::spawn(async move { compact::run_compact_task(sess, tc, sub_id, input).await })
+                .abort_handle()
         };
         Self {
             sess,
@@ -1434,7 +1431,7 @@ async fn submission_loop(
                 // Attempt to inject input into current task
                 if let Err(items) = sess
                     .inject_input(vec![InputItem::Text {
-                        text: compact::COMPACT_TRIGGER_TEXT.to_string(),
+                        text: compact::SUMMARIZATION_PROMPT.to_string(),
                     }])
                     .await
                 {
diff --git a/codex-rs/core/src/codex/compact.rs b/codex-rs/core/src/codex/compact.rs
index d1547a48..a60466b9 100644
--- a/codex-rs/core/src/codex/compact.rs
+++ b/codex-rs/core/src/codex/compact.rs
@@ -27,8 +27,7 @@ use codex_protocol::models::ResponseItem;
 use codex_protocol::protocol::RolloutItem;
 use futures::prelude::*;
 
-pub(super) const COMPACT_TRIGGER_TEXT: &str = "Start Summarization";
-const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
+pub const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
 const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
 
 #[derive(Template)]
@@ -44,13 +43,7 @@ pub(super) async fn spawn_compact_task(
     sub_id: String,
     input: Vec<InputItem>,
 ) {
-    let task = AgentTask::compact(
-        sess.clone(),
-        turn_context,
-        sub_id,
-        input,
-        SUMMARIZATION_PROMPT.to_string(),
-    );
+    let task = AgentTask::compact(sess.clone(), turn_context, sub_id, input);
     sess.set_task(task).await;
 }
 
@@ -60,17 +53,9 @@ pub(super) async fn run_inline_auto_compact_task(
 ) {
     let sub_id = sess.next_internal_sub_id();
     let input = vec![InputItem::Text {
-        text: COMPACT_TRIGGER_TEXT.to_string(),
+        text: SUMMARIZATION_PROMPT.to_string(),
     }];
-    run_compact_task_inner(
-        sess,
-        turn_context,
-        sub_id,
-        input,
-        SUMMARIZATION_PROMPT.to_string(),
-        false,
-    )
-    .await;
+    run_compact_task_inner(sess, turn_context, sub_id, input, None, false).await;
 }
 
 pub(super) async fn run_compact_task(
@@ -78,7 +63,6 @@ pub(super) async fn run_compact_task(
     turn_context: Arc<TurnContext>,
     sub_id: String,
     input: Vec<InputItem>,
-    compact_instructions: String,
 ) {
     let start_event = Event {
         id: sub_id.clone(),
@@ -92,7 +76,7 @@ pub(super) async fn run_compact_task(
         turn_context,
         sub_id.clone(),
         input,
-        compact_instructions,
+        None,
         true,
     )
     .await;
@@ -110,11 +94,10 @@ async fn run_compact_task_inner(
     turn_context: Arc<TurnContext>,
     sub_id: String,
     input: Vec<InputItem>,
-    compact_instructions: String,
+    instructions_override: Option<String>,
     remove_task_on_completion: bool,
 ) {
     let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
-    let instructions_override = compact_instructions;
     let turn_input = sess
         .turn_input_with_history(vec![initial_input_for_turn.clone().into()])
         .await;
@@ -122,7 +105,7 @@ async fn run_compact_task_inner(
     let prompt = Prompt {
         input: turn_input,
         tools: Vec::new(),
-        base_instructions_override: Some(instructions_override),
+        base_instructions_override: instructions_override,
     };
 
     let max_retries = turn_context.client.get_provider().stream_max_retries();
diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs
index 3cae9841..9e456e58 100644
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -19,6 +19,7 @@ use wiremock::ResponseTemplate;
 use wiremock::matchers::method;
 use wiremock::matchers::path;
 
+use codex_core::codex::compact::SUMMARIZATION_PROMPT;
 use core_test_support::non_sandbox_test;
 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
@@ -37,7 +38,6 @@ use std::sync::atomic::Ordering;
 
 pub(super) const FIRST_REPLY: &str = "FIRST_REPLY";
 pub(super) const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT";
-pub(super) const SUMMARIZE_TRIGGER: &str = "Start Summarization";
 const THIRD_USER_MSG: &str = "next turn";
 const AUTO_SUMMARY_TEXT: &str = "AUTO_SUMMARY";
 const FIRST_AUTO_MSG: &str = "token limit start";
@@ -77,13 +77,13 @@ async fn summarize_context_three_requests_and_instructions() {
     let first_matcher = |req: &wiremock::Request| {
         let body = std::str::from_utf8(&req.body).unwrap_or("");
         body.contains("\"text\":\"hello world\"")
-            && !body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
+            && !body.contains("You have exceeded the maximum number of tokens")
     };
     mount_sse_once(&server, first_matcher, sse1).await;
 
     let second_matcher = |req: &wiremock::Request| {
         let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
+        body.contains("You have exceeded the maximum number of tokens")
     };
     mount_sse_once(&server, second_matcher, sse2).await;
 
@@ -121,7 +121,7 @@ async fn summarize_context_three_requests_and_instructions() {
         .unwrap();
     wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 
-    // 2) Summarize – second hit with summarization instructions.
+    // 2) Summarize – second hit should include the summarization prompt.
     codex.submit(Op::Compact).await.unwrap();
     wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 
@@ -148,16 +148,12 @@ async fn summarize_context_three_requests_and_instructions() {
     let body2 = req2.body_json::<serde_json::Value>().unwrap();
     let body3 = req3.body_json::<serde_json::Value>().unwrap();
 
-    // System instructions should change for the summarization turn.
+    // Manual compact should keep the baseline developer instructions.
     let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();
     let instr2 = body2.get("instructions").and_then(|v| v.as_str()).unwrap();
-    assert_ne!(
+    assert_eq!(
         instr1, instr2,
-        "summarization should override base instructions"
-    );
-    assert!(
-        instr2.contains("You have exceeded the maximum number of tokens"),
-        "summarization instructions not applied"
+        "manual compact should keep the standard developer instructions"
     );
 
     // The summarization request should include the injected user input marker.
@@ -167,14 +163,14 @@ async fn summarize_context_three_requests_and_instructions() {
     assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
     assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
     let text2 = last2["content"][0]["text"].as_str().unwrap();
-    assert!(
-        text2.contains(SUMMARIZE_TRIGGER),
+    assert_eq!(
+        text2, SUMMARIZATION_PROMPT,
         "expected summarize trigger, got `{text2}`"
     );
 
     // Third request must contain the refreshed instructions, bridge summary message and new user msg.
     let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
-    println!("third request body: {body3}");
+
     assert!(
         input3.len() >= 3,
         "expected refreshed context and new user message in third request"
@@ -215,13 +211,13 @@ async fn summarize_context_three_requests_and_instructions() {
         "bridge should capture earlier user messages"
     );
     assert!(
-        !bridge_text.contains(SUMMARIZE_TRIGGER),
+        !bridge_text.contains(SUMMARIZATION_PROMPT),
         "bridge text should not echo the summarize trigger"
     );
     assert!(
         !messages
             .iter()
-            .any(|(_, text)| text.contains(SUMMARIZE_TRIGGER)),
+            .any(|(_, text)| text.contains(SUMMARIZATION_PROMPT)),
         "third request should not include the summarize trigger"
     );
 
@@ -395,6 +391,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
         "auto compact should add a third request"
     );
 
+    let body_first = requests[0].body_json::<serde_json::Value>().unwrap();
     let body3 = requests[auto_compact_index]
         .body_json::<serde_json::Value>()
         .unwrap();
@@ -402,9 +399,32 @@ async fn auto_compact_runs_after_token_limit_hit() {
         .get("instructions")
         .and_then(|v| v.as_str())
         .unwrap_or_default();
-    assert!(
-        instructions.contains("You have exceeded the maximum number of tokens"),
-        "auto compact should reuse summarization instructions"
+    let baseline_instructions = body_first
+        .get("instructions")
+        .and_then(|v| v.as_str())
+        .unwrap_or_default()
+        .to_string();
+    assert_eq!(
+        instructions, baseline_instructions,
+        "auto compact should keep the standard developer instructions",
+    );
+
+    let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
+    let last3 = input3
+        .last()
+        .expect("auto compact request should append a user message");
+    assert_eq!(last3.get("type").and_then(|v| v.as_str()), Some("message"));
+    assert_eq!(last3.get("role").and_then(|v| v.as_str()), Some("user"));
+    let last_text = last3
+        .get("content")
+        .and_then(|v| v.as_array())
+        .and_then(|items| items.first())
+        .and_then(|item| item.get("text"))
+        .and_then(|text| text.as_str())
+        .unwrap_or_default();
+    assert_eq!(
+        last_text, SUMMARIZATION_PROMPT,
+        "auto compact should send the summarization prompt as a user message",
     );
 }
 
@@ -635,13 +655,25 @@ async fn auto_compact_stops_after_failed_attempt() {
     );
 
     let last_body = requests[2].body_json::<serde_json::Value>().unwrap();
-    let instructions = last_body
-        .get("instructions")
-        .and_then(|v| v.as_str())
-        .unwrap_or_default();
+    let input = last_body
+        .get("input")
+        .and_then(|v| v.as_array())
+        .unwrap_or_else(|| panic!("unexpected request format: {last_body}"));
+    let contains_prompt = input.iter().any(|item| {
+        item.get("type").and_then(|v| v.as_str()) == Some("message")
+            && item.get("role").and_then(|v| v.as_str()) == Some("user")
+            && item
+                .get("content")
+                .and_then(|v| v.as_array())
+                .and_then(|items| items.first())
+                .and_then(|entry| entry.get("text"))
+                .and_then(|text| text.as_str())
+                .map(|text| text == SUMMARIZATION_PROMPT)
+                .unwrap_or(false)
+    });
     assert!(
-        !instructions.contains("You have exceeded the maximum number of tokens"),
-        "third request should be the follow-up turn, not another summarization"
+        !contains_prompt,
+        "third request should be the follow-up turn, not another summarization",
     );
 }
 
@@ -785,7 +817,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
     );
     assert!(
         request_bodies[1].contains("You have exceeded the maximum number of tokens"),
-        "first auto compact request should use summarization instructions"
+        "first auto compact request should include the summarization prompt"
     );
     assert!(
         request_bodies[3].contains(&format!("unsupported call: {DUMMY_FUNCTION_NAME}")),
@@ -793,6 +825,6 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
     );
     assert!(
         request_bodies[4].contains("You have exceeded the maximum number of tokens"),
-        "second auto compact request should reuse summarization instructions"
+        "second auto compact request should include the summarization prompt"
     );
 }
diff --git a/codex-rs/core/tests/suite/compact_resume_fork.rs b/codex-rs/core/tests/suite/compact_resume_fork.rs
index 43d6192f..73ad091f 100644
--- a/codex-rs/core/tests/suite/compact_resume_fork.rs
+++ b/codex-rs/core/tests/suite/compact_resume_fork.rs
@@ -8,7 +8,6 @@
 //! model-visible history matches the expected sequence of messages.
 
 use super::compact::FIRST_REPLY;
-use super::compact::SUMMARIZE_TRIGGER;
 use super::compact::SUMMARY_TEXT;
 use codex_core::CodexAuth;
 use codex_core::CodexConversation;
@@ -16,6 +15,7 @@ use codex_core::ConversationManager;
 use codex_core::ModelProviderInfo;
 use codex_core::NewConversation;
 use codex_core::built_in_model_providers;
+use codex_core::codex::compact::SUMMARIZATION_PROMPT;
 use codex_core::config::Config;
 use codex_core::protocol::ConversationPathResponseEvent;
 use codex_core::protocol::EventMsg;
@@ -183,11 +183,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
     let compact_1 = json!(
     {
       "model": "gpt-5-codex",
-      "instructions": "You have exceeded the maximum number of tokens, please stop coding and instead write a short memento message for the next agent. Your note should:
-- Summarize what you finished and what still needs work. If there was a recent update_plan call, repeat its steps verbatim.
-- List outstanding TODOs with file paths / line numbers so they're easy to find.
-- Flag code that needs more tests (edge cases, performance, integration, etc.).
-- Record any open bugs, quirks, or setup steps that will make it easier for the next agent to pick up where you left off.",
+      "instructions": prompt,
       "input": [
         {
           "type": "message",
@@ -235,7 +231,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
           "content": [
             {
               "type": "input_text",
-              "text": "Start Summarization"
+              "text": SUMMARIZATION_PROMPT
             }
           ]
         }
@@ -488,13 +484,14 @@ SUMMARY_ONLY_CONTEXT"
       ],
       "prompt_cache_key": fork_prompt_cache_key
     });
-    let expected = json!([
+    let mut expected = json!([
         user_turn_1,
         compact_1,
         user_turn_2_after_compact,
         usert_turn_3_after_resume,
         user_turn_3_after_fork
     ]);
+    normalize_line_endings(&mut expected);
     assert_eq!(requests.len(), 5);
     assert_eq!(json!(requests), expected);
 }
@@ -580,7 +577,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
         .unwrap_or_default()
         .to_string();
 
-    let expected = json!([
+    let mut expected = json!([
       {
         "instructions": prompt,
         "input": [
@@ -637,6 +634,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
         ],
       }
     ]);
+    normalize_line_endings(&mut expected);
     let last_request_after_2_compacts = json!([{
         "instructions": requests[requests.len() -1]["instructions"],
         "input": requests[requests.len() -1]["input"],
@@ -698,7 +696,8 @@ async fn mount_initial_flow(server: &MockServer) {
     let match_first = |req: &wiremock::Request| {
         let body = std::str::from_utf8(&req.body).unwrap_or("");
         body.contains("\"text\":\"hello world\"")
-            && !body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
+            && !body.contains("You have exceeded the maximum number of tokens")
+            && !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
             && !body.contains("\"text\":\"AFTER_COMPACT\"")
             && !body.contains("\"text\":\"AFTER_RESUME\"")
             && !body.contains("\"text\":\"AFTER_FORK\"")
@@ -707,7 +706,7 @@ async fn mount_initial_flow(server: &MockServer) {
 
     let match_compact = |req: &wiremock::Request| {
         let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
+        body.contains("You have exceeded the maximum number of tokens")
     };
     mount_sse_once(server, match_compact, sse2).await;
 
@@ -741,7 +740,8 @@ async fn mount_second_compact_flow(server: &MockServer) {
 
     let match_second_compact = |req: &wiremock::Request| {
         let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\"")) && body.contains("AFTER_FORK")
+        body.contains("You have exceeded the maximum number of tokens")
+            && body.contains("AFTER_FORK")
     };
     mount_sse_once(server, match_second_compact, sse6).await;