chore: compact do not modify instructions (#4088)
Keep the developer instruction and insert the summarisation message as a user message instead
This commit is contained in:
@@ -1117,16 +1117,13 @@ impl AgentTask {
|
||||
turn_context: Arc<TurnContext>,
|
||||
sub_id: String,
|
||||
input: Vec<InputItem>,
|
||||
compact_instructions: String,
|
||||
) -> Self {
|
||||
let handle = {
|
||||
let sess = sess.clone();
|
||||
let sub_id = sub_id.clone();
|
||||
let tc = Arc::clone(&turn_context);
|
||||
tokio::spawn(async move {
|
||||
compact::run_compact_task(sess, tc, sub_id, input, compact_instructions).await
|
||||
})
|
||||
.abort_handle()
|
||||
tokio::spawn(async move { compact::run_compact_task(sess, tc, sub_id, input).await })
|
||||
.abort_handle()
|
||||
};
|
||||
Self {
|
||||
sess,
|
||||
@@ -1434,7 +1431,7 @@ async fn submission_loop(
|
||||
// Attempt to inject input into current task
|
||||
if let Err(items) = sess
|
||||
.inject_input(vec![InputItem::Text {
|
||||
text: compact::COMPACT_TRIGGER_TEXT.to_string(),
|
||||
text: compact::SUMMARIZATION_PROMPT.to_string(),
|
||||
}])
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -27,8 +27,7 @@ use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use futures::prelude::*;
|
||||
|
||||
pub(super) const COMPACT_TRIGGER_TEXT: &str = "Start Summarization";
|
||||
const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
|
||||
pub const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
|
||||
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
|
||||
|
||||
#[derive(Template)]
|
||||
@@ -44,13 +43,7 @@ pub(super) async fn spawn_compact_task(
|
||||
sub_id: String,
|
||||
input: Vec<InputItem>,
|
||||
) {
|
||||
let task = AgentTask::compact(
|
||||
sess.clone(),
|
||||
turn_context,
|
||||
sub_id,
|
||||
input,
|
||||
SUMMARIZATION_PROMPT.to_string(),
|
||||
);
|
||||
let task = AgentTask::compact(sess.clone(), turn_context, sub_id, input);
|
||||
sess.set_task(task).await;
|
||||
}
|
||||
|
||||
@@ -60,17 +53,9 @@ pub(super) async fn run_inline_auto_compact_task(
|
||||
) {
|
||||
let sub_id = sess.next_internal_sub_id();
|
||||
let input = vec![InputItem::Text {
|
||||
text: COMPACT_TRIGGER_TEXT.to_string(),
|
||||
text: SUMMARIZATION_PROMPT.to_string(),
|
||||
}];
|
||||
run_compact_task_inner(
|
||||
sess,
|
||||
turn_context,
|
||||
sub_id,
|
||||
input,
|
||||
SUMMARIZATION_PROMPT.to_string(),
|
||||
false,
|
||||
)
|
||||
.await;
|
||||
run_compact_task_inner(sess, turn_context, sub_id, input, None, false).await;
|
||||
}
|
||||
|
||||
pub(super) async fn run_compact_task(
|
||||
@@ -78,7 +63,6 @@ pub(super) async fn run_compact_task(
|
||||
turn_context: Arc<TurnContext>,
|
||||
sub_id: String,
|
||||
input: Vec<InputItem>,
|
||||
compact_instructions: String,
|
||||
) {
|
||||
let start_event = Event {
|
||||
id: sub_id.clone(),
|
||||
@@ -92,7 +76,7 @@ pub(super) async fn run_compact_task(
|
||||
turn_context,
|
||||
sub_id.clone(),
|
||||
input,
|
||||
compact_instructions,
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.await;
|
||||
@@ -110,11 +94,10 @@ async fn run_compact_task_inner(
|
||||
turn_context: Arc<TurnContext>,
|
||||
sub_id: String,
|
||||
input: Vec<InputItem>,
|
||||
compact_instructions: String,
|
||||
instructions_override: Option<String>,
|
||||
remove_task_on_completion: bool,
|
||||
) {
|
||||
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
|
||||
let instructions_override = compact_instructions;
|
||||
let turn_input = sess
|
||||
.turn_input_with_history(vec![initial_input_for_turn.clone().into()])
|
||||
.await;
|
||||
@@ -122,7 +105,7 @@ async fn run_compact_task_inner(
|
||||
let prompt = Prompt {
|
||||
input: turn_input,
|
||||
tools: Vec::new(),
|
||||
base_instructions_override: Some(instructions_override),
|
||||
base_instructions_override: instructions_override,
|
||||
};
|
||||
|
||||
let max_retries = turn_context.client.get_provider().stream_max_retries();
|
||||
|
||||
@@ -19,6 +19,7 @@ use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
use codex_core::codex::compact::SUMMARIZATION_PROMPT;
|
||||
use core_test_support::non_sandbox_test;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
@@ -37,7 +38,6 @@ use std::sync::atomic::Ordering;
|
||||
|
||||
pub(super) const FIRST_REPLY: &str = "FIRST_REPLY";
|
||||
pub(super) const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT";
|
||||
pub(super) const SUMMARIZE_TRIGGER: &str = "Start Summarization";
|
||||
const THIRD_USER_MSG: &str = "next turn";
|
||||
const AUTO_SUMMARY_TEXT: &str = "AUTO_SUMMARY";
|
||||
const FIRST_AUTO_MSG: &str = "token limit start";
|
||||
@@ -77,13 +77,13 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
let first_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("\"text\":\"hello world\"")
|
||||
&& !body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once(&server, first_matcher, sse1).await;
|
||||
|
||||
let second_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once(&server, second_matcher, sse2).await;
|
||||
|
||||
@@ -121,7 +121,7 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// 2) Summarize – second hit with summarization instructions.
|
||||
// 2) Summarize – second hit should include the summarization prompt.
|
||||
codex.submit(Op::Compact).await.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
@@ -148,16 +148,12 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
let body2 = req2.body_json::<serde_json::Value>().unwrap();
|
||||
let body3 = req3.body_json::<serde_json::Value>().unwrap();
|
||||
|
||||
// System instructions should change for the summarization turn.
|
||||
// Manual compact should keep the baseline developer instructions.
|
||||
let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();
|
||||
let instr2 = body2.get("instructions").and_then(|v| v.as_str()).unwrap();
|
||||
assert_ne!(
|
||||
assert_eq!(
|
||||
instr1, instr2,
|
||||
"summarization should override base instructions"
|
||||
);
|
||||
assert!(
|
||||
instr2.contains("You have exceeded the maximum number of tokens"),
|
||||
"summarization instructions not applied"
|
||||
"manual compact should keep the standard developer instructions"
|
||||
);
|
||||
|
||||
// The summarization request should include the injected user input marker.
|
||||
@@ -167,14 +163,14 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
|
||||
assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
|
||||
let text2 = last2["content"][0]["text"].as_str().unwrap();
|
||||
assert!(
|
||||
text2.contains(SUMMARIZE_TRIGGER),
|
||||
assert_eq!(
|
||||
text2, SUMMARIZATION_PROMPT,
|
||||
"expected summarize trigger, got `{text2}`"
|
||||
);
|
||||
|
||||
// Third request must contain the refreshed instructions, bridge summary message and new user msg.
|
||||
let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
println!("third request body: {body3}");
|
||||
|
||||
assert!(
|
||||
input3.len() >= 3,
|
||||
"expected refreshed context and new user message in third request"
|
||||
@@ -215,13 +211,13 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
"bridge should capture earlier user messages"
|
||||
);
|
||||
assert!(
|
||||
!bridge_text.contains(SUMMARIZE_TRIGGER),
|
||||
!bridge_text.contains(SUMMARIZATION_PROMPT),
|
||||
"bridge text should not echo the summarize trigger"
|
||||
);
|
||||
assert!(
|
||||
!messages
|
||||
.iter()
|
||||
.any(|(_, text)| text.contains(SUMMARIZE_TRIGGER)),
|
||||
.any(|(_, text)| text.contains(SUMMARIZATION_PROMPT)),
|
||||
"third request should not include the summarize trigger"
|
||||
);
|
||||
|
||||
@@ -395,6 +391,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
"auto compact should add a third request"
|
||||
);
|
||||
|
||||
let body_first = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
let body3 = requests[auto_compact_index]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
@@ -402,9 +399,32 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
.get("instructions")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default();
|
||||
assert!(
|
||||
instructions.contains("You have exceeded the maximum number of tokens"),
|
||||
"auto compact should reuse summarization instructions"
|
||||
let baseline_instructions = body_first
|
||||
.get("instructions")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
assert_eq!(
|
||||
instructions, baseline_instructions,
|
||||
"auto compact should keep the standard developer instructions",
|
||||
);
|
||||
|
||||
let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
let last3 = input3
|
||||
.last()
|
||||
.expect("auto compact request should append a user message");
|
||||
assert_eq!(last3.get("type").and_then(|v| v.as_str()), Some("message"));
|
||||
assert_eq!(last3.get("role").and_then(|v| v.as_str()), Some("user"));
|
||||
let last_text = last3
|
||||
.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|items| items.first())
|
||||
.and_then(|item| item.get("text"))
|
||||
.and_then(|text| text.as_str())
|
||||
.unwrap_or_default();
|
||||
assert_eq!(
|
||||
last_text, SUMMARIZATION_PROMPT,
|
||||
"auto compact should send the summarization prompt as a user message",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -635,13 +655,25 @@ async fn auto_compact_stops_after_failed_attempt() {
|
||||
);
|
||||
|
||||
let last_body = requests[2].body_json::<serde_json::Value>().unwrap();
|
||||
let instructions = last_body
|
||||
.get("instructions")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default();
|
||||
let input = last_body
|
||||
.get("input")
|
||||
.and_then(|v| v.as_array())
|
||||
.unwrap_or_else(|| panic!("unexpected request format: {last_body}"));
|
||||
let contains_prompt = input.iter().any(|item| {
|
||||
item.get("type").and_then(|v| v.as_str()) == Some("message")
|
||||
&& item.get("role").and_then(|v| v.as_str()) == Some("user")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|items| items.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|text| text.as_str())
|
||||
.map(|text| text == SUMMARIZATION_PROMPT)
|
||||
.unwrap_or(false)
|
||||
});
|
||||
assert!(
|
||||
!instructions.contains("You have exceeded the maximum number of tokens"),
|
||||
"third request should be the follow-up turn, not another summarization"
|
||||
!contains_prompt,
|
||||
"third request should be the follow-up turn, not another summarization",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -785,7 +817,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
);
|
||||
assert!(
|
||||
request_bodies[1].contains("You have exceeded the maximum number of tokens"),
|
||||
"first auto compact request should use summarization instructions"
|
||||
"first auto compact request should include the summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
request_bodies[3].contains(&format!("unsupported call: {DUMMY_FUNCTION_NAME}")),
|
||||
@@ -793,6 +825,6 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
);
|
||||
assert!(
|
||||
request_bodies[4].contains("You have exceeded the maximum number of tokens"),
|
||||
"second auto compact request should reuse summarization instructions"
|
||||
"second auto compact request should include the summarization prompt"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
//! model-visible history matches the expected sequence of messages.
|
||||
|
||||
use super::compact::FIRST_REPLY;
|
||||
use super::compact::SUMMARIZE_TRIGGER;
|
||||
use super::compact::SUMMARY_TEXT;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
@@ -16,6 +15,7 @@ use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::codex::compact::SUMMARIZATION_PROMPT;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::ConversationPathResponseEvent;
|
||||
use codex_core::protocol::EventMsg;
|
||||
@@ -183,11 +183,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
let compact_1 = json!(
|
||||
{
|
||||
"model": "gpt-5-codex",
|
||||
"instructions": "You have exceeded the maximum number of tokens, please stop coding and instead write a short memento message for the next agent. Your note should:
|
||||
- Summarize what you finished and what still needs work. If there was a recent update_plan call, repeat its steps verbatim.
|
||||
- List outstanding TODOs with file paths / line numbers so they're easy to find.
|
||||
- Flag code that needs more tests (edge cases, performance, integration, etc.).
|
||||
- Record any open bugs, quirks, or setup steps that will make it easier for the next agent to pick up where you left off.",
|
||||
"instructions": prompt,
|
||||
"input": [
|
||||
{
|
||||
"type": "message",
|
||||
@@ -235,7 +231,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "Start Summarization"
|
||||
"text": SUMMARIZATION_PROMPT
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -488,13 +484,14 @@ SUMMARY_ONLY_CONTEXT"
|
||||
],
|
||||
"prompt_cache_key": fork_prompt_cache_key
|
||||
});
|
||||
let expected = json!([
|
||||
let mut expected = json!([
|
||||
user_turn_1,
|
||||
compact_1,
|
||||
user_turn_2_after_compact,
|
||||
usert_turn_3_after_resume,
|
||||
user_turn_3_after_fork
|
||||
]);
|
||||
normalize_line_endings(&mut expected);
|
||||
assert_eq!(requests.len(), 5);
|
||||
assert_eq!(json!(requests), expected);
|
||||
}
|
||||
@@ -580,7 +577,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
|
||||
let expected = json!([
|
||||
let mut expected = json!([
|
||||
{
|
||||
"instructions": prompt,
|
||||
"input": [
|
||||
@@ -637,6 +634,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
|
||||
],
|
||||
}
|
||||
]);
|
||||
normalize_line_endings(&mut expected);
|
||||
let last_request_after_2_compacts = json!([{
|
||||
"instructions": requests[requests.len() -1]["instructions"],
|
||||
"input": requests[requests.len() -1]["input"],
|
||||
@@ -698,7 +696,8 @@ async fn mount_initial_flow(server: &MockServer) {
|
||||
let match_first = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("\"text\":\"hello world\"")
|
||||
&& !body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
&& !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
|
||||
&& !body.contains("\"text\":\"AFTER_COMPACT\"")
|
||||
&& !body.contains("\"text\":\"AFTER_RESUME\"")
|
||||
&& !body.contains("\"text\":\"AFTER_FORK\"")
|
||||
@@ -707,7 +706,7 @@ async fn mount_initial_flow(server: &MockServer) {
|
||||
|
||||
let match_compact = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once(server, match_compact, sse2).await;
|
||||
|
||||
@@ -741,7 +740,8 @@ async fn mount_second_compact_flow(server: &MockServer) {
|
||||
|
||||
let match_second_compact = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\"")) && body.contains("AFTER_FORK")
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
&& body.contains("AFTER_FORK")
|
||||
};
|
||||
mount_sse_once(server, match_second_compact, sse6).await;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user