chore: unify history loading (#2736)

We have two ways of loading conversation with a previous history. Fork
conversation and the experimental resume that we had before. In this PR,
I am unifying their code path. The path is getting the history items and
recording them in a brand new conversation. This PR also constraint the
rollout recorder responsibilities to be only recording to the disk and
loading from the disk.

The PR also fixes a current bug when we have two forking in a row:
History 1:
<Environment Context>
UserMessage_1
UserMessage_2
UserMessage_3

**Fork with n = 1 (only remove one element)**
History 2:
<Environment Context>
UserMessage_1
UserMessage_2
<Environment Context>

**Fork with n = 1 (only remove one element)**
History 2:
<Environment Context>
UserMessage_1
UserMessage_2
**<Environment Context>**

This shouldn't happen but because we were appending the `<Environment
Context>` after each spawning and it's considered as _user message_.
Now, we don't add this message if restoring and old conversation.
This commit is contained in:
Ahmed Ibrahim
2025-09-02 15:44:29 -07:00
committed by GitHub
parent 8b993b557d
commit 431a10fc50
6 changed files with 309 additions and 165 deletions

View File

@@ -388,7 +388,7 @@ async fn integration_creates_and_checks_session_file() {
"No message found in session file containing the marker"
);
// Second run: resume and append.
// Second run: resume should create a NEW session file that contains both old and new history.
let orig_len = content.lines().count();
let marker2 = format!("integration-resume-{}", Uuid::new_v4());
let prompt2 = format!("echo {marker2}");
@@ -419,31 +419,58 @@ async fn integration_creates_and_checks_session_file() {
let output2 = cmd2.output().unwrap();
assert!(output2.status.success(), "resume codex-cli run failed");
// The rollout writer runs on a background async task; give it a moment to flush.
let mut new_len = orig_len;
let deadline = Instant::now() + Duration::from_secs(5);
let mut content2 = String::new();
while Instant::now() < deadline {
if let Ok(c) = std::fs::read_to_string(&path) {
let count = c.lines().count();
if count > orig_len {
content2 = c;
new_len = count;
// Find the new session file containing the resumed marker.
let deadline = Instant::now() + Duration::from_secs(10);
let mut resumed_path: Option<std::path::PathBuf> = None;
while Instant::now() < deadline && resumed_path.is_none() {
for entry in WalkDir::new(&sessions_dir) {
let entry = match entry {
Ok(e) => e,
Err(_) => continue,
};
if !entry.file_type().is_file() {
continue;
}
if !entry.file_name().to_string_lossy().ends_with(".jsonl") {
continue;
}
let p = entry.path();
let Ok(c) = std::fs::read_to_string(p) else {
continue;
};
if c.contains(&marker2) {
resumed_path = Some(p.to_path_buf());
break;
}
}
std::thread::sleep(Duration::from_millis(50));
if resumed_path.is_none() {
std::thread::sleep(Duration::from_millis(50));
}
}
if content2.is_empty() {
// last attempt
content2 = std::fs::read_to_string(&path).unwrap();
new_len = content2.lines().count();
}
assert!(new_len > orig_len, "rollout file did not grow after resume");
assert!(content2.contains(&marker), "rollout lost original marker");
let resumed_path = resumed_path.expect("No resumed session file found containing the marker2");
// Resume should have written to a new file, not the original one.
assert_ne!(
resumed_path, path,
"resume should create a new session file"
);
let resumed_content = std::fs::read_to_string(&resumed_path).unwrap();
assert!(
content2.contains(&marker2),
"rollout missing resumed marker"
resumed_content.contains(&marker),
"resumed file missing original marker"
);
assert!(
resumed_content.contains(&marker2),
"resumed file missing resumed marker"
);
// Original file should remain unchanged.
let content_after = std::fs::read_to_string(&path).unwrap();
assert_eq!(
content_after.lines().count(),
orig_len,
"original rollout file should not change on resume"
);
}

View File

@@ -0,0 +1,154 @@
use codex_core::ConversationManager;
use codex_core::ModelProviderInfo;
use codex_core::NewConversation;
use codex_core::built_in_model_providers;
use codex_core::protocol::ConversationHistoryResponseEvent;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use codex_login::CodexAuth;
use core_test_support::load_default_config_for_test;
use core_test_support::wait_for_event;
use tempfile::TempDir;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
/// Build minimal SSE stream with completed marker using the JSON fixture.
fn sse_completed(id: &str) -> String {
core_test_support::load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn fork_conversation_twice_drops_to_first_message() {
// Start a mock server that completes three turns.
let server = MockServer::start().await;
let sse = sse_completed("resp");
let first = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse.clone(), "text/event-stream");
// Expect three calls to /v1/responses one per user input.
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(first)
.expect(3)
.mount(&server)
.await;
// Configure Codex to use the mock server.
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider.clone();
let config_for_fork = config.clone();
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let NewConversation {
conversation: codex,
..
} = conversation_manager
.new_conversation(config)
.await
.expect("create conversation");
// Send three user messages; wait for three completed turns.
for text in ["first", "second", "third"] {
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: text.to_string(),
}],
})
.await
.unwrap();
let _ = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
}
// Request history from the base conversation.
codex.submit(Op::GetHistory).await.unwrap();
let base_history =
wait_for_event(&codex, |ev| matches!(ev, EventMsg::ConversationHistory(_))).await;
// Capture entries from the base history and compute expected prefixes after each fork.
let entries_after_three = match &base_history {
EventMsg::ConversationHistory(ConversationHistoryResponseEvent { entries, .. }) => {
entries.clone()
}
_ => panic!("expected ConversationHistory event"),
};
// History layout for this test:
// [0] user instructions,
// [1] environment context,
// [2] "first" user message,
// [3] "second" user message,
// [4] "third" user message.
// Fork 1: drops the last user message and everything after.
let expected_after_first = vec![
entries_after_three[0].clone(),
entries_after_three[1].clone(),
entries_after_three[2].clone(),
entries_after_three[3].clone(),
];
// Fork 2: drops the last user message and everything after.
// [0] user instructions,
// [1] environment context,
// [2] "first" user message,
let expected_after_second = vec![
entries_after_three[0].clone(),
entries_after_three[1].clone(),
entries_after_three[2].clone(),
];
// Fork once with n=1 → drops the last user message and everything after.
let NewConversation {
conversation: codex_fork1,
..
} = conversation_manager
.fork_conversation(entries_after_three.clone(), 1, config_for_fork.clone())
.await
.expect("fork 1");
codex_fork1.submit(Op::GetHistory).await.unwrap();
let fork1_history = wait_for_event(&codex_fork1, |ev| {
matches!(ev, EventMsg::ConversationHistory(_))
})
.await;
let entries_after_first_fork = match &fork1_history {
EventMsg::ConversationHistory(ConversationHistoryResponseEvent { entries, .. }) => {
assert!(matches!(
fork1_history,
EventMsg::ConversationHistory(ConversationHistoryResponseEvent { ref entries, .. }) if *entries == expected_after_first
));
entries.clone()
}
_ => panic!("expected ConversationHistory event after first fork"),
};
// Fork again with n=1 → drops the (new) last user message, leaving only the first.
let NewConversation {
conversation: codex_fork2,
..
} = conversation_manager
.fork_conversation(entries_after_first_fork.clone(), 1, config_for_fork.clone())
.await
.expect("fork 2");
codex_fork2.submit(Op::GetHistory).await.unwrap();
let fork2_history = wait_for_event(&codex_fork2, |ev| {
matches!(ev, EventMsg::ConversationHistory(_))
})
.await;
assert!(matches!(
fork2_history,
EventMsg::ConversationHistory(ConversationHistoryResponseEvent { ref entries, .. }) if *entries == expected_after_second
));
}

View File

@@ -5,6 +5,7 @@ mod client;
mod compact;
mod exec;
mod exec_stream_events;
mod fork_conversation;
mod live_cli;
mod prompt_caching;
mod seatbelt;