Introduce rollout items (#3380)

This PR introduces Rollout items. This enable us to rollout eventmsgs
and session meta.

This is mostly #3214 with rebase on main
This commit is contained in:
Ahmed Ibrahim
2025-09-09 16:52:33 -07:00
committed by GitHub
parent 5c48600bb3
commit 43809a454e
12 changed files with 615 additions and 309 deletions

View File

@@ -10,6 +10,9 @@ use time::macros::format_description;
use uuid::Uuid;
use super::SESSIONS_SUBDIR;
use super::recorder::RolloutItem;
use super::recorder::RolloutLine;
use crate::protocol::EventMsg;
/// Returned page of conversation summaries.
#[derive(Debug, Default, PartialEq)]
@@ -34,7 +37,7 @@ pub struct ConversationItem {
}
/// Hard cap to bound worstcase work per request.
const MAX_SCAN_FILES: usize = 10_000;
const MAX_SCAN_FILES: usize = 100;
const HEAD_RECORD_LIMIT: usize = 10;
/// Pagination cursor identifying a file by timestamp and UUID.
@@ -167,10 +170,16 @@ async fn traverse_directories_for_paths(
if items.len() == page_size {
break 'outer;
}
let head = read_first_jsonl_records(&path, HEAD_RECORD_LIMIT)
.await
.unwrap_or_default();
items.push(ConversationItem { path, head });
// Read head and simultaneously detect message events within the same
// first N JSONL records to avoid a second file read.
let (head, saw_session_meta, saw_user_event) =
read_head_and_flags(&path, HEAD_RECORD_LIMIT)
.await
.unwrap_or((Vec::new(), false, false));
// Apply filters: must have session meta and at least one user message event
if saw_session_meta && saw_user_event {
items.push(ConversationItem { path, head });
}
}
}
}
@@ -273,16 +282,19 @@ fn parse_timestamp_uuid_from_filename(name: &str) -> Option<(OffsetDateTime, Uui
Some((ts, uuid))
}
async fn read_first_jsonl_records(
async fn read_head_and_flags(
path: &Path,
max_records: usize,
) -> io::Result<Vec<serde_json::Value>> {
) -> io::Result<(Vec<serde_json::Value>, bool, bool)> {
use tokio::io::AsyncBufReadExt;
let file = tokio::fs::File::open(path).await?;
let reader = tokio::io::BufReader::new(file);
let mut lines = reader.lines();
let mut head: Vec<serde_json::Value> = Vec::new();
let mut saw_session_meta = false;
let mut saw_user_event = false;
while head.len() < max_records {
let line_opt = lines.next_line().await?;
let Some(line) = line_opt else { break };
@@ -290,9 +302,29 @@ async fn read_first_jsonl_records(
if trimmed.is_empty() {
continue;
}
if let Ok(v) = serde_json::from_str::<serde_json::Value>(trimmed) {
head.push(v);
let parsed: Result<RolloutLine, _> = serde_json::from_str(trimmed);
let Ok(rollout_line) = parsed else { continue };
match rollout_line.item {
RolloutItem::SessionMeta(session_meta_line) => {
if let Ok(val) = serde_json::to_value(session_meta_line) {
head.push(val);
saw_session_meta = true;
}
}
RolloutItem::ResponseItem(item) => {
if let Ok(val) = serde_json::to_value(item) {
head.push(val);
}
}
RolloutItem::EventMsg(ev) => {
if matches!(ev, EventMsg::UserMessage(_)) {
saw_user_event = true;
}
}
}
}
Ok(head)
Ok((head, saw_session_meta, saw_user_event))
}

View File

@@ -10,7 +10,6 @@ pub mod recorder;
pub use recorder::RolloutRecorder;
pub use recorder::RolloutRecorderParams;
pub use recorder::SessionMeta;
pub use recorder::SessionStateSnapshot;
#[cfg(test)]
pub mod tests;

View File

@@ -1,8 +1,21 @@
use crate::protocol::EventMsg;
use crate::rollout::recorder::RolloutItem;
use codex_protocol::models::ResponseItem;
/// Whether a rollout `item` should be persisted in rollout files.
#[inline]
pub(crate) fn is_persisted_response_item(item: &RolloutItem) -> bool {
match item {
RolloutItem::ResponseItem(item) => should_persist_response_item(item),
RolloutItem::EventMsg(ev) => should_persist_event_msg(ev),
// Always persist session meta
RolloutItem::SessionMeta(_) => true,
}
}
/// Whether a `ResponseItem` should be persisted in rollout files.
#[inline]
pub(crate) fn is_persisted_response_item(item: &ResponseItem) -> bool {
pub(crate) fn should_persist_response_item(item: &ResponseItem) -> bool {
match item {
ResponseItem::Message { .. }
| ResponseItem::Reasoning { .. }
@@ -14,3 +27,44 @@ pub(crate) fn is_persisted_response_item(item: &ResponseItem) -> bool {
ResponseItem::WebSearchCall { .. } | ResponseItem::Other => false,
}
}
/// Whether an `EventMsg` should be persisted in rollout files.
#[inline]
pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
match ev {
EventMsg::UserMessage(_)
| EventMsg::AgentMessage(_)
| EventMsg::AgentReasoning(_)
| EventMsg::AgentReasoningRawContent(_)
| EventMsg::TokenCount(_) => true,
EventMsg::Error(_)
| EventMsg::TaskStarted(_)
| EventMsg::TaskComplete(_)
| EventMsg::AgentMessageDelta(_)
| EventMsg::AgentReasoningDelta(_)
| EventMsg::AgentReasoningRawContentDelta(_)
| EventMsg::AgentReasoningSectionBreak(_)
| EventMsg::SessionConfigured(_)
| EventMsg::McpToolCallBegin(_)
| EventMsg::McpToolCallEnd(_)
| EventMsg::WebSearchBegin(_)
| EventMsg::WebSearchEnd(_)
| EventMsg::ExecCommandBegin(_)
| EventMsg::ExecCommandOutputDelta(_)
| EventMsg::ExecCommandEnd(_)
| EventMsg::ExecApprovalRequest(_)
| EventMsg::ApplyPatchApprovalRequest(_)
| EventMsg::BackgroundEvent(_)
| EventMsg::StreamError(_)
| EventMsg::PatchApplyBegin(_)
| EventMsg::PatchApplyEnd(_)
| EventMsg::TurnDiff(_)
| EventMsg::GetHistoryEntryResponse(_)
| EventMsg::McpListToolsResponse(_)
| EventMsg::ListCustomPromptsResponse(_)
| EventMsg::PlanUpdate(_)
| EventMsg::TurnAborted(_)
| EventMsg::ShutdownComplete
| EventMsg::ConversationHistory(_) => false,
}
}

View File

@@ -28,25 +28,45 @@ use super::policy::is_persisted_response_item;
use crate::config::Config;
use crate::conversation_manager::InitialHistory;
use crate::conversation_manager::ResumedHistory;
use crate::default_client::ORIGINATOR;
use crate::git_info::GitInfo;
use crate::git_info::collect_git_info;
use crate::protocol::EventMsg;
use codex_protocol::models::ResponseItem;
#[derive(Serialize, Deserialize, Clone, Default)]
#[derive(Serialize, Deserialize, Clone, Default, Debug)]
pub struct SessionMeta {
pub id: ConversationId,
pub timestamp: String,
pub cwd: PathBuf,
pub originator: String,
pub cli_version: String,
pub instructions: Option<String>,
}
#[derive(Serialize)]
struct SessionMetaWithGit {
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct SessionMetaLine {
#[serde(flatten)]
meta: SessionMeta,
#[serde(skip_serializing_if = "Option::is_none")]
git: Option<GitInfo>,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(tag = "type", content = "payload", rename_all = "snake_case")]
pub enum RolloutItem {
SessionMeta(SessionMetaLine),
ResponseItem(ResponseItem),
EventMsg(EventMsg),
}
#[derive(Serialize, Deserialize, Clone)]
pub(crate) struct RolloutLine {
pub(crate) timestamp: String,
#[serde(flatten)]
pub(crate) item: RolloutItem,
}
#[derive(Serialize, Deserialize, Default, Clone)]
pub struct SessionStateSnapshot {}
@@ -87,8 +107,7 @@ pub enum RolloutRecorderParams {
}
enum RolloutCmd {
AddItems(Vec<ResponseItem>),
UpdateState(SessionStateSnapshot),
AddItems(Vec<RolloutItem>),
Shutdown { ack: oneshot::Sender<()> },
}
@@ -144,8 +163,11 @@ impl RolloutRecorder {
tokio::fs::File::from_std(file),
path,
Some(SessionMeta {
timestamp,
id: session_id,
timestamp,
cwd: config.cwd.clone(),
originator: ORIGINATOR.value.clone(),
cli_version: env!("CARGO_PKG_VERSION").to_string(),
instructions,
}),
)
@@ -176,7 +198,7 @@ impl RolloutRecorder {
Ok(Self { tx, rollout_path })
}
pub(crate) async fn record_items(&self, items: &[ResponseItem]) -> std::io::Result<()> {
pub(crate) async fn record_items(&self, items: &[RolloutItem]) -> std::io::Result<()> {
let mut filtered = Vec::new();
for item in items {
// Note that function calls may look a bit strange if they are
@@ -195,60 +217,48 @@ impl RolloutRecorder {
.map_err(|e| IoError::other(format!("failed to queue rollout items: {e}")))
}
pub(crate) async fn record_state(&self, state: SessionStateSnapshot) -> std::io::Result<()> {
self.tx
.send(RolloutCmd::UpdateState(state))
.await
.map_err(|e| IoError::other(format!("failed to queue rollout state: {e}")))
}
pub async fn get_rollout_history(path: &Path) -> std::io::Result<InitialHistory> {
pub(crate) async fn get_rollout_history(path: &Path) -> std::io::Result<InitialHistory> {
info!("Resuming rollout from {path:?}");
tracing::error!("Resuming rollout from {path:?}");
let text = tokio::fs::read_to_string(path).await?;
let mut lines = text.lines();
let first_line = lines
.next()
.ok_or_else(|| IoError::other("empty session file"))?;
let conversation_id = match serde_json::from_str::<SessionMeta>(first_line) {
Ok(rollout_session_meta) => {
tracing::error!(
"Parsed conversation ID from rollout file: {:?}",
rollout_session_meta.id
);
Some(rollout_session_meta.id)
}
Err(e) => {
return Err(IoError::other(format!(
"failed to parse first line of rollout file as SessionMeta: {e}"
)));
}
};
if text.trim().is_empty() {
return Err(IoError::other("empty session file"));
}
let mut items = Vec::new();
for line in lines {
let mut items: Vec<RolloutItem> = Vec::new();
let mut conversation_id: Option<ConversationId> = None;
for line in text.lines() {
if line.trim().is_empty() {
continue;
}
let v: Value = match serde_json::from_str(line) {
Ok(v) => v,
Err(_) => continue,
};
if v.get("record_type")
.and_then(|rt| rt.as_str())
.map(|s| s == "state")
.unwrap_or(false)
{
continue;
}
match serde_json::from_value::<ResponseItem>(v.clone()) {
Ok(item) => {
if is_persisted_response_item(&item) {
items.push(item);
}
}
Err(e) => {
warn!("failed to parse item: {v:?}, error: {e}");
warn!("failed to parse line as JSON: {line:?}, error: {e}");
continue;
}
};
// Parse the rollout line structure
match serde_json::from_value::<RolloutLine>(v.clone()) {
Ok(rollout_line) => match rollout_line.item {
RolloutItem::SessionMeta(session_meta_line) => {
tracing::error!(
"Parsed conversation ID from rollout file: {:?}",
session_meta_line.meta.id
);
conversation_id = Some(session_meta_line.meta.id);
items.push(RolloutItem::SessionMeta(session_meta_line));
}
RolloutItem::ResponseItem(item) => {
items.push(RolloutItem::ResponseItem(item));
}
RolloutItem::EventMsg(_ev) => {
items.push(RolloutItem::EventMsg(_ev));
}
},
Err(e) => {
warn!("failed to parse rollout line: {v:?}, error: {e}");
}
}
}
@@ -352,13 +362,15 @@ async fn rollout_writer(
// If we have a meta, collect git info asynchronously and write meta first
if let Some(session_meta) = meta.take() {
let git_info = collect_git_info(&cwd).await;
let session_meta_with_git = SessionMetaWithGit {
let session_meta_line = SessionMetaLine {
meta: session_meta,
git: git_info,
};
// Write the SessionMeta as the first item in the file
writer.write_line(&session_meta_with_git).await?;
// Write the SessionMeta as the first item in the file, wrapped in a rollout line
writer
.write_rollout_item(RolloutItem::SessionMeta(session_meta_line))
.await?;
}
// Process rollout commands
@@ -367,24 +379,10 @@ async fn rollout_writer(
RolloutCmd::AddItems(items) => {
for item in items {
if is_persisted_response_item(&item) {
writer.write_line(&item).await?;
writer.write_rollout_item(item).await?;
}
}
}
RolloutCmd::UpdateState(state) => {
#[derive(Serialize)]
struct StateLine<'a> {
record_type: &'static str,
#[serde(flatten)]
state: &'a SessionStateSnapshot,
}
writer
.write_line(&StateLine {
record_type: "state",
state: &state,
})
.await?;
}
RolloutCmd::Shutdown { ack } => {
let _ = ack.send(());
}
@@ -399,6 +397,20 @@ struct JsonlWriter {
}
impl JsonlWriter {
async fn write_rollout_item(&mut self, rollout_item: RolloutItem) -> std::io::Result<()> {
let timestamp_format: &[FormatItem] = format_description!(
"[year]-[month]-[day]T[hour]:[minute]:[second].[subsecond digits:3]Z"
);
let timestamp = OffsetDateTime::now_utc()
.format(timestamp_format)
.map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
let line = RolloutLine {
timestamp,
item: rollout_item,
};
self.write_line(&line).await
}
async fn write_line(&mut self, item: &impl serde::Serialize) -> std::io::Result<()> {
let mut json = serde_json::to_string(item)?;
json.push('\n');

View File

@@ -42,10 +42,30 @@ fn write_session_file(
let meta = serde_json::json!({
"timestamp": ts_str,
"id": uuid.to_string()
"type": "session_meta",
"payload": {
"id": uuid,
"timestamp": ts_str,
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
}
});
writeln!(file, "{meta}")?;
// Include at least one user message event to satisfy listing filters
let user_event = serde_json::json!({
"timestamp": ts_str,
"type": "event_msg",
"payload": {
"type": "user_message",
"message": "Hello from user",
"kind": "plain"
}
});
writeln!(file, "{user_event}")?;
for i in 0..num_records {
let rec = serde_json::json!({
"record_type": "response",
@@ -93,24 +113,30 @@ async fn test_list_conversations_latest_first() {
.join("01")
.join(format!("rollout-2025-01-01T12-00-00-{u1}.jsonl"));
let head_3 = vec![
serde_json::json!({"timestamp": "2025-01-03T12-00-00", "id": u3.to_string()}),
serde_json::json!({"record_type": "response", "index": 0}),
serde_json::json!({"record_type": "response", "index": 1}),
serde_json::json!({"record_type": "response", "index": 2}),
];
let head_2 = vec![
serde_json::json!({"timestamp": "2025-01-02T12-00-00", "id": u2.to_string()}),
serde_json::json!({"record_type": "response", "index": 0}),
serde_json::json!({"record_type": "response", "index": 1}),
serde_json::json!({"record_type": "response", "index": 2}),
];
let head_1 = vec![
serde_json::json!({"timestamp": "2025-01-01T12-00-00", "id": u1.to_string()}),
serde_json::json!({"record_type": "response", "index": 0}),
serde_json::json!({"record_type": "response", "index": 1}),
serde_json::json!({"record_type": "response", "index": 2}),
];
let head_3 = vec![serde_json::json!({
"id": u3,
"timestamp": "2025-01-03T12-00-00",
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
})];
let head_2 = vec![serde_json::json!({
"id": u2,
"timestamp": "2025-01-02T12-00-00",
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
})];
let head_1 = vec![serde_json::json!({
"id": u1,
"timestamp": "2025-01-01T12-00-00",
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
})];
let expected_cursor: Cursor =
serde_json::from_str(&format!("\"2025-01-01T12-00-00|{u1}\"")).unwrap();
@@ -170,14 +196,22 @@ async fn test_pagination_cursor() {
.join("03")
.join("04")
.join(format!("rollout-2025-03-04T09-00-00-{u4}.jsonl"));
let head_5 = vec![
serde_json::json!({"timestamp": "2025-03-05T09-00-00", "id": u5.to_string()}),
serde_json::json!({"record_type": "response", "index": 0}),
];
let head_4 = vec![
serde_json::json!({"timestamp": "2025-03-04T09-00-00", "id": u4.to_string()}),
serde_json::json!({"record_type": "response", "index": 0}),
];
let head_5 = vec![serde_json::json!({
"id": u5,
"timestamp": "2025-03-05T09-00-00",
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
})];
let head_4 = vec![serde_json::json!({
"id": u4,
"timestamp": "2025-03-04T09-00-00",
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
})];
let expected_cursor1: Cursor =
serde_json::from_str(&format!("\"2025-03-04T09-00-00|{u4}\"")).unwrap();
let expected_page1 = ConversationsPage {
@@ -212,14 +246,22 @@ async fn test_pagination_cursor() {
.join("03")
.join("02")
.join(format!("rollout-2025-03-02T09-00-00-{u2}.jsonl"));
let head_3 = vec![
serde_json::json!({"timestamp": "2025-03-03T09-00-00", "id": u3.to_string()}),
serde_json::json!({"record_type": "response", "index": 0}),
];
let head_2 = vec![
serde_json::json!({"timestamp": "2025-03-02T09-00-00", "id": u2.to_string()}),
serde_json::json!({"record_type": "response", "index": 0}),
];
let head_3 = vec![serde_json::json!({
"id": u3,
"timestamp": "2025-03-03T09-00-00",
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
})];
let head_2 = vec![serde_json::json!({
"id": u2,
"timestamp": "2025-03-02T09-00-00",
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
})];
let expected_cursor2: Cursor =
serde_json::from_str(&format!("\"2025-03-02T09-00-00|{u2}\"")).unwrap();
let expected_page2 = ConversationsPage {
@@ -248,10 +290,14 @@ async fn test_pagination_cursor() {
.join("03")
.join("01")
.join(format!("rollout-2025-03-01T09-00-00-{u1}.jsonl"));
let head_1 = vec![
serde_json::json!({"timestamp": "2025-03-01T09-00-00", "id": u1.to_string()}),
serde_json::json!({"record_type": "response", "index": 0}),
];
let head_1 = vec![serde_json::json!({
"id": u1,
"timestamp": "2025-03-01T09-00-00",
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
})];
let expected_cursor3: Cursor =
serde_json::from_str(&format!("\"2025-03-01T09-00-00|{u1}\"")).unwrap();
let expected_page3 = ConversationsPage {
@@ -287,11 +333,14 @@ async fn test_get_conversation_contents() {
.join("04")
.join("01")
.join(format!("rollout-2025-04-01T10-30-00-{uuid}.jsonl"));
let expected_head = vec![
serde_json::json!({"timestamp": ts, "id": uuid.to_string()}),
serde_json::json!({"record_type": "response", "index": 0}),
serde_json::json!({"record_type": "response", "index": 1}),
];
let expected_head = vec![serde_json::json!({
"id": uuid,
"timestamp": ts,
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
})];
let expected_cursor: Cursor = serde_json::from_str(&format!("\"{ts}|{uuid}\"")).unwrap();
let expected_page = ConversationsPage {
items: vec![ConversationItem {
@@ -305,10 +354,15 @@ async fn test_get_conversation_contents() {
assert_eq!(page, expected_page);
// Entire file contents equality
let meta = serde_json::json!({"timestamp": ts, "id": uuid.to_string()});
let meta = serde_json::json!({"timestamp": ts, "type": "session_meta", "payload": {"id": uuid, "timestamp": ts, "instructions": null, "cwd": ".", "originator": "test_originator", "cli_version": "test_version"}});
let user_event = serde_json::json!({
"timestamp": ts,
"type": "event_msg",
"payload": {"type": "user_message", "message": "Hello from user", "kind": "plain"}
});
let rec0 = serde_json::json!({"record_type": "response", "index": 0});
let rec1 = serde_json::json!({"record_type": "response", "index": 1});
let expected_content = format!("{meta}\n{rec0}\n{rec1}\n");
let expected_content = format!("{meta}\n{user_event}\n{rec0}\n{rec1}\n");
assert_eq!(content, expected_content);
}
@@ -341,7 +395,14 @@ async fn test_stable_ordering_same_second_pagination() {
.join("01")
.join(format!("rollout-2025-07-01T00-00-00-{u2}.jsonl"));
let head = |u: Uuid| -> Vec<serde_json::Value> {
vec![serde_json::json!({"timestamp": ts, "id": u.to_string()})]
vec![serde_json::json!({
"id": u,
"timestamp": ts,
"instructions": null,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version"
})]
};
let expected_cursor1: Cursor = serde_json::from_str(&format!("\"{ts}|{u2}\"")).unwrap();
let expected_page1 = ConversationsPage {