Add new thread items and rewire event parsing to use them (#5418)
1. Adds AgentMessage, Reasoning, and WebSearch items. 2. Switches the ResponseItem parsing to use the new items and then also emit them. 3. Removes the user-item kind and filters out "special" (environment) user items when returning to clients.
This commit is contained in:
@@ -8,12 +8,12 @@ use codex_core::LocalShellStatus;
|
||||
use codex_core::ModelClient;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::Prompt;
|
||||
use codex_core::ReasoningItemContent;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::WireApi;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use futures::StreamExt;
|
||||
use serde_json::Value;
|
||||
|
||||
@@ -13,6 +13,7 @@ use codex_core::WireApi;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use futures::StreamExt;
|
||||
use tempfile::TempDir;
|
||||
@@ -143,8 +144,8 @@ fn assert_reasoning(item: &ResponseItem, expected: &str) {
|
||||
let mut combined = String::new();
|
||||
for part in parts {
|
||||
match part {
|
||||
codex_core::ReasoningItemContent::ReasoningText { text }
|
||||
| codex_core::ReasoningItemContent::Text { text } => combined.push_str(text),
|
||||
ReasoningItemContent::ReasoningText { text }
|
||||
| ReasoningItemContent::Text { text } => combined.push_str(text),
|
||||
}
|
||||
}
|
||||
assert_eq!(combined, expected);
|
||||
|
||||
@@ -167,6 +167,56 @@ pub fn ev_assistant_message(id: &str, text: &str) -> Value {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_reasoning_item(id: &str, summary: &[&str], raw_content: &[&str]) -> Value {
|
||||
let summary_entries: Vec<Value> = summary
|
||||
.iter()
|
||||
.map(|text| serde_json::json!({"type": "summary_text", "text": text}))
|
||||
.collect();
|
||||
|
||||
let mut event = serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "reasoning",
|
||||
"id": id,
|
||||
"summary": summary_entries,
|
||||
}
|
||||
});
|
||||
|
||||
if !raw_content.is_empty() {
|
||||
let content_entries: Vec<Value> = raw_content
|
||||
.iter()
|
||||
.map(|text| serde_json::json!({"type": "reasoning_text", "text": text}))
|
||||
.collect();
|
||||
event["item"]["content"] = Value::Array(content_entries);
|
||||
}
|
||||
|
||||
event
|
||||
}
|
||||
|
||||
pub fn ev_web_search_call_added(id: &str, status: &str, query: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.added",
|
||||
"item": {
|
||||
"type": "web_search_call",
|
||||
"id": id,
|
||||
"status": status,
|
||||
"action": {"type": "search", "query": query}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_web_search_call_done(id: &str, status: &str, query: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "web_search_call",
|
||||
"id": id,
|
||||
"status": status,
|
||||
"action": {"type": "search", "query": query}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_function_call(call_id: &str, name: &str, arguments: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
|
||||
@@ -9,7 +9,6 @@ use codex_core::ModelClient;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::Prompt;
|
||||
use codex_core::ReasoningItemContent;
|
||||
use codex_core::ResponseEvent;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::WireApi;
|
||||
@@ -21,6 +20,7 @@ use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SessionSource;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use codex_protocol::models::ReasoningItemReasoningSummary;
|
||||
use codex_protocol::models::WebSearchAction;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
|
||||
@@ -1,17 +1,15 @@
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ContentItem;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::content_items_to_text;
|
||||
use codex_core::is_session_prefix_message;
|
||||
use codex_core::parse_turn_item;
|
||||
use codex_core::protocol::ConversationPathResponseEvent;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::RolloutItem;
|
||||
use codex_core::protocol::RolloutLine;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::skip_if_no_network;
|
||||
@@ -115,19 +113,12 @@ async fn fork_conversation_twice_drops_to_first_message() {
|
||||
let find_user_input_positions = |items: &[RolloutItem]| -> Vec<usize> {
|
||||
let mut pos = Vec::new();
|
||||
for (i, it) in items.iter().enumerate() {
|
||||
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = it
|
||||
&& role == "user"
|
||||
&& content_items_to_text(content)
|
||||
.is_some_and(|text| !is_session_prefix_message(&text))
|
||||
if let RolloutItem::ResponseItem(response_item) = it
|
||||
&& let Some(TurnItem::UserMessage(_)) = parse_turn_item(response_item)
|
||||
{
|
||||
// Consider any user message as an input boundary; recorder stores both EventMsg and ResponseItem.
|
||||
// We specifically look for input items, which are represented as ContentItem::InputText.
|
||||
if content
|
||||
.iter()
|
||||
.any(|c| matches!(c, ContentItem::InputText { .. }))
|
||||
{
|
||||
pos.push(i);
|
||||
}
|
||||
pos.push(i);
|
||||
}
|
||||
}
|
||||
pos
|
||||
|
||||
@@ -2,12 +2,18 @@
|
||||
|
||||
use anyhow::Ok;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ItemCompletedEvent;
|
||||
use codex_core::protocol::ItemStartedEvent;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_reasoning_item;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::ev_web_search_call_added;
|
||||
use core_test_support::responses::ev_web_search_call_done;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
@@ -26,7 +32,7 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> {
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let first_response = sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
@@ -36,21 +42,23 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> {
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(e) => Some(e.clone()),
|
||||
let started_item = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::UserMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(e) => Some(e.clone()),
|
||||
let completed_item = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::UserMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let TurnItem::UserMessage(started_item) = started.item;
|
||||
let TurnItem::UserMessage(completed_item) = completed.item;
|
||||
|
||||
assert_eq!(started_item.id, completed_item.id);
|
||||
assert_eq!(
|
||||
started_item.content,
|
||||
@@ -66,3 +74,163 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> {
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn assistant_message_item_is_emitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_assistant_message("msg-1", "all done"),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please summarize results".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::AgentMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::AgentMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(started.id, completed.id);
|
||||
let Some(codex_protocol::items::AgentMessageContent::Text { text }) = completed.content.first()
|
||||
else {
|
||||
panic!("expected agent message text content");
|
||||
};
|
||||
assert_eq!(text, "all done");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn reasoning_item_is_emitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let reasoning_item = ev_reasoning_item(
|
||||
"reasoning-1",
|
||||
&["Consider inputs", "Compute output"],
|
||||
&["Detailed reasoning trace"],
|
||||
);
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
reasoning_item,
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "explain your reasoning".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::Reasoning(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::Reasoning(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(started.id, completed.id);
|
||||
assert_eq!(
|
||||
completed.summary_text,
|
||||
vec!["Consider inputs".to_string(), "Compute output".to_string()]
|
||||
);
|
||||
assert_eq!(
|
||||
completed.raw_content,
|
||||
vec!["Detailed reasoning trace".to_string()]
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn web_search_item_is_emitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let web_search_added =
|
||||
ev_web_search_call_added("web-search-1", "in_progress", "weather seattle");
|
||||
let web_search_done = ev_web_search_call_done("web-search-1", "completed", "weather seattle");
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
web_search_added,
|
||||
web_search_done,
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "find the weather".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::WebSearch(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::WebSearch(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(started.id, completed.id);
|
||||
assert_eq!(completed.query, "weather seattle");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_reasoning_item;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::sse;
|
||||
@@ -62,3 +63,59 @@ async fn resume_includes_initial_messages_from_rollout_events() -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resume_includes_initial_messages_from_reasoning_events() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.show_raw_agent_reasoning = true;
|
||||
});
|
||||
let initial = builder.build(&server).await?;
|
||||
let codex = Arc::clone(&initial.codex);
|
||||
let home = initial.home.clone();
|
||||
let rollout_path = initial.session_configured.rollout_path.clone();
|
||||
|
||||
let initial_sse = sse(vec![
|
||||
ev_response_created("resp-initial"),
|
||||
ev_reasoning_item("reason-1", &["Summarized step"], &["raw detail"]),
|
||||
ev_assistant_message("msg-1", "Completed reasoning turn"),
|
||||
ev_completed("resp-initial"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), initial_sse).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "Record reasoning messages".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let resumed = builder.resume(&server, home, rollout_path).await?;
|
||||
let initial_messages = resumed
|
||||
.session_configured
|
||||
.initial_messages
|
||||
.expect("expected initial messages to be present for resumed session");
|
||||
match initial_messages.as_slice() {
|
||||
[
|
||||
EventMsg::UserMessage(first_user),
|
||||
EventMsg::TokenCount(_),
|
||||
EventMsg::AgentReasoning(reasoning),
|
||||
EventMsg::AgentReasoningRawContent(raw),
|
||||
EventMsg::AgentMessage(assistant_message),
|
||||
EventMsg::TokenCount(_),
|
||||
] => {
|
||||
assert_eq!(first_user.message, "Record reasoning messages");
|
||||
assert_eq!(reasoning.text, "Summarized step");
|
||||
assert_eq!(raw.text, "raw detail");
|
||||
assert_eq!(assistant_message.message, "Completed reasoning turn");
|
||||
}
|
||||
other => panic!("unexpected initial messages after resume: {other:#?}"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user