Add turn started/completed events and correct exit code on error (#4309)
Adds new `turn.started` and `turn.completed` events; `turn.completed` includes
token usage. Also ensures the process exits with code 1 on failures.
```
{
"type": "session.created",
"session_id": "019987a7-93e7-7b20-9e05-e90060e411ea"
}
{
"type": "turn.started"
}
...
{
"type": "turn.completed",
"usage": {
"input_tokens": 78913,
"cached_input_tokens": 65280,
"output_tokens": 1099
}
}
```
This commit is contained in:
@@ -24,6 +24,9 @@ use codex_exec::exec_events::ReasoningItem;
|
||||
use codex_exec::exec_events::SessionCreatedEvent;
|
||||
use codex_exec::exec_events::TodoItem as ExecTodoItem;
|
||||
use codex_exec::exec_events::TodoListItem as ExecTodoListItem;
|
||||
use codex_exec::exec_events::TurnCompletedEvent;
|
||||
use codex_exec::exec_events::TurnStartedEvent;
|
||||
use codex_exec::exec_events::Usage;
|
||||
use codex_exec::experimental_event_processor_with_json_output::ExperimentalEventProcessorWithJsonOutput;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::path::PathBuf;
|
||||
@@ -65,6 +68,22 @@ fn session_configured_produces_session_created_event() {
|
||||
);
|
||||
}
|
||||
|
||||
/// A `TaskStarted` protocol event should be translated into exactly one
/// `TurnStarted` conversation event.
#[test]
fn task_started_produces_turn_started_event() {
    let mut processor = ExperimentalEventProcessorWithJsonOutput::new(None);

    // Build the incoming protocol message separately so the call under test
    // stays on a single line.
    let task_started = EventMsg::TaskStarted(codex_core::protocol::TaskStartedEvent {
        model_context_window: Some(32_000),
    });
    let emitted = processor.collect_conversation_events(&event("t1", task_started));

    let expected = vec![ConversationEvent::TurnStarted(TurnStartedEvent {})];
    assert_eq!(emitted, expected);
}
|
||||
|
||||
#[test]
|
||||
fn plan_update_emits_todo_list_started_updated_and_completed() {
|
||||
use codex_core::plan_tool::PlanItemArg;
|
||||
@@ -161,23 +180,28 @@ fn plan_update_emits_todo_list_started_updated_and_completed() {
|
||||
let out_complete = ep.collect_conversation_events(&complete);
|
||||
assert_eq!(
|
||||
out_complete,
|
||||
vec![ConversationEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ConversationItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ConversationItemDetails::TodoList(ExecTodoListItem {
|
||||
items: vec![
|
||||
ExecTodoItem {
|
||||
text: "step one".to_string(),
|
||||
completed: true
|
||||
},
|
||||
ExecTodoItem {
|
||||
text: "step two".to_string(),
|
||||
completed: false
|
||||
},
|
||||
],
|
||||
}),
|
||||
},
|
||||
})]
|
||||
vec![
|
||||
ConversationEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ConversationItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ConversationItemDetails::TodoList(ExecTodoListItem {
|
||||
items: vec![
|
||||
ExecTodoItem {
|
||||
text: "step one".to_string(),
|
||||
completed: true
|
||||
},
|
||||
ExecTodoItem {
|
||||
text: "step two".to_string(),
|
||||
completed: false
|
||||
},
|
||||
],
|
||||
}),
|
||||
},
|
||||
}),
|
||||
ConversationEvent::TurnCompleted(TurnCompletedEvent {
|
||||
usage: Usage::default(),
|
||||
}),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
@@ -585,3 +609,52 @@ fn patch_apply_failure_produces_item_completed_patchapply_failed() {
|
||||
other => panic!("unexpected event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Token totals delivered by a `TokenCount` event should be reported in the
/// `TurnCompleted` event emitted once `TaskComplete` arrives.
#[test]
fn task_complete_produces_turn_completed_with_usage() {
    let mut processor = ExperimentalEventProcessorWithJsonOutput::new(None);

    // Step 1: deliver a TokenCount event carrying known totals. It is expected
    // to produce no conversation events by itself.
    let totals = codex_core::protocol::TokenUsage {
        input_tokens: 1200,
        cached_input_tokens: 200,
        output_tokens: 345,
        reasoning_output_tokens: 0,
        total_tokens: 0,
    };
    let token_count = EventMsg::TokenCount(codex_core::protocol::TokenCountEvent {
        info: Some(codex_core::protocol::TokenUsageInfo {
            total_token_usage: totals.clone(),
            last_token_usage: totals,
            model_context_window: None,
        }),
        rate_limits: None,
    });
    let after_token_count = processor.collect_conversation_events(&event("e1", token_count));
    assert!(after_token_count.is_empty());

    // Step 2: TaskComplete should yield a single turn.completed event whose
    // usage matches the totals fed in above.
    let task_complete = EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
        last_agent_message: Some("done".to_string()),
    });
    let emitted = processor.collect_conversation_events(&event("e2", task_complete));

    let expected = vec![ConversationEvent::TurnCompleted(TurnCompletedEvent {
        usage: Usage {
            input_tokens: 1200,
            cached_input_tokens: 200,
            output_tokens: 345,
        },
    })];
    assert_eq!(emitted, expected);
}
|
||||
|
||||
@@ -3,3 +3,4 @@ mod apply_patch;
|
||||
mod output_schema;
|
||||
mod resume;
|
||||
mod sandbox;
|
||||
mod server_error_exit;
|
||||
|
||||
34
codex-rs/exec/tests/suite/server_error_exit.rs
Normal file
34
codex-rs/exec/tests/suite/server_error_exit.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use core_test_support::responses;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
/// Verify that when the server reports an error, `codex-exec` exits with a
|
||||
/// non-zero status code so automation can detect failures.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exits_non_zero_when_server_reports_error() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
|
||||
// Mock a simple Responses API SSE stream that immediately reports a
|
||||
// `response.failed` event with an error message.
|
||||
let server = responses::start_mock_server().await;
|
||||
let body = responses::sse(vec![serde_json::json!({
|
||||
"type": "response.failed",
|
||||
"response": {
|
||||
"id": "resp_err_1",
|
||||
"error": {"code": "rate_limit_exceeded", "message": "synthetic server error"}
|
||||
}
|
||||
})]);
|
||||
responses::mount_sse_once(&server, any(), body).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("tell me something")
|
||||
.arg("--experimental-json")
|
||||
.assert()
|
||||
.code(1);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user