Phase 1: Repository & Infrastructure Setup

- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
  - Root: llmx-monorepo
  - CLI: @llmx/llmx
  - SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx

🤖 Generated with Claude Code
This commit is contained in:
Sebastian Krüger
2025-11-11 14:01:52 +01:00
parent 052b052832
commit f237fe560d
1151 changed files with 41 additions and 35 deletions

View File

@@ -0,0 +1,5 @@
// Single integration test binary that aggregates all test modules.
// `suite` pulls in the submodules under `tests/suite/`;
// `event_processor_with_json_output` is declared as a sibling module
// (its source lives next to this file, not under `tests/suite/`).
mod suite;
mod event_processor_with_json_output;

View File

@@ -0,0 +1,939 @@
use codex_core::protocol::AgentMessageEvent;
use codex_core::protocol::AgentReasoningEvent;
use codex_core::protocol::ErrorEvent;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandBeginEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::FileChange;
use codex_core::protocol::McpInvocation;
use codex_core::protocol::McpToolCallBeginEvent;
use codex_core::protocol::McpToolCallEndEvent;
use codex_core::protocol::PatchApplyBeginEvent;
use codex_core::protocol::PatchApplyEndEvent;
use codex_core::protocol::SessionConfiguredEvent;
use codex_core::protocol::WarningEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_exec::event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
use codex_exec::exec_events::AgentMessageItem;
use codex_exec::exec_events::CommandExecutionItem;
use codex_exec::exec_events::CommandExecutionStatus;
use codex_exec::exec_events::ErrorItem;
use codex_exec::exec_events::ItemCompletedEvent;
use codex_exec::exec_events::ItemStartedEvent;
use codex_exec::exec_events::ItemUpdatedEvent;
use codex_exec::exec_events::McpToolCallItem;
use codex_exec::exec_events::McpToolCallItemError;
use codex_exec::exec_events::McpToolCallItemResult;
use codex_exec::exec_events::McpToolCallStatus;
use codex_exec::exec_events::PatchApplyStatus;
use codex_exec::exec_events::PatchChangeKind;
use codex_exec::exec_events::ReasoningItem;
use codex_exec::exec_events::ThreadErrorEvent;
use codex_exec::exec_events::ThreadEvent;
use codex_exec::exec_events::ThreadItem;
use codex_exec::exec_events::ThreadItemDetails;
use codex_exec::exec_events::ThreadStartedEvent;
use codex_exec::exec_events::TodoItem as ExecTodoItem;
use codex_exec::exec_events::TodoListItem as ExecTodoListItem;
use codex_exec::exec_events::TurnCompletedEvent;
use codex_exec::exec_events::TurnFailedEvent;
use codex_exec::exec_events::TurnStartedEvent;
use codex_exec::exec_events::Usage;
use codex_exec::exec_events::WebSearchItem;
use codex_protocol::plan_tool::PlanItemArg;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use mcp_types::CallToolResult;
use mcp_types::ContentBlock;
use mcp_types::TextContent;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::path::PathBuf;
use std::time::Duration;
/// Convenience constructor: wraps `msg` in an [`Event`] with the given id.
fn event(id: &str, msg: EventMsg) -> Event {
    let id = String::from(id);
    Event { id, msg }
}
/// `SessionConfigured` must surface as a `thread.started` event carrying the
/// session id (as a string) in `thread_id`.
#[test]
fn session_configured_produces_thread_started_event() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let raw_id = "67e55044-10b1-426f-9247-bb680e5fe0c8";
    let session_id = codex_protocol::ConversationId::from_string(raw_id).unwrap();
    let configured = event(
        "e1",
        EventMsg::SessionConfigured(SessionConfiguredEvent {
            session_id,
            model: "codex-mini-latest".to_string(),
            reasoning_effort: None,
            history_log_id: 0,
            history_entry_count: 0,
            initial_messages: None,
            rollout_path: PathBuf::from("/tmp/rollout.json"),
        }),
    );
    let produced = processor.collect_thread_events(&configured);
    let expected = vec![ThreadEvent::ThreadStarted(ThreadStartedEvent {
        thread_id: raw_id.to_string(),
    })];
    assert_eq!(produced, expected);
}
/// `TaskStarted` maps to a bare `turn.started` event.
#[test]
fn task_started_produces_turn_started_event() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let started = event(
        "t1",
        EventMsg::TaskStarted(codex_core::protocol::TaskStartedEvent {
            model_context_window: Some(32_000),
        }),
    );
    let produced = processor.collect_thread_events(&started);
    assert_eq!(produced, vec![ThreadEvent::TurnStarted(TurnStartedEvent {})]);
}
/// A finished web search emits `item.completed` echoing the query back.
#[test]
fn web_search_end_emits_item_completed() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let query = "rust async await";
    let finished = event(
        "w1",
        EventMsg::WebSearchEnd(WebSearchEndEvent {
            call_id: "call-123".to_string(),
            query: query.to_string(),
        }),
    );
    let expected = vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::WebSearch(WebSearchItem {
                query: query.to_string(),
            }),
        },
    })];
    assert_eq!(processor.collect_thread_events(&finished), expected);
}
/// A plan (todo list) follows the item lifecycle within a single turn:
/// the first `PlanUpdate` emits `item.started`, later updates emit
/// `item.updated` under the same item id, and `TaskComplete` emits
/// `item.completed` with the latest state plus `turn.completed`.
#[test]
fn plan_update_emits_todo_list_started_updated_and_completed() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // First plan update => item.started (todo_list)
    let first = event(
        "p1",
        EventMsg::PlanUpdate(UpdatePlanArgs {
            explanation: None,
            plan: vec![
                PlanItemArg {
                    step: "step one".to_string(),
                    status: StepStatus::Pending,
                },
                PlanItemArg {
                    step: "step two".to_string(),
                    status: StepStatus::InProgress,
                },
            ],
        }),
    );
    let out_first = ep.collect_thread_events(&first);
    // Note: both Pending and InProgress map to `completed: false` in the
    // emitted todo items; only Completed maps to `true` (see below).
    assert_eq!(
        out_first,
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::TodoList(ExecTodoListItem {
                    items: vec![
                        ExecTodoItem {
                            text: "step one".to_string(),
                            completed: false
                        },
                        ExecTodoItem {
                            text: "step two".to_string(),
                            completed: false
                        },
                    ],
                }),
            },
        })]
    );
    // Second plan update in same turn => item.updated (same id)
    let second = event(
        "p2",
        EventMsg::PlanUpdate(UpdatePlanArgs {
            explanation: None,
            plan: vec![
                PlanItemArg {
                    step: "step one".to_string(),
                    status: StepStatus::Completed,
                },
                PlanItemArg {
                    step: "step two".to_string(),
                    status: StepStatus::InProgress,
                },
            ],
        }),
    );
    let out_second = ep.collect_thread_events(&second);
    assert_eq!(
        out_second,
        vec![ThreadEvent::ItemUpdated(ItemUpdatedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::TodoList(ExecTodoListItem {
                    items: vec![
                        ExecTodoItem {
                            text: "step one".to_string(),
                            completed: true
                        },
                        ExecTodoItem {
                            text: "step two".to_string(),
                            completed: false
                        },
                    ],
                }),
            },
        })]
    );
    // Task completes => item.completed (same id, latest state)
    let complete = event(
        "p3",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: None,
        }),
    );
    let out_complete = ep.collect_thread_events(&complete);
    assert_eq!(
        out_complete,
        vec![
            ThreadEvent::ItemCompleted(ItemCompletedEvent {
                item: ThreadItem {
                    id: "item_0".to_string(),
                    details: ThreadItemDetails::TodoList(ExecTodoListItem {
                        items: vec![
                            ExecTodoItem {
                                text: "step one".to_string(),
                                completed: true
                            },
                            ExecTodoItem {
                                text: "step two".to_string(),
                                completed: false
                            },
                        ],
                    }),
                },
            }),
            // No TokenCount was fed, so usage falls back to its default.
            ThreadEvent::TurnCompleted(TurnCompletedEvent {
                usage: Usage::default(),
            }),
        ]
    );
}
/// An MCP tool call is surfaced as `item.started` (status `in_progress`) on
/// begin and, for a successful end, `item.completed` (status `completed`)
/// with the tool result attached — both under the same item id.
#[test]
fn mcp_tool_call_begin_and_end_emit_item_events() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let invocation = McpInvocation {
        server: "server_a".to_string(),
        tool: "tool_x".to_string(),
        arguments: Some(json!({ "key": "value" })),
    };
    let begin = event(
        "m1",
        EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
            call_id: "call-1".to_string(),
            invocation: invocation.clone(),
        }),
    );
    let begin_events = ep.collect_thread_events(&begin);
    assert_eq!(
        begin_events,
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                    server: "server_a".to_string(),
                    tool: "tool_x".to_string(),
                    arguments: json!({ "key": "value" }),
                    result: None,
                    error: None,
                    status: McpToolCallStatus::InProgress,
                }),
            },
        })]
    );
    // End with Ok(..) and a matching call_id => the started item completes.
    let end = event(
        "m2",
        EventMsg::McpToolCallEnd(McpToolCallEndEvent {
            call_id: "call-1".to_string(),
            invocation,
            duration: Duration::from_secs(1),
            result: Ok(CallToolResult {
                content: Vec::new(),
                is_error: None,
                structured_content: None,
            }),
        }),
    );
    let end_events = ep.collect_thread_events(&end);
    assert_eq!(
        end_events,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                    server: "server_a".to_string(),
                    tool: "tool_x".to_string(),
                    arguments: json!({ "key": "value" }),
                    result: Some(McpToolCallItemResult {
                        content: Vec::new(),
                        structured_content: None,
                    }),
                    error: None,
                    status: McpToolCallStatus::Completed,
                }),
            },
        })]
    );
}
/// An MCP tool call that ends with `Err(..)` completes the item with status
/// `failed` and the error string carried in `error` (no `result`).
#[test]
fn mcp_tool_call_failure_sets_failed_status() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let invocation = McpInvocation {
        server: "server_b".to_string(),
        tool: "tool_y".to_string(),
        arguments: Some(json!({ "param": 42 })),
    };
    let begin = event(
        "m3",
        EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
            call_id: "call-2".to_string(),
            invocation: invocation.clone(),
        }),
    );
    // Begin output is not asserted here; this test only checks the end shape.
    ep.collect_thread_events(&begin);
    let end = event(
        "m4",
        EventMsg::McpToolCallEnd(McpToolCallEndEvent {
            call_id: "call-2".to_string(),
            invocation,
            duration: Duration::from_millis(5),
            result: Err("tool exploded".to_string()),
        }),
    );
    let events = ep.collect_thread_events(&end);
    assert_eq!(
        events,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                    server: "server_b".to_string(),
                    tool: "tool_y".to_string(),
                    arguments: json!({ "param": 42 }),
                    result: None,
                    error: Some(McpToolCallItemError {
                        message: "tool exploded".to_string(),
                    }),
                    status: McpToolCallStatus::Failed,
                }),
            },
        })]
    );
}
/// When the invocation carries no arguments, the emitted item uses JSON
/// `null` for `arguments`; content blocks and `structured_content` from the
/// tool result are passed through unchanged on completion.
#[test]
fn mcp_tool_call_defaults_arguments_and_preserves_structured_content() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let invocation = McpInvocation {
        server: "server_c".to_string(),
        tool: "tool_z".to_string(),
        arguments: None,
    };
    let begin = event(
        "m5",
        EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
            call_id: "call-3".to_string(),
            invocation: invocation.clone(),
        }),
    );
    let begin_events = ep.collect_thread_events(&begin);
    assert_eq!(
        begin_events,
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                    server: "server_c".to_string(),
                    tool: "tool_z".to_string(),
                    // `arguments: None` is surfaced as JSON null, not omitted.
                    arguments: serde_json::Value::Null,
                    result: None,
                    error: None,
                    status: McpToolCallStatus::InProgress,
                }),
            },
        })]
    );
    let end = event(
        "m6",
        EventMsg::McpToolCallEnd(McpToolCallEndEvent {
            call_id: "call-3".to_string(),
            invocation,
            duration: Duration::from_millis(10),
            result: Ok(CallToolResult {
                content: vec![ContentBlock::TextContent(TextContent {
                    annotations: None,
                    text: "done".to_string(),
                    r#type: "text".to_string(),
                })],
                is_error: None,
                structured_content: Some(json!({ "status": "ok" })),
            }),
        }),
    );
    let events = ep.collect_thread_events(&end);
    assert_eq!(
        events,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                    server: "server_c".to_string(),
                    tool: "tool_z".to_string(),
                    arguments: serde_json::Value::Null,
                    result: Some(McpToolCallItemResult {
                        content: vec![ContentBlock::TextContent(TextContent {
                            annotations: None,
                            text: "done".to_string(),
                            r#type: "text".to_string(),
                        })],
                        structured_content: Some(json!({ "status": "ok" })),
                    }),
                    error: None,
                    status: McpToolCallStatus::Completed,
                }),
            },
        })]
    );
}
/// The item-id counter is monotonic across turns: after a todo list created
/// in one turn is completed, a new plan update in the next turn starts a new
/// todo list under a fresh id (`item_1`, not a reused `item_0`).
#[test]
fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // First turn: start + complete
    let start = event(
        "t1",
        EventMsg::PlanUpdate(UpdatePlanArgs {
            explanation: None,
            plan: vec![PlanItemArg {
                step: "only".to_string(),
                status: StepStatus::Pending,
            }],
        }),
    );
    let _ = ep.collect_thread_events(&start);
    let complete = event(
        "t2",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: None,
        }),
    );
    let _ = ep.collect_thread_events(&complete);
    // Second turn: a new todo list should have a new id
    let start_again = event(
        "t3",
        EventMsg::PlanUpdate(UpdatePlanArgs {
            explanation: None,
            plan: vec![PlanItemArg {
                step: "again".to_string(),
                status: StepStatus::Pending,
            }],
        }),
    );
    let out = ep.collect_thread_events(&start_again);
    match &out[0] {
        ThreadEvent::ItemStarted(ItemStartedEvent { item }) => {
            assert_eq!(&item.id, "item_1");
        }
        other => panic!("unexpected event: {other:?}"),
    }
}
/// `AgentReasoning` text becomes a completed `reasoning` item.
#[test]
fn agent_reasoning_produces_item_completed_reasoning() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let reasoning = event(
        "e1",
        EventMsg::AgentReasoning(AgentReasoningEvent {
            text: "thinking...".to_string(),
        }),
    );
    let produced = processor.collect_thread_events(&reasoning);
    let expected = vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::Reasoning(ReasoningItem {
                text: "thinking...".to_string(),
            }),
        },
    })];
    assert_eq!(produced, expected);
}
/// An `AgentMessage` becomes a completed `agent_message` item.
#[test]
fn agent_message_produces_item_completed_agent_message() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let message = event(
        "e1",
        EventMsg::AgentMessage(AgentMessageEvent {
            message: "hello".to_string(),
        }),
    );
    let produced = processor.collect_thread_events(&message);
    let expected = vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::AgentMessage(AgentMessageItem {
                text: "hello".to_string(),
            }),
        },
    })];
    assert_eq!(produced, expected);
}
/// A protocol `Error` surfaces directly as a thread-level `error` event.
#[test]
fn error_event_produces_error() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let failure = event(
        "e1",
        EventMsg::Error(codex_core::protocol::ErrorEvent {
            message: "boom".to_string(),
        }),
    );
    let produced = processor.collect_thread_events(&failure);
    let expected = vec![ThreadEvent::Error(ThreadErrorEvent {
        message: "boom".to_string(),
    })];
    assert_eq!(produced, expected);
}
/// A `Warning` is not a thread-level error: it is emitted as a completed
/// item whose details are an `ErrorItem` carrying the warning text.
#[test]
fn warning_event_produces_error_item() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let out = ep.collect_thread_events(&event(
        "e1",
        EventMsg::Warning(WarningEvent {
            message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(),
        }),
    ));
    assert_eq!(
        out,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::Error(ErrorItem {
                    message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(),
                }),
            },
        })]
    );
}
/// A `StreamError` is reported the same way as a plain error: as a
/// thread-level `error` event with the message passed through.
#[test]
fn stream_error_event_produces_error() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let stream_failure = event(
        "e1",
        EventMsg::StreamError(codex_core::protocol::StreamErrorEvent {
            message: "retrying".to_string(),
        }),
    );
    let produced = processor.collect_thread_events(&stream_failure);
    let expected = vec![ThreadEvent::Error(ThreadErrorEvent {
        message: "retrying".to_string(),
    })];
    assert_eq!(produced, expected);
}
/// The processor remembers the last error of a turn: `TaskComplete` after an
/// `Error` yields `turn.failed` (carrying that error) instead of
/// `turn.completed`.
#[test]
fn error_followed_by_task_complete_produces_turn_failed() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let error_event = event(
        "e1",
        EventMsg::Error(ErrorEvent {
            message: "boom".to_string(),
        }),
    );
    // The error is still emitted immediately as a thread-level error event.
    assert_eq!(
        ep.collect_thread_events(&error_event),
        vec![ThreadEvent::Error(ThreadErrorEvent {
            message: "boom".to_string(),
        })]
    );
    let complete_event = event(
        "e2",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: None,
        }),
    );
    assert_eq!(
        ep.collect_thread_events(&complete_event),
        vec![ThreadEvent::TurnFailed(TurnFailedEvent {
            error: ThreadErrorEvent {
                message: "boom".to_string(),
            },
        })]
    );
}
/// An exec command follows the item lifecycle: begin emits `item.started`
/// with an in-progress `command_execution` item (shell-quoted command, no
/// exit code yet); a zero-exit end completes the same item with the
/// aggregated output and `exit_code: Some(0)`.
#[test]
fn exec_command_end_success_produces_completed_command_item() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // Begin -> no output
    let begin = event(
        "c1",
        EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
            call_id: "1".to_string(),
            command: vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()],
            cwd: std::env::current_dir().unwrap(),
            parsed_cmd: Vec::new(),
            is_user_shell_command: false,
        }),
    );
    let out_begin = ep.collect_thread_events(&begin);
    assert_eq!(
        out_begin,
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                // The argv vector is rendered as a single shell-quoted string.
                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                    command: "bash -lc 'echo hi'".to_string(),
                    aggregated_output: String::new(),
                    exit_code: None,
                    status: CommandExecutionStatus::InProgress,
                }),
            },
        })]
    );
    // End (success) -> item.completed (item_0)
    let end_ok = event(
        "c2",
        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
            call_id: "1".to_string(),
            stdout: String::new(),
            stderr: String::new(),
            aggregated_output: "hi\n".to_string(),
            exit_code: 0,
            duration: Duration::from_millis(5),
            formatted_output: String::new(),
        }),
    );
    let out_ok = ep.collect_thread_events(&end_ok);
    assert_eq!(
        out_ok,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                    command: "bash -lc 'echo hi'".to_string(),
                    aggregated_output: "hi\n".to_string(),
                    exit_code: Some(0),
                    status: CommandExecutionStatus::Completed,
                }),
            },
        })]
    );
}
/// A non-zero exit code completes the command item with status `failed`
/// (same item id as the begin event, `exit_code: Some(1)`).
#[test]
fn exec_command_end_failure_produces_failed_command_item() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // Begin -> no output
    let begin = event(
        "c1",
        EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
            call_id: "2".to_string(),
            command: vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()],
            cwd: std::env::current_dir().unwrap(),
            parsed_cmd: Vec::new(),
            is_user_shell_command: false,
        }),
    );
    assert_eq!(
        ep.collect_thread_events(&begin),
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                    command: "sh -c 'exit 1'".to_string(),
                    aggregated_output: String::new(),
                    exit_code: None,
                    status: CommandExecutionStatus::InProgress,
                }),
            },
        })]
    );
    // End (failure) -> item.completed (item_0)
    let end_fail = event(
        "c2",
        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
            call_id: "2".to_string(),
            stdout: String::new(),
            stderr: String::new(),
            aggregated_output: String::new(),
            exit_code: 1,
            duration: Duration::from_millis(2),
            formatted_output: String::new(),
        }),
    );
    let out_fail = ep.collect_thread_events(&end_fail);
    assert_eq!(
        out_fail,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                    command: "sh -c 'exit 1'".to_string(),
                    aggregated_output: String::new(),
                    exit_code: Some(1),
                    status: CommandExecutionStatus::Failed,
                }),
            },
        })]
    );
}
/// An `ExecCommandEnd` with no matching begin has no item to complete and
/// must be dropped without emitting any thread events.
#[test]
fn exec_command_end_without_begin_is_ignored() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let orphan_end = event(
        "c1",
        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
            call_id: "no-begin".to_string(),
            stdout: String::new(),
            stderr: String::new(),
            aggregated_output: String::new(),
            exit_code: 0,
            duration: Duration::from_millis(1),
            formatted_output: String::new(),
        }),
    );
    let produced = processor.collect_thread_events(&orphan_end);
    assert!(produced.is_empty());
}
/// A successful patch apply emits a single `item.completed` with a
/// `file_change` item: one change entry per touched path, each mapped to the
/// matching `PatchChangeKind`. The begin event itself emits nothing.
#[test]
fn patch_apply_success_produces_item_completed_patchapply() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // Prepare a patch with multiple kinds of changes
    let mut changes = std::collections::HashMap::new();
    changes.insert(
        PathBuf::from("a/added.txt"),
        FileChange::Add {
            content: "+hello".to_string(),
        },
    );
    changes.insert(
        PathBuf::from("b/deleted.txt"),
        FileChange::Delete {
            content: "-goodbye".to_string(),
        },
    );
    changes.insert(
        PathBuf::from("c/modified.txt"),
        FileChange::Update {
            unified_diff: "--- c/modified.txt\n+++ c/modified.txt\n@@\n-old\n+new\n".to_string(),
            move_path: Some(PathBuf::from("c/renamed.txt")),
        },
    );
    // Begin -> no output
    let begin = event(
        "p1",
        EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
            call_id: "call-1".to_string(),
            auto_approved: true,
            changes: changes.clone(),
        }),
    );
    let out_begin = ep.collect_thread_events(&begin);
    assert!(out_begin.is_empty());
    // End (success) -> item.completed (item_0)
    let end = event(
        "p2",
        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
            call_id: "call-1".to_string(),
            stdout: "applied 3 changes".to_string(),
            stderr: String::new(),
            success: true,
        }),
    );
    let out_end = ep.collect_thread_events(&end);
    assert_eq!(out_end.len(), 1);
    // Validate structure without relying on HashMap iteration order
    match &out_end[0] {
        ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => {
            assert_eq!(&item.id, "item_0");
            match &item.details {
                ThreadItemDetails::FileChange(file_update) => {
                    assert_eq!(file_update.status, PatchApplyStatus::Completed);
                    // Sort both sides by path so the comparison is
                    // deterministic regardless of map iteration order.
                    let mut actual: Vec<(String, PatchChangeKind)> = file_update
                        .changes
                        .iter()
                        .map(|c| (c.path.clone(), c.kind.clone()))
                        .collect();
                    actual.sort_by(|a, b| a.0.cmp(&b.0));
                    let mut expected = vec![
                        ("a/added.txt".to_string(), PatchChangeKind::Add),
                        ("b/deleted.txt".to_string(), PatchChangeKind::Delete),
                        ("c/modified.txt".to_string(), PatchChangeKind::Update),
                    ];
                    expected.sort_by(|a, b| a.0.cmp(&b.0));
                    assert_eq!(actual, expected);
                }
                other => panic!("unexpected details: {other:?}"),
            }
        }
        other => panic!("unexpected event: {other:?}"),
    }
}
/// A failed patch apply still completes the `file_change` item, but with
/// status `failed`; the change list reflects the attempted patch.
#[test]
fn patch_apply_failure_produces_item_completed_patchapply_failed() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let mut changes = std::collections::HashMap::new();
    changes.insert(
        PathBuf::from("file.txt"),
        FileChange::Update {
            unified_diff: "--- file.txt\n+++ file.txt\n@@\n-old\n+new\n".to_string(),
            move_path: None,
        },
    );
    // Begin -> no output
    let begin = event(
        "p1",
        EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
            call_id: "call-2".to_string(),
            auto_approved: false,
            changes: changes.clone(),
        }),
    );
    assert!(ep.collect_thread_events(&begin).is_empty());
    // End (failure) -> item.completed (item_0) with Failed status
    let end = event(
        "p2",
        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
            call_id: "call-2".to_string(),
            stdout: String::new(),
            stderr: "failed to apply".to_string(),
            success: false,
        }),
    );
    let out_end = ep.collect_thread_events(&end);
    assert_eq!(out_end.len(), 1);
    match &out_end[0] {
        ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => {
            assert_eq!(&item.id, "item_0");
            match &item.details {
                ThreadItemDetails::FileChange(file_update) => {
                    assert_eq!(file_update.status, PatchApplyStatus::Failed);
                    assert_eq!(file_update.changes.len(), 1);
                    assert_eq!(file_update.changes[0].path, "file.txt".to_string());
                    assert_eq!(file_update.changes[0].kind, PatchChangeKind::Update);
                }
                other => panic!("unexpected details: {other:?}"),
            }
        }
        other => panic!("unexpected event: {other:?}"),
    }
}
/// `TokenCount` events are absorbed silently; the captured totals are later
/// attached to the `turn.completed` event emitted on `TaskComplete`.
#[test]
fn task_complete_produces_turn_completed_with_usage() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // First, feed a TokenCount event with known totals.
    // NOTE(review): `reasoning_output_tokens` and `total_tokens` are left at
    // 0 and are not asserted below — the exec `Usage` only exposes
    // input/cached/output counts.
    let usage = codex_core::protocol::TokenUsage {
        input_tokens: 1200,
        cached_input_tokens: 200,
        output_tokens: 345,
        reasoning_output_tokens: 0,
        total_tokens: 0,
    };
    let info = codex_core::protocol::TokenUsageInfo {
        total_token_usage: usage.clone(),
        last_token_usage: usage,
        model_context_window: None,
    };
    let token_count_event = event(
        "e1",
        EventMsg::TokenCount(codex_core::protocol::TokenCountEvent {
            info: Some(info),
            rate_limits: None,
        }),
    );
    assert!(ep.collect_thread_events(&token_count_event).is_empty());
    // Then TaskComplete should produce turn.completed with the captured usage.
    let complete_event = event(
        "e2",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: Some("done".to_string()),
        }),
    );
    let out = ep.collect_thread_events(&complete_event);
    assert_eq!(
        out,
        vec![ThreadEvent::TurnCompleted(TurnCompletedEvent {
            usage: Usage {
                input_tokens: 1200,
                cached_input_tokens: 200,
                output_tokens: 345,
            },
        })]
    );
}

View File

@@ -0,0 +1,4 @@
class BaseClass:
def method():
return True

View File

@@ -0,0 +1,10 @@
event: response.created
data: {"type":"response.created","response":{"id":"resp1"}}
event: response.output_item.done
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"fixture hello"}]}}
event: response.completed
data: {"type":"response.completed","response":{"id":"resp1","output":[]}}

View File

@@ -0,0 +1,151 @@
#![allow(clippy::expect_used, clippy::unwrap_used, unused_imports)]
use anyhow::Context;
use assert_cmd::prelude::*;
use codex_core::CODEX_APPLY_PATCH_ARG1;
use core_test_support::responses::ev_apply_patch_custom_tool_call;
use core_test_support::responses::ev_apply_patch_function_call;
use core_test_support::responses::ev_completed;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use std::fs;
use std::process::Command;
use tempfile::tempdir;
/// While we may add an `apply-patch` subcommand to the `codex` CLI multitool
/// at some point, we must ensure that the smaller `codex-exec` CLI can still
/// emulate the `apply_patch` CLI.
#[test]
fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
    let tmp = tempdir()?;
    let relative_path = "source.txt";
    let absolute_path = tmp.path().join(relative_path);
    fs::write(&absolute_path, "original content\n")?;
    // Passing CODEX_APPLY_PATCH_ARG1 as argv[1] switches the binary into
    // apply_patch emulation; the patch body is the second argument.
    Command::cargo_bin("codex-exec")
        .context("should find binary for codex-exec")?
        .arg(CODEX_APPLY_PATCH_ARG1)
        .arg(
            r#"*** Begin Patch
*** Update File: source.txt
@@
-original content
+modified by apply_patch
*** End Patch"#,
        )
        .current_dir(tmp.path())
        .assert()
        .success()
        .stdout("Success. Updated the following files:\nM source.txt\n")
        .stderr(predicates::str::is_empty());
    // The patch must have been applied on disk, not just reported.
    assert_eq!(
        fs::read_to_string(absolute_path)?,
        "modified by apply_patch\n"
    );
    Ok(())
}
/// End-to-end: the model (mocked via SSE streams) issues an apply_patch
/// custom tool call that adds `test.md`, then a function-style call that
/// updates it; the file on disk must end up with the final content.
#[cfg(not(target_os = "windows"))]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_apply_patch_tool() -> anyhow::Result<()> {
    use core_test_support::skip_if_no_network;
    use core_test_support::test_codex_exec::test_codex_exec;
    skip_if_no_network!(Ok(()));
    let test = test_codex_exec();
    let tmp_path = test.cwd_path().to_path_buf();
    let add_patch = r#"*** Begin Patch
*** Add File: test.md
+Hello world
*** End Patch"#;
    let update_patch = r#"*** Begin Patch
*** Update File: test.md
@@
-Hello world
+Final text
*** End Patch"#;
    // Three model turns: custom tool call (add), function call (update),
    // then a bare completion to end the task.
    let response_streams = vec![
        sse(vec![
            ev_apply_patch_custom_tool_call("request_0", add_patch),
            ev_completed("request_0"),
        ]),
        sse(vec![
            ev_apply_patch_function_call("request_1", update_patch),
            ev_completed("request_1"),
        ]),
        sse(vec![ev_completed("request_2")]),
    ];
    let server = start_mock_server().await;
    mount_sse_sequence(&server, response_streams).await;
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("-s")
        .arg("danger-full-access")
        .arg("foo")
        .assert()
        .success();
    let final_path = tmp_path.join("test.md");
    let contents = std::fs::read_to_string(&final_path)
        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
    assert_eq!(contents, "Final text\n");
    Ok(())
}
/// Same flow as `test_apply_patch_tool`, but both patches use the freeform
/// (custom tool call) format, including a context-anchored `@@ def method():`
/// hunk; the final file is compared against a checked-in fixture.
#[cfg(not(target_os = "windows"))]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
    use core_test_support::skip_if_no_network;
    use core_test_support::test_codex_exec::test_codex_exec;
    skip_if_no_network!(Ok(()));
    let test = test_codex_exec();
    let freeform_add_patch = r#"*** Begin Patch
*** Add File: app.py
+class BaseClass:
+  def method():
+    return False
*** End Patch"#;
    let freeform_update_patch = r#"*** Begin Patch
*** Update File: app.py
@@ def method():
-    return False
+
+    return True
*** End Patch"#;
    let response_streams = vec![
        sse(vec![
            ev_apply_patch_custom_tool_call("request_0", freeform_add_patch),
            ev_completed("request_0"),
        ]),
        sse(vec![
            ev_apply_patch_custom_tool_call("request_1", freeform_update_patch),
            ev_completed("request_1"),
        ]),
        sse(vec![ev_completed("request_2")]),
    ];
    let server = start_mock_server().await;
    mount_sse_sequence(&server, response_streams).await;
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("-s")
        .arg("danger-full-access")
        .arg("foo")
        .assert()
        .success();
    // Verify final file contents
    let final_path = test.cwd_path().join("app.py");
    let contents = std::fs::read_to_string(&final_path)
        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
    assert_eq!(
        contents,
        include_str!("../fixtures/apply_patch_freeform_final.txt")
    );
    Ok(())
}

View File

@@ -0,0 +1,30 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use core_test_support::responses::ev_completed;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex_exec::test_codex_exec;
use wiremock::matchers::header;
/// The mock server only matches requests carrying `Authorization: Bearer
/// dummy`, so a successful run proves the API key reached the request.
/// NOTE(review): the "dummy" key itself is presumably injected by
/// `test_codex_exec()` (e.g. via an API-key env var) — confirm in
/// core_test_support.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_uses_codex_api_key_env_var() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let server = start_mock_server().await;
    mount_sse_once_match(
        &server,
        header("Authorization", "Bearer dummy"),
        sse(vec![ev_completed("request_0")]),
    )
    .await;
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg("echo testing codex api key")
        .assert()
        .success();
    Ok(())
}

View File

@@ -0,0 +1,8 @@
// Aggregates all former standalone integration tests as modules, so they
// build as a single test binary. Modules are listed alphabetically.
mod apply_patch;
mod auth_env;
mod originator;
mod output_schema;
mod resume;
mod sandbox;
mod server_error_exit;

View File

@@ -0,0 +1,52 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use core_test_support::responses;
use core_test_support::test_codex_exec::test_codex_exec;
use wiremock::matchers::header;
/// Verify that requests sent by `codex-exec` carry the default
/// `Originator: codex_exec` header: the mock server only answers requests
/// with that header, so a zero exit code proves it was sent.
/// (The previous comment here described the server-error-exit test and was
/// a stale copy-paste.)
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn send_codex_exec_originator() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let server = responses::start_mock_server().await;
    let body = responses::sse(vec![
        responses::ev_response_created("response_1"),
        responses::ev_assistant_message("response_1", "Hello, world!"),
        responses::ev_completed("response_1"),
    ]);
    responses::mount_sse_once_match(&server, header("Originator", "codex_exec"), body).await;
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("tell me something")
        .assert()
        .code(0);
    Ok(())
}
/// Setting `CODEX_INTERNAL_ORIGINATOR_OVERRIDE` must replace the default
/// `Originator` header value; the mock server only matches the overridden
/// value, so success proves the override took effect.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn supports_originator_override() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let server = responses::start_mock_server().await;
    let body = responses::sse(vec![
        responses::ev_response_created("response_1"),
        responses::ev_assistant_message("response_1", "Hello, world!"),
        responses::ev_completed("response_1"),
    ]);
    responses::mount_sse_once_match(&server, header("Originator", "codex_exec_override"), body)
        .await;
    test.cmd_with_server(&server)
        .env("CODEX_INTERNAL_ORIGINATOR_OVERRIDE", "codex_exec_override")
        .arg("--skip-git-repo-check")
        .arg("tell me something")
        .assert()
        .code(0);
    Ok(())
}

View File

@@ -0,0 +1,63 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use core_test_support::responses;
use core_test_support::test_codex_exec::test_codex_exec;
use serde_json::Value;
use wiremock::matchers::any;
/// Passing `--output-schema <file>` must embed the JSON schema in the
/// request's `text.format` field as a strict `json_schema` named
/// `codex_output_schema`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let schema_contents = serde_json::json!({
        "type": "object",
        "properties": {
            "answer": { "type": "string" }
        },
        "required": ["answer"],
        "additionalProperties": false
    });
    // Write the schema to disk so it can be passed by path on the CLI.
    let schema_path = test.cwd_path().join("schema.json");
    std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?;
    let expected_schema: Value = schema_contents;
    let server = responses::start_mock_server().await;
    let body = responses::sse(vec![
        responses::ev_response_created("resp1"),
        responses::ev_assistant_message("m1", "fixture hello"),
        responses::ev_completed("resp1"),
    ]);
    let response_mock = responses::mount_sse_once_match(&server, any(), body).await;
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        // keep using -C in the test to exercise the flag as well
        .arg("-C")
        .arg(test.cwd_path())
        .arg("--output-schema")
        .arg(&schema_path)
        .arg("-m")
        .arg("gpt-5")
        .arg("tell me a joke")
        .assert()
        .success();
    // Inspect the single captured request and check the schema payload.
    let request = response_mock.single_request();
    let payload: Value = request.body_json();
    let text = payload.get("text").expect("request missing text field");
    let format = text
        .get("format")
        .expect("request missing text.format field");
    assert_eq!(
        format,
        &serde_json::json!({
            "name": "codex_output_schema",
            "type": "json_schema",
            "strict": true,
            "schema": expected_schema,
        })
    );
    Ok(())
}

View File

@@ -0,0 +1,257 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Context;
use core_test_support::test_codex_exec::test_codex_exec;
use serde_json::Value;
use std::path::Path;
use std::string::ToString;
use uuid::Uuid;
use walkdir::WalkDir;
/// Utility: scan the sessions dir for a rollout file that contains `marker`
/// in any response_item.message.content entry. Returns the absolute path.
fn find_session_file_containing_marker(
    sessions_dir: &std::path::Path,
    marker: &str,
) -> Option<std::path::PathBuf> {
    // A JSONL line matches when it parses as a response_item whose message
    // payload mentions the marker anywhere in its serialized content.
    let line_matches = |line: &str| -> bool {
        if line.trim().is_empty() {
            return false;
        }
        let Ok(item) = serde_json::from_str::<Value>(line) else {
            return false;
        };
        if item.get("type").and_then(|t| t.as_str()) != Some("response_item") {
            return false;
        }
        let Some(payload) = item.get("payload") else {
            return false;
        };
        payload.get("type").and_then(|t| t.as_str()) == Some("message")
            && payload
                .get("content")
                .map(ToString::to_string)
                .unwrap_or_default()
                .contains(marker)
    };
    // Walk every .jsonl file under the sessions dir, ignoring traversal and
    // read errors, and return the first file with a matching line.
    WalkDir::new(sessions_dir)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .filter(|entry| entry.file_name().to_string_lossy().ends_with(".jsonl"))
        .find_map(|entry| {
            let path = entry.path().to_path_buf();
            let content = std::fs::read_to_string(&path).ok()?;
            // Skip the first (SessionMeta) line and scan remaining entries.
            if content.lines().skip(1).any(|line| line_matches(line)) {
                Some(path)
            } else {
                None
            }
        })
}
/// Extract the conversation UUID from the first SessionMeta line in the rollout file.
fn extract_conversation_id(path: &std::path::Path) -> String {
let content = std::fs::read_to_string(path).unwrap();
let mut lines = content.lines();
let meta_line = lines.next().expect("missing meta line");
let meta: Value = serde_json::from_str(meta_line).expect("invalid meta json");
meta.get("payload")
.and_then(|p| p.get("id"))
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string()
}
/// End-to-end: `exec ... resume --last` must append to the rollout file that
/// the immediately preceding run created, not start a new one.
#[test]
fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> {
    let harness = test_codex_exec();
    let fixture =
        Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
    // 1) First run: create a session with a unique marker in the content.
    let first_marker = format!("resume-last-{}", Uuid::new_v4());
    harness
        .cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(format!("echo {first_marker}"))
        .assert()
        .success();
    // Locate the rollout file the first run produced.
    let sessions_dir = harness.home_path().join("sessions");
    let original_path = find_session_file_containing_marker(&sessions_dir, &first_marker)
        .expect("no session file found after first run");
    // 2) Second run: resume the most recent file with a new marker.
    let second_marker = format!("resume-last-2-{}", Uuid::new_v4());
    harness
        .cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(format!("echo {second_marker}"))
        .arg("resume")
        .arg("--last")
        .assert()
        .success();
    // Ensure the same file was updated and contains both markers.
    let resumed_path = find_session_file_containing_marker(&sessions_dir, &second_marker)
        .expect("no resumed session file containing marker2");
    assert_eq!(
        resumed_path, original_path,
        "resume --last should append to existing file"
    );
    let content = std::fs::read_to_string(&resumed_path)?;
    assert!(content.contains(&first_marker));
    assert!(content.contains(&second_marker));
    Ok(())
}
/// End-to-end: a second run invoked as `resume <conversation-id>` must append
/// to the rollout file created by the first run rather than starting a new
/// session file.
#[test]
fn exec_resume_by_id_appends_to_existing_file() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let fixture =
        Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
    // 1) First run: create a session
    // The unique marker lets us find the rollout file among any others.
    let marker = format!("resume-by-id-{}", Uuid::new_v4());
    let prompt = format!("echo {marker}");
    // CODEX_RS_SSE_FIXTURE replays model responses from the fixture file, so
    // OPENAI_BASE_URL points at a host that is never actually contacted.
    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(&prompt)
        .assert()
        .success();
    let sessions_dir = test.home_path().join("sessions");
    let path = find_session_file_containing_marker(&sessions_dir, &marker)
        .expect("no session file found after first run");
    // The conversation id comes from the SessionMeta line of the rollout file.
    let session_id = extract_conversation_id(&path);
    assert!(
        !session_id.is_empty(),
        "missing conversation id in meta line"
    );
    // 2) Resume by id
    let marker2 = format!("resume-by-id-2-{}", Uuid::new_v4());
    let prompt2 = format!("echo {marker2}");
    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(&prompt2)
        .arg("resume")
        .arg(&session_id)
        .assert()
        .success();
    // Both markers must now live in the same file: the resume appended.
    let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2)
        .expect("no resumed session file containing marker2");
    assert_eq!(
        resumed_path, path,
        "resume by id should append to existing file"
    );
    let content = std::fs::read_to_string(&resumed_path)?;
    assert!(content.contains(&marker));
    assert!(content.contains(&marker2));
    Ok(())
}
/// A resumed run must honor CLI configuration overrides (`--model`,
/// `--sandbox`) passed on the resume invocation, as reported in the CLI's
/// stderr summary, while still appending to the original rollout file.
#[test]
fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let fixture =
        Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
    // First run: create a session with explicit sandbox/model flags.
    let marker = format!("resume-config-{}", Uuid::new_v4());
    let prompt = format!("echo {marker}");
    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("--sandbox")
        .arg("workspace-write")
        .arg("--model")
        .arg("gpt-5")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(&prompt)
        .assert()
        .success();
    let sessions_dir = test.home_path().join("sessions");
    let path = find_session_file_containing_marker(&sessions_dir, &marker)
        .expect("no session file found after first run");
    // Resume with a DIFFERENT model (gpt-5-high); capture output so stderr can
    // be inspected for the override summary.
    let marker2 = format!("resume-config-2-{}", Uuid::new_v4());
    let prompt2 = format!("echo {marker2}");
    let output = test
        .cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("--sandbox")
        .arg("workspace-write")
        .arg("--model")
        .arg("gpt-5-high")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(&prompt2)
        .arg("resume")
        .arg("--last")
        .output()
        .context("resume run should succeed")?;
    assert!(output.status.success(), "resume run failed: {output:?}");
    let stderr = String::from_utf8(output.stderr)?;
    assert!(
        stderr.contains("model: gpt-5-high"),
        "stderr missing model override: {stderr}"
    );
    // On Windows the requested workspace-write sandbox is downgraded to
    // read-only, so the expected stderr note differs per platform.
    if cfg!(target_os = "windows") {
        assert!(
            stderr.contains("sandbox: read-only"),
            "stderr missing downgraded sandbox note: {stderr}"
        );
    } else {
        assert!(
            stderr.contains("sandbox: workspace-write"),
            "stderr missing sandbox override: {stderr}"
        );
    }
    // Regardless of overrides, the resume must append to the same file.
    let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2)
        .expect("no resumed session file containing marker2");
    assert_eq!(resumed_path, path, "resume should append to same file");
    let content = std::fs::read_to_string(&resumed_path)?;
    assert!(content.contains(&marker));
    assert!(content.contains(&marker2));
    Ok(())
}

View File

@@ -0,0 +1,322 @@
#![cfg(unix)]
use codex_core::protocol::SandboxPolicy;
use codex_core::spawn::StdioPolicy;
use std::collections::HashMap;
use std::future::Future;
use std::io;
use std::path::Path;
use std::path::PathBuf;
use std::process::ExitStatus;
use tokio::fs::create_dir_all;
use tokio::process::Child;
/// Platform shim: on macOS, sandboxed commands are spawned under Seatbelt.
/// Signature mirrors the Linux variant so tests stay platform-agnostic.
#[cfg(target_os = "macos")]
async fn spawn_command_under_sandbox(
    command: Vec<String>,
    command_cwd: PathBuf,
    sandbox_policy: &SandboxPolicy,
    sandbox_cwd: &Path,
    stdio_policy: StdioPolicy,
    env: HashMap<String, String>,
) -> std::io::Result<Child> {
    use codex_core::seatbelt::spawn_command_under_seatbelt;
    let spawn = spawn_command_under_seatbelt(
        command,
        command_cwd,
        sandbox_policy,
        sandbox_cwd,
        stdio_policy,
        env,
    );
    spawn.await
}
/// Platform shim: on Linux, sandboxed commands run via the `codex-exec`
/// helper binary (resolved from the cargo build) under the Linux sandbox.
#[cfg(target_os = "linux")]
async fn spawn_command_under_sandbox(
    command: Vec<String>,
    command_cwd: PathBuf,
    sandbox_policy: &SandboxPolicy,
    sandbox_cwd: &Path,
    stdio_policy: StdioPolicy,
    env: HashMap<String, String>,
) -> std::io::Result<Child> {
    use codex_core::landlock::spawn_command_under_linux_sandbox;
    let sandbox_helper_exe = assert_cmd::cargo::cargo_bin("codex-exec");
    spawn_command_under_linux_sandbox(
        sandbox_helper_exe,
        command,
        command_cwd,
        sandbox_policy,
        sandbox_cwd,
        stdio_policy,
        env,
    )
    .await
}
/// Regression check: Python's `multiprocessing.Lock` (backed by named
/// semaphores) must work inside the workspace-write sandbox. The child
/// asserts only on the process exit status of the spawned Python script.
#[tokio::test]
async fn python_multiprocessing_lock_works_under_sandbox() {
    core_test_support::skip_if_sandbox!();
    // macOS needs no extra writable roots for semaphores.
    #[cfg(target_os = "macos")]
    let writable_roots = Vec::<PathBuf>::new();
    // From https://man7.org/linux/man-pages/man7/sem_overview.7.html
    //
    // > On Linux, named semaphores are created in a virtual filesystem,
    // > normally mounted under /dev/shm.
    #[cfg(target_os = "linux")]
    let writable_roots = vec![PathBuf::from("/dev/shm")];
    let policy = SandboxPolicy::WorkspaceWrite {
        writable_roots,
        network_access: false,
        exclude_tmpdir_env_var: false,
        exclude_slash_tmp: false,
    };
    // Minimal script: child process acquires a lock shared with the parent.
    let python_code = r#"import multiprocessing
from multiprocessing import Lock, Process

def f(lock):
    with lock:
        print("Lock acquired in child process")

if __name__ == '__main__':
    lock = Lock()
    p = Process(target=f, args=(lock,))
    p.start()
    p.join()
"#;
    let command_cwd = std::env::current_dir().expect("should be able to get current dir");
    let sandbox_cwd = command_cwd.clone();
    let mut child = spawn_command_under_sandbox(
        vec![
            "python3".to_string(),
            "-c".to_string(),
            python_code.to_string(),
        ],
        command_cwd,
        &policy,
        sandbox_cwd.as_path(),
        StdioPolicy::Inherit,
        HashMap::new(),
    )
    .await
    .expect("should be able to spawn python under sandbox");
    let status = child.wait().await.expect("should wait for child process");
    assert!(status.success(), "python exited with {status:?}");
}
/// Verifies the sandbox distinguishes the command's working directory from
/// the sandbox policy's cwd: writes are allowed under the policy cwd but
/// denied in a command cwd that lies outside it.
#[tokio::test]
async fn sandbox_distinguishes_command_and_policy_cwds() {
    core_test_support::skip_if_sandbox!();
    // Two sibling dirs: one is the sandbox policy root, the other only the
    // command's cwd.
    let temp = tempfile::tempdir().expect("should be able to create temp dir");
    let sandbox_root = temp.path().join("sandbox");
    let command_root = temp.path().join("command");
    create_dir_all(&sandbox_root).await.expect("mkdir");
    create_dir_all(&command_root).await.expect("mkdir");
    // Canonicalize so path comparison is not confused by symlinks (e.g. /tmp
    // on macOS).
    let canonical_sandbox_root = tokio::fs::canonicalize(&sandbox_root)
        .await
        .expect("canonicalize sandbox root");
    let canonical_allowed_path = canonical_sandbox_root.join("allowed.txt");
    let disallowed_path = command_root.join("forbidden.txt");
    // Note writable_roots is empty: verify that `canonical_allowed_path` is
    // writable only because it is under the sandbox policy cwd, not because it
    // is under a writable root.
    let policy = SandboxPolicy::WorkspaceWrite {
        writable_roots: vec![],
        network_access: false,
        exclude_tmpdir_env_var: true,
        exclude_slash_tmp: true,
    };
    // Attempt to write inside the command cwd, which is outside of the sandbox policy cwd.
    let mut child = spawn_command_under_sandbox(
        vec![
            "bash".to_string(),
            "-lc".to_string(),
            "echo forbidden > forbidden.txt".to_string(),
        ],
        command_root.clone(),
        &policy,
        canonical_sandbox_root.as_path(),
        StdioPolicy::Inherit,
        HashMap::new(),
    )
    .await
    .expect("should spawn command writing to forbidden path");
    let status = child
        .wait()
        .await
        .expect("should wait for forbidden command");
    // The write must fail AND the file must not exist afterwards.
    assert!(
        !status.success(),
        "sandbox unexpectedly allowed writing to command cwd: {status:?}"
    );
    let forbidden_exists = tokio::fs::try_exists(&disallowed_path)
        .await
        .expect("try_exists failed");
    assert!(
        !forbidden_exists,
        "forbidden path should not have been created"
    );
    // Writing to the sandbox policy cwd after changing directories into it should succeed.
    let mut child = spawn_command_under_sandbox(
        vec![
            "/usr/bin/touch".to_string(),
            canonical_allowed_path.to_string_lossy().into_owned(),
        ],
        command_root,
        &policy,
        canonical_sandbox_root.as_path(),
        StdioPolicy::Inherit,
        HashMap::new(),
    )
    .await
    .expect("should spawn command writing to sandbox root");
    let status = child.wait().await.expect("should wait for allowed command");
    assert!(
        status.success(),
        "sandbox blocked allowed write: {status:?}"
    );
    let allowed_exists = tokio::fs::try_exists(&canonical_allowed_path)
        .await
        .expect("try_exists allowed failed");
    assert!(allowed_exists, "allowed path should exist");
}
/// Exercises AF_UNIX socketpairs end-to-end: a datagram pair via
/// write()/recvfrom() with a payload round-trip check, then a quick stream
/// pair via write()/recv(). Designed to be re-executed inside the sandbox by
/// `run_code_under_sandbox` to prove these syscalls are permitted.
fn unix_sock_body() {
    // SAFETY: all raw libc calls below operate on file descriptors created in
    // this block; buffer pointers and lengths refer to live local arrays, and
    // every fd is closed before returning.
    unsafe {
        let mut fds = [0i32; 2];
        let r = libc::socketpair(libc::AF_UNIX, libc::SOCK_DGRAM, 0, fds.as_mut_ptr());
        assert_eq!(
            r,
            0,
            "socketpair(AF_UNIX, SOCK_DGRAM) failed: {}",
            io::Error::last_os_error()
        );
        let msg = b"hello_unix";
        // write() from one end (generic write is allowed)
        let sent = libc::write(fds[0], msg.as_ptr() as *const libc::c_void, msg.len());
        assert!(sent >= 0, "write() failed: {}", io::Error::last_os_error());
        // recvfrom() on the other end. We don't need the address for socketpair,
        // so we pass null pointers for src address.
        let mut buf = [0u8; 64];
        let recvd = libc::recvfrom(
            fds[1],
            buf.as_mut_ptr() as *mut libc::c_void,
            buf.len(),
            0,
            std::ptr::null_mut(),
            std::ptr::null_mut(),
        );
        assert!(
            recvd >= 0,
            "recvfrom() failed: {}",
            io::Error::last_os_error()
        );
        // Verify the datagram payload arrived intact.
        let recvd_slice = &buf[..(recvd as usize)];
        assert_eq!(
            recvd_slice,
            &msg[..],
            "payload mismatch: sent {} bytes, got {} bytes",
            msg.len(),
            recvd
        );
        // Also exercise AF_UNIX stream socketpair quickly to ensure AF_UNIX in general works.
        let mut sfds = [0i32; 2];
        let sr = libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, sfds.as_mut_ptr());
        assert_eq!(
            sr,
            0,
            "socketpair(AF_UNIX, SOCK_STREAM) failed: {}",
            io::Error::last_os_error()
        );
        let snt2 = libc::write(sfds[0], msg.as_ptr() as *const libc::c_void, msg.len());
        assert!(
            snt2 >= 0,
            "write(stream) failed: {}",
            io::Error::last_os_error()
        );
        let mut b2 = [0u8; 64];
        let rcv2 = libc::recv(sfds[1], b2.as_mut_ptr() as *mut libc::c_void, b2.len(), 0);
        assert!(
            rcv2 >= 0,
            "recv(stream) failed: {}",
            io::Error::last_os_error()
        );
        // Clean up
        let _ = libc::close(sfds[0]);
        let _ = libc::close(sfds[1]);
        let _ = libc::close(fds[0]);
        let _ = libc::close(fds[1]);
    }
}
/// Re-executes this test inside the read-only sandbox and runs
/// `unix_sock_body` there, proving AF_UNIX socketpair + recvfrom are allowed.
#[tokio::test]
async fn allow_unix_socketpair_recvfrom() {
    let policy = SandboxPolicy::ReadOnly;
    run_code_under_sandbox("allow_unix_socketpair_recvfrom", &policy, || async {
        unix_sock_body()
    })
    .await
    .expect("should be able to reexec");
}
/// Env var marking that the current process is the sandboxed child
/// re-execution rather than the test parent.
const IN_SANDBOX_ENV_VAR: &str = "IN_SANDBOX";

/// Re-executes the current test binary under `policy`, filtered (via
/// `--exact`) to `test_selector`; when invoked as the sandboxed child it runs
/// `child_body` instead.
///
/// Returns `Ok(Some(status))` in the parent (the child's exit status) and
/// `Ok(None)` in the child after `child_body` completes.
#[expect(clippy::expect_used)]
pub async fn run_code_under_sandbox<F, Fut>(
    test_selector: &str,
    policy: &SandboxPolicy,
    child_body: F,
) -> io::Result<Option<ExitStatus>>
where
    F: FnOnce() -> Fut + Send + 'static,
    Fut: Future<Output = ()> + Send + 'static,
{
    if std::env::var(IN_SANDBOX_ENV_VAR).is_err() {
        // Parent branch: re-exec this test binary inside the sandbox.
        let exe = std::env::current_exe()?;
        let mut cmds = vec![exe.to_string_lossy().into_owned(), "--exact".into()];
        let mut stdio_policy = StdioPolicy::RedirectForShellTool;
        // Allow for us to pass forward --nocapture / use the right stdio policy.
        if std::env::args().any(|a| a == "--nocapture") {
            cmds.push("--nocapture".into());
            stdio_policy = StdioPolicy::Inherit;
        }
        cmds.push(test_selector.into());
        let command_cwd = std::env::current_dir().expect("should be able to get current dir");
        let sandbox_cwd = command_cwd.clone();
        let mut child = spawn_command_under_sandbox(
            cmds,
            command_cwd,
            policy,
            sandbox_cwd.as_path(),
            stdio_policy,
            // Use the shared constant (previously a duplicated "IN_SANDBOX"
            // string literal) so parent and child agree on the marker var.
            HashMap::from([(IN_SANDBOX_ENV_VAR.into(), "1".into())]),
        )
        .await?;
        let status = child.wait().await?;
        Ok(Some(status))
    } else {
        // Child branch: run the provided body.
        child_body().await;
        Ok(None)
    }
}

View File

@@ -0,0 +1,34 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use core_test_support::responses;
use core_test_support::test_codex_exec::test_codex_exec;
use wiremock::matchers::any;
/// Verify that when the server reports an error, `codex-exec` exits with a
/// non-zero status code so automation can detect failures.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exits_non_zero_when_server_reports_error() -> anyhow::Result<()> {
    let harness = test_codex_exec();
    // Serve an SSE stream whose only event is a `response.failed` error.
    let server = responses::start_mock_server().await;
    let failure_event = serde_json::json!({
        "type": "response.failed",
        "response": {
            "id": "resp_err_1",
            "error": {"code": "rate_limit_exceeded", "message": "synthetic server error"}
        }
    });
    responses::mount_sse_once_match(&server, any(), responses::sse(vec![failure_event])).await;
    // The CLI must surface the failure as exit code 1.
    harness
        .cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("tell me something")
        .arg("--experimental-json")
        .assert()
        .code(1);
    Ok(())
}