Phase 1: Repository & Infrastructure Setup

- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
  - Root: llmx-monorepo
  - CLI: @llmx/llmx
  - SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx

🤖 Generated with Claude Code
This commit is contained in:
Sebastian Krüger
2025-11-11 14:01:52 +01:00
parent 052b052832
commit f237fe560d
1151 changed files with 41 additions and 35 deletions

View File

@@ -0,0 +1,5 @@
// Single integration test binary that aggregates all test modules.
// `suite` pulls in the submodules under `tests/suite/`;
// `event_processor_with_json_output` is declared as a sibling module
// (its source lives next to this file, not under `tests/suite/`).
mod suite;
mod event_processor_with_json_output;

View File

@@ -0,0 +1,939 @@
use codex_core::protocol::AgentMessageEvent;
use codex_core::protocol::AgentReasoningEvent;
use codex_core::protocol::ErrorEvent;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandBeginEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::FileChange;
use codex_core::protocol::McpInvocation;
use codex_core::protocol::McpToolCallBeginEvent;
use codex_core::protocol::McpToolCallEndEvent;
use codex_core::protocol::PatchApplyBeginEvent;
use codex_core::protocol::PatchApplyEndEvent;
use codex_core::protocol::SessionConfiguredEvent;
use codex_core::protocol::WarningEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_exec::event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
use codex_exec::exec_events::AgentMessageItem;
use codex_exec::exec_events::CommandExecutionItem;
use codex_exec::exec_events::CommandExecutionStatus;
use codex_exec::exec_events::ErrorItem;
use codex_exec::exec_events::ItemCompletedEvent;
use codex_exec::exec_events::ItemStartedEvent;
use codex_exec::exec_events::ItemUpdatedEvent;
use codex_exec::exec_events::McpToolCallItem;
use codex_exec::exec_events::McpToolCallItemError;
use codex_exec::exec_events::McpToolCallItemResult;
use codex_exec::exec_events::McpToolCallStatus;
use codex_exec::exec_events::PatchApplyStatus;
use codex_exec::exec_events::PatchChangeKind;
use codex_exec::exec_events::ReasoningItem;
use codex_exec::exec_events::ThreadErrorEvent;
use codex_exec::exec_events::ThreadEvent;
use codex_exec::exec_events::ThreadItem;
use codex_exec::exec_events::ThreadItemDetails;
use codex_exec::exec_events::ThreadStartedEvent;
use codex_exec::exec_events::TodoItem as ExecTodoItem;
use codex_exec::exec_events::TodoListItem as ExecTodoListItem;
use codex_exec::exec_events::TurnCompletedEvent;
use codex_exec::exec_events::TurnFailedEvent;
use codex_exec::exec_events::TurnStartedEvent;
use codex_exec::exec_events::Usage;
use codex_exec::exec_events::WebSearchItem;
use codex_protocol::plan_tool::PlanItemArg;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use mcp_types::CallToolResult;
use mcp_types::ContentBlock;
use mcp_types::TextContent;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::path::PathBuf;
use std::time::Duration;
/// Convenience constructor: wraps `msg` in an [`Event`] with the given id.
fn event(id: &str, msg: EventMsg) -> Event {
    let id = String::from(id);
    Event { id, msg }
}
/// `SessionConfigured` must surface as a `thread.started` event carrying the
/// session id (as a string) in `thread_id`.
#[test]
fn session_configured_produces_thread_started_event() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let raw_id = "67e55044-10b1-426f-9247-bb680e5fe0c8";
    let session_id = codex_protocol::ConversationId::from_string(raw_id).unwrap();
    let configured = event(
        "e1",
        EventMsg::SessionConfigured(SessionConfiguredEvent {
            session_id,
            model: "codex-mini-latest".to_string(),
            reasoning_effort: None,
            history_log_id: 0,
            history_entry_count: 0,
            initial_messages: None,
            rollout_path: PathBuf::from("/tmp/rollout.json"),
        }),
    );
    let produced = processor.collect_thread_events(&configured);
    let expected = vec![ThreadEvent::ThreadStarted(ThreadStartedEvent {
        thread_id: raw_id.to_string(),
    })];
    assert_eq!(produced, expected);
}
/// `TaskStarted` maps to a bare `turn.started` event.
#[test]
fn task_started_produces_turn_started_event() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let started = event(
        "t1",
        EventMsg::TaskStarted(codex_core::protocol::TaskStartedEvent {
            model_context_window: Some(32_000),
        }),
    );
    let produced = processor.collect_thread_events(&started);
    assert_eq!(produced, vec![ThreadEvent::TurnStarted(TurnStartedEvent {})]);
}
/// A finished web search emits `item.completed` echoing the query back.
#[test]
fn web_search_end_emits_item_completed() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let query = "rust async await";
    let finished = event(
        "w1",
        EventMsg::WebSearchEnd(WebSearchEndEvent {
            call_id: "call-123".to_string(),
            query: query.to_string(),
        }),
    );
    let expected = vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::WebSearch(WebSearchItem {
                query: query.to_string(),
            }),
        },
    })];
    assert_eq!(processor.collect_thread_events(&finished), expected);
}
/// A plan (todo list) follows the item lifecycle within a single turn:
/// the first `PlanUpdate` emits `item.started`, later updates emit
/// `item.updated` under the same item id, and `TaskComplete` emits
/// `item.completed` with the latest state plus `turn.completed`.
#[test]
fn plan_update_emits_todo_list_started_updated_and_completed() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // First plan update => item.started (todo_list)
    let first = event(
        "p1",
        EventMsg::PlanUpdate(UpdatePlanArgs {
            explanation: None,
            plan: vec![
                PlanItemArg {
                    step: "step one".to_string(),
                    status: StepStatus::Pending,
                },
                PlanItemArg {
                    step: "step two".to_string(),
                    status: StepStatus::InProgress,
                },
            ],
        }),
    );
    let out_first = ep.collect_thread_events(&first);
    // Note: both Pending and InProgress map to `completed: false` in the
    // emitted todo items; only Completed maps to `true` (see below).
    assert_eq!(
        out_first,
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::TodoList(ExecTodoListItem {
                    items: vec![
                        ExecTodoItem {
                            text: "step one".to_string(),
                            completed: false
                        },
                        ExecTodoItem {
                            text: "step two".to_string(),
                            completed: false
                        },
                    ],
                }),
            },
        })]
    );
    // Second plan update in same turn => item.updated (same id)
    let second = event(
        "p2",
        EventMsg::PlanUpdate(UpdatePlanArgs {
            explanation: None,
            plan: vec![
                PlanItemArg {
                    step: "step one".to_string(),
                    status: StepStatus::Completed,
                },
                PlanItemArg {
                    step: "step two".to_string(),
                    status: StepStatus::InProgress,
                },
            ],
        }),
    );
    let out_second = ep.collect_thread_events(&second);
    assert_eq!(
        out_second,
        vec![ThreadEvent::ItemUpdated(ItemUpdatedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::TodoList(ExecTodoListItem {
                    items: vec![
                        ExecTodoItem {
                            text: "step one".to_string(),
                            completed: true
                        },
                        ExecTodoItem {
                            text: "step two".to_string(),
                            completed: false
                        },
                    ],
                }),
            },
        })]
    );
    // Task completes => item.completed (same id, latest state)
    let complete = event(
        "p3",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: None,
        }),
    );
    let out_complete = ep.collect_thread_events(&complete);
    assert_eq!(
        out_complete,
        vec![
            ThreadEvent::ItemCompleted(ItemCompletedEvent {
                item: ThreadItem {
                    id: "item_0".to_string(),
                    details: ThreadItemDetails::TodoList(ExecTodoListItem {
                        items: vec![
                            ExecTodoItem {
                                text: "step one".to_string(),
                                completed: true
                            },
                            ExecTodoItem {
                                text: "step two".to_string(),
                                completed: false
                            },
                        ],
                    }),
                },
            }),
            // No TokenCount was fed, so usage falls back to its default.
            ThreadEvent::TurnCompleted(TurnCompletedEvent {
                usage: Usage::default(),
            }),
        ]
    );
}
/// An MCP tool call is surfaced as `item.started` (status `in_progress`) on
/// begin and, for a successful end, `item.completed` (status `completed`)
/// with the tool result attached — both under the same item id.
#[test]
fn mcp_tool_call_begin_and_end_emit_item_events() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let invocation = McpInvocation {
        server: "server_a".to_string(),
        tool: "tool_x".to_string(),
        arguments: Some(json!({ "key": "value" })),
    };
    let begin = event(
        "m1",
        EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
            call_id: "call-1".to_string(),
            invocation: invocation.clone(),
        }),
    );
    let begin_events = ep.collect_thread_events(&begin);
    assert_eq!(
        begin_events,
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                    server: "server_a".to_string(),
                    tool: "tool_x".to_string(),
                    arguments: json!({ "key": "value" }),
                    result: None,
                    error: None,
                    status: McpToolCallStatus::InProgress,
                }),
            },
        })]
    );
    // End with Ok(..) and a matching call_id => the started item completes.
    let end = event(
        "m2",
        EventMsg::McpToolCallEnd(McpToolCallEndEvent {
            call_id: "call-1".to_string(),
            invocation,
            duration: Duration::from_secs(1),
            result: Ok(CallToolResult {
                content: Vec::new(),
                is_error: None,
                structured_content: None,
            }),
        }),
    );
    let end_events = ep.collect_thread_events(&end);
    assert_eq!(
        end_events,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                    server: "server_a".to_string(),
                    tool: "tool_x".to_string(),
                    arguments: json!({ "key": "value" }),
                    result: Some(McpToolCallItemResult {
                        content: Vec::new(),
                        structured_content: None,
                    }),
                    error: None,
                    status: McpToolCallStatus::Completed,
                }),
            },
        })]
    );
}
/// An MCP tool call that ends with `Err(..)` completes the item with status
/// `failed` and the error string carried in `error` (no `result`).
#[test]
fn mcp_tool_call_failure_sets_failed_status() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let invocation = McpInvocation {
        server: "server_b".to_string(),
        tool: "tool_y".to_string(),
        arguments: Some(json!({ "param": 42 })),
    };
    let begin = event(
        "m3",
        EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
            call_id: "call-2".to_string(),
            invocation: invocation.clone(),
        }),
    );
    // Begin output is not asserted here; this test only checks the end shape.
    ep.collect_thread_events(&begin);
    let end = event(
        "m4",
        EventMsg::McpToolCallEnd(McpToolCallEndEvent {
            call_id: "call-2".to_string(),
            invocation,
            duration: Duration::from_millis(5),
            result: Err("tool exploded".to_string()),
        }),
    );
    let events = ep.collect_thread_events(&end);
    assert_eq!(
        events,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                    server: "server_b".to_string(),
                    tool: "tool_y".to_string(),
                    arguments: json!({ "param": 42 }),
                    result: None,
                    error: Some(McpToolCallItemError {
                        message: "tool exploded".to_string(),
                    }),
                    status: McpToolCallStatus::Failed,
                }),
            },
        })]
    );
}
/// When the invocation carries no arguments, the emitted item uses JSON
/// `null` for `arguments`; content blocks and `structured_content` from the
/// tool result are passed through unchanged on completion.
#[test]
fn mcp_tool_call_defaults_arguments_and_preserves_structured_content() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let invocation = McpInvocation {
        server: "server_c".to_string(),
        tool: "tool_z".to_string(),
        arguments: None,
    };
    let begin = event(
        "m5",
        EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
            call_id: "call-3".to_string(),
            invocation: invocation.clone(),
        }),
    );
    let begin_events = ep.collect_thread_events(&begin);
    assert_eq!(
        begin_events,
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                    server: "server_c".to_string(),
                    tool: "tool_z".to_string(),
                    // `arguments: None` is surfaced as JSON null, not omitted.
                    arguments: serde_json::Value::Null,
                    result: None,
                    error: None,
                    status: McpToolCallStatus::InProgress,
                }),
            },
        })]
    );
    let end = event(
        "m6",
        EventMsg::McpToolCallEnd(McpToolCallEndEvent {
            call_id: "call-3".to_string(),
            invocation,
            duration: Duration::from_millis(10),
            result: Ok(CallToolResult {
                content: vec![ContentBlock::TextContent(TextContent {
                    annotations: None,
                    text: "done".to_string(),
                    r#type: "text".to_string(),
                })],
                is_error: None,
                structured_content: Some(json!({ "status": "ok" })),
            }),
        }),
    );
    let events = ep.collect_thread_events(&end);
    assert_eq!(
        events,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                    server: "server_c".to_string(),
                    tool: "tool_z".to_string(),
                    arguments: serde_json::Value::Null,
                    result: Some(McpToolCallItemResult {
                        content: vec![ContentBlock::TextContent(TextContent {
                            annotations: None,
                            text: "done".to_string(),
                            r#type: "text".to_string(),
                        })],
                        structured_content: Some(json!({ "status": "ok" })),
                    }),
                    error: None,
                    status: McpToolCallStatus::Completed,
                }),
            },
        })]
    );
}
/// The item-id counter is monotonic across turns: after a todo list created
/// in one turn is completed, a new plan update in the next turn starts a new
/// todo list under a fresh id (`item_1`, not a reused `item_0`).
#[test]
fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // First turn: start + complete
    let start = event(
        "t1",
        EventMsg::PlanUpdate(UpdatePlanArgs {
            explanation: None,
            plan: vec![PlanItemArg {
                step: "only".to_string(),
                status: StepStatus::Pending,
            }],
        }),
    );
    let _ = ep.collect_thread_events(&start);
    let complete = event(
        "t2",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: None,
        }),
    );
    let _ = ep.collect_thread_events(&complete);
    // Second turn: a new todo list should have a new id
    let start_again = event(
        "t3",
        EventMsg::PlanUpdate(UpdatePlanArgs {
            explanation: None,
            plan: vec![PlanItemArg {
                step: "again".to_string(),
                status: StepStatus::Pending,
            }],
        }),
    );
    let out = ep.collect_thread_events(&start_again);
    match &out[0] {
        ThreadEvent::ItemStarted(ItemStartedEvent { item }) => {
            assert_eq!(&item.id, "item_1");
        }
        other => panic!("unexpected event: {other:?}"),
    }
}
/// `AgentReasoning` text becomes a completed `reasoning` item.
#[test]
fn agent_reasoning_produces_item_completed_reasoning() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let reasoning = event(
        "e1",
        EventMsg::AgentReasoning(AgentReasoningEvent {
            text: "thinking...".to_string(),
        }),
    );
    let produced = processor.collect_thread_events(&reasoning);
    let expected = vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::Reasoning(ReasoningItem {
                text: "thinking...".to_string(),
            }),
        },
    })];
    assert_eq!(produced, expected);
}
/// An `AgentMessage` becomes a completed `agent_message` item.
#[test]
fn agent_message_produces_item_completed_agent_message() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let message = event(
        "e1",
        EventMsg::AgentMessage(AgentMessageEvent {
            message: "hello".to_string(),
        }),
    );
    let produced = processor.collect_thread_events(&message);
    let expected = vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::AgentMessage(AgentMessageItem {
                text: "hello".to_string(),
            }),
        },
    })];
    assert_eq!(produced, expected);
}
/// A protocol `Error` surfaces directly as a thread-level `error` event.
#[test]
fn error_event_produces_error() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let failure = event(
        "e1",
        EventMsg::Error(codex_core::protocol::ErrorEvent {
            message: "boom".to_string(),
        }),
    );
    let produced = processor.collect_thread_events(&failure);
    let expected = vec![ThreadEvent::Error(ThreadErrorEvent {
        message: "boom".to_string(),
    })];
    assert_eq!(produced, expected);
}
/// A `Warning` is not a thread-level error: it is emitted as a completed
/// item whose details are an `ErrorItem` carrying the warning text.
#[test]
fn warning_event_produces_error_item() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let out = ep.collect_thread_events(&event(
        "e1",
        EventMsg::Warning(WarningEvent {
            message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(),
        }),
    ));
    assert_eq!(
        out,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::Error(ErrorItem {
                    message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(),
                }),
            },
        })]
    );
}
/// A `StreamError` is reported the same way as a plain error: as a
/// thread-level `error` event with the message passed through.
#[test]
fn stream_error_event_produces_error() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let stream_failure = event(
        "e1",
        EventMsg::StreamError(codex_core::protocol::StreamErrorEvent {
            message: "retrying".to_string(),
        }),
    );
    let produced = processor.collect_thread_events(&stream_failure);
    let expected = vec![ThreadEvent::Error(ThreadErrorEvent {
        message: "retrying".to_string(),
    })];
    assert_eq!(produced, expected);
}
/// The processor remembers the last error of a turn: `TaskComplete` after an
/// `Error` yields `turn.failed` (carrying that error) instead of
/// `turn.completed`.
#[test]
fn error_followed_by_task_complete_produces_turn_failed() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let error_event = event(
        "e1",
        EventMsg::Error(ErrorEvent {
            message: "boom".to_string(),
        }),
    );
    // The error is still emitted immediately as a thread-level error event.
    assert_eq!(
        ep.collect_thread_events(&error_event),
        vec![ThreadEvent::Error(ThreadErrorEvent {
            message: "boom".to_string(),
        })]
    );
    let complete_event = event(
        "e2",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: None,
        }),
    );
    assert_eq!(
        ep.collect_thread_events(&complete_event),
        vec![ThreadEvent::TurnFailed(TurnFailedEvent {
            error: ThreadErrorEvent {
                message: "boom".to_string(),
            },
        })]
    );
}
/// An exec command follows the item lifecycle: begin emits `item.started`
/// with an in-progress `command_execution` item (shell-quoted command, no
/// exit code yet); a zero-exit end completes the same item with the
/// aggregated output and `exit_code: Some(0)`.
#[test]
fn exec_command_end_success_produces_completed_command_item() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // Begin -> no output
    let begin = event(
        "c1",
        EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
            call_id: "1".to_string(),
            command: vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()],
            cwd: std::env::current_dir().unwrap(),
            parsed_cmd: Vec::new(),
            is_user_shell_command: false,
        }),
    );
    let out_begin = ep.collect_thread_events(&begin);
    assert_eq!(
        out_begin,
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                // The argv vector is rendered as a single shell-quoted string.
                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                    command: "bash -lc 'echo hi'".to_string(),
                    aggregated_output: String::new(),
                    exit_code: None,
                    status: CommandExecutionStatus::InProgress,
                }),
            },
        })]
    );
    // End (success) -> item.completed (item_0)
    let end_ok = event(
        "c2",
        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
            call_id: "1".to_string(),
            stdout: String::new(),
            stderr: String::new(),
            aggregated_output: "hi\n".to_string(),
            exit_code: 0,
            duration: Duration::from_millis(5),
            formatted_output: String::new(),
        }),
    );
    let out_ok = ep.collect_thread_events(&end_ok);
    assert_eq!(
        out_ok,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                    command: "bash -lc 'echo hi'".to_string(),
                    aggregated_output: "hi\n".to_string(),
                    exit_code: Some(0),
                    status: CommandExecutionStatus::Completed,
                }),
            },
        })]
    );
}
/// A non-zero exit code completes the command item with status `failed`
/// (same item id as the begin event, `exit_code: Some(1)`).
#[test]
fn exec_command_end_failure_produces_failed_command_item() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // Begin -> no output
    let begin = event(
        "c1",
        EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
            call_id: "2".to_string(),
            command: vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()],
            cwd: std::env::current_dir().unwrap(),
            parsed_cmd: Vec::new(),
            is_user_shell_command: false,
        }),
    );
    assert_eq!(
        ep.collect_thread_events(&begin),
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                    command: "sh -c 'exit 1'".to_string(),
                    aggregated_output: String::new(),
                    exit_code: None,
                    status: CommandExecutionStatus::InProgress,
                }),
            },
        })]
    );
    // End (failure) -> item.completed (item_0)
    let end_fail = event(
        "c2",
        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
            call_id: "2".to_string(),
            stdout: String::new(),
            stderr: String::new(),
            aggregated_output: String::new(),
            exit_code: 1,
            duration: Duration::from_millis(2),
            formatted_output: String::new(),
        }),
    );
    let out_fail = ep.collect_thread_events(&end_fail);
    assert_eq!(
        out_fail,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                    command: "sh -c 'exit 1'".to_string(),
                    aggregated_output: String::new(),
                    exit_code: Some(1),
                    status: CommandExecutionStatus::Failed,
                }),
            },
        })]
    );
}
/// An `ExecCommandEnd` with no matching begin has no item to complete and
/// must be dropped without emitting any thread events.
#[test]
fn exec_command_end_without_begin_is_ignored() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let orphan_end = event(
        "c1",
        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
            call_id: "no-begin".to_string(),
            stdout: String::new(),
            stderr: String::new(),
            aggregated_output: String::new(),
            exit_code: 0,
            duration: Duration::from_millis(1),
            formatted_output: String::new(),
        }),
    );
    let produced = processor.collect_thread_events(&orphan_end);
    assert!(produced.is_empty());
}
/// A successful patch apply emits a single `item.completed` with a
/// `file_change` item: one change entry per touched path, each mapped to the
/// matching `PatchChangeKind`. The begin event itself emits nothing.
#[test]
fn patch_apply_success_produces_item_completed_patchapply() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // Prepare a patch with multiple kinds of changes
    let mut changes = std::collections::HashMap::new();
    changes.insert(
        PathBuf::from("a/added.txt"),
        FileChange::Add {
            content: "+hello".to_string(),
        },
    );
    changes.insert(
        PathBuf::from("b/deleted.txt"),
        FileChange::Delete {
            content: "-goodbye".to_string(),
        },
    );
    changes.insert(
        PathBuf::from("c/modified.txt"),
        FileChange::Update {
            unified_diff: "--- c/modified.txt\n+++ c/modified.txt\n@@\n-old\n+new\n".to_string(),
            move_path: Some(PathBuf::from("c/renamed.txt")),
        },
    );
    // Begin -> no output
    let begin = event(
        "p1",
        EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
            call_id: "call-1".to_string(),
            auto_approved: true,
            changes: changes.clone(),
        }),
    );
    let out_begin = ep.collect_thread_events(&begin);
    assert!(out_begin.is_empty());
    // End (success) -> item.completed (item_0)
    let end = event(
        "p2",
        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
            call_id: "call-1".to_string(),
            stdout: "applied 3 changes".to_string(),
            stderr: String::new(),
            success: true,
        }),
    );
    let out_end = ep.collect_thread_events(&end);
    assert_eq!(out_end.len(), 1);
    // Validate structure without relying on HashMap iteration order
    match &out_end[0] {
        ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => {
            assert_eq!(&item.id, "item_0");
            match &item.details {
                ThreadItemDetails::FileChange(file_update) => {
                    assert_eq!(file_update.status, PatchApplyStatus::Completed);
                    // Sort both sides by path so the comparison is
                    // deterministic regardless of map iteration order.
                    let mut actual: Vec<(String, PatchChangeKind)> = file_update
                        .changes
                        .iter()
                        .map(|c| (c.path.clone(), c.kind.clone()))
                        .collect();
                    actual.sort_by(|a, b| a.0.cmp(&b.0));
                    let mut expected = vec![
                        ("a/added.txt".to_string(), PatchChangeKind::Add),
                        ("b/deleted.txt".to_string(), PatchChangeKind::Delete),
                        ("c/modified.txt".to_string(), PatchChangeKind::Update),
                    ];
                    expected.sort_by(|a, b| a.0.cmp(&b.0));
                    assert_eq!(actual, expected);
                }
                other => panic!("unexpected details: {other:?}"),
            }
        }
        other => panic!("unexpected event: {other:?}"),
    }
}
/// A failed patch apply still completes the `file_change` item, but with
/// status `failed`; the change list reflects the attempted patch.
#[test]
fn patch_apply_failure_produces_item_completed_patchapply_failed() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let mut changes = std::collections::HashMap::new();
    changes.insert(
        PathBuf::from("file.txt"),
        FileChange::Update {
            unified_diff: "--- file.txt\n+++ file.txt\n@@\n-old\n+new\n".to_string(),
            move_path: None,
        },
    );
    // Begin -> no output
    let begin = event(
        "p1",
        EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
            call_id: "call-2".to_string(),
            auto_approved: false,
            changes: changes.clone(),
        }),
    );
    assert!(ep.collect_thread_events(&begin).is_empty());
    // End (failure) -> item.completed (item_0) with Failed status
    let end = event(
        "p2",
        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
            call_id: "call-2".to_string(),
            stdout: String::new(),
            stderr: "failed to apply".to_string(),
            success: false,
        }),
    );
    let out_end = ep.collect_thread_events(&end);
    assert_eq!(out_end.len(), 1);
    match &out_end[0] {
        ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => {
            assert_eq!(&item.id, "item_0");
            match &item.details {
                ThreadItemDetails::FileChange(file_update) => {
                    assert_eq!(file_update.status, PatchApplyStatus::Failed);
                    assert_eq!(file_update.changes.len(), 1);
                    assert_eq!(file_update.changes[0].path, "file.txt".to_string());
                    assert_eq!(file_update.changes[0].kind, PatchChangeKind::Update);
                }
                other => panic!("unexpected details: {other:?}"),
            }
        }
        other => panic!("unexpected event: {other:?}"),
    }
}
/// `TokenCount` events are absorbed silently; the captured totals are later
/// attached to the `turn.completed` event emitted on `TaskComplete`.
#[test]
fn task_complete_produces_turn_completed_with_usage() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    // First, feed a TokenCount event with known totals.
    // NOTE(review): `reasoning_output_tokens` and `total_tokens` are left at
    // 0 and are not asserted below — the exec `Usage` only exposes
    // input/cached/output counts.
    let usage = codex_core::protocol::TokenUsage {
        input_tokens: 1200,
        cached_input_tokens: 200,
        output_tokens: 345,
        reasoning_output_tokens: 0,
        total_tokens: 0,
    };
    let info = codex_core::protocol::TokenUsageInfo {
        total_token_usage: usage.clone(),
        last_token_usage: usage,
        model_context_window: None,
    };
    let token_count_event = event(
        "e1",
        EventMsg::TokenCount(codex_core::protocol::TokenCountEvent {
            info: Some(info),
            rate_limits: None,
        }),
    );
    assert!(ep.collect_thread_events(&token_count_event).is_empty());
    // Then TaskComplete should produce turn.completed with the captured usage.
    let complete_event = event(
        "e2",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: Some("done".to_string()),
        }),
    );
    let out = ep.collect_thread_events(&complete_event);
    assert_eq!(
        out,
        vec![ThreadEvent::TurnCompleted(TurnCompletedEvent {
            usage: Usage {
                input_tokens: 1200,
                cached_input_tokens: 200,
                output_tokens: 345,
            },
        })]
    );
}

View File

@@ -0,0 +1,4 @@
class BaseClass:
def method():
return True

View File

@@ -0,0 +1,10 @@
event: response.created
data: {"type":"response.created","response":{"id":"resp1"}}
event: response.output_item.done
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"fixture hello"}]}}
event: response.completed
data: {"type":"response.completed","response":{"id":"resp1","output":[]}}

View File

@@ -0,0 +1,151 @@
#![allow(clippy::expect_used, clippy::unwrap_used, unused_imports)]
use anyhow::Context;
use assert_cmd::prelude::*;
use codex_core::CODEX_APPLY_PATCH_ARG1;
use core_test_support::responses::ev_apply_patch_custom_tool_call;
use core_test_support::responses::ev_apply_patch_function_call;
use core_test_support::responses::ev_completed;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use std::fs;
use std::process::Command;
use tempfile::tempdir;
/// While we may add an `apply-patch` subcommand to the `codex` CLI multitool
/// at some point, we must ensure that the smaller `codex-exec` CLI can still
/// emulate the `apply_patch` CLI.
#[test]
fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
    let tmp = tempdir()?;
    let relative_path = "source.txt";
    let absolute_path = tmp.path().join(relative_path);
    fs::write(&absolute_path, "original content\n")?;
    // Passing CODEX_APPLY_PATCH_ARG1 as argv[1] switches the binary into
    // apply_patch emulation; the patch body is the second argument.
    Command::cargo_bin("codex-exec")
        .context("should find binary for codex-exec")?
        .arg(CODEX_APPLY_PATCH_ARG1)
        .arg(
            r#"*** Begin Patch
*** Update File: source.txt
@@
-original content
+modified by apply_patch
*** End Patch"#,
        )
        .current_dir(tmp.path())
        .assert()
        .success()
        .stdout("Success. Updated the following files:\nM source.txt\n")
        .stderr(predicates::str::is_empty());
    // The patch must have been applied on disk, not just reported.
    assert_eq!(
        fs::read_to_string(absolute_path)?,
        "modified by apply_patch\n"
    );
    Ok(())
}
/// End-to-end: the model (mocked via SSE streams) issues an apply_patch
/// custom tool call that adds `test.md`, then a function-style call that
/// updates it; the file on disk must end up with the final content.
#[cfg(not(target_os = "windows"))]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_apply_patch_tool() -> anyhow::Result<()> {
    use core_test_support::skip_if_no_network;
    use core_test_support::test_codex_exec::test_codex_exec;
    skip_if_no_network!(Ok(()));
    let test = test_codex_exec();
    let tmp_path = test.cwd_path().to_path_buf();
    let add_patch = r#"*** Begin Patch
*** Add File: test.md
+Hello world
*** End Patch"#;
    let update_patch = r#"*** Begin Patch
*** Update File: test.md
@@
-Hello world
+Final text
*** End Patch"#;
    // Three model turns: custom tool call (add), function call (update),
    // then a bare completion to end the task.
    let response_streams = vec![
        sse(vec![
            ev_apply_patch_custom_tool_call("request_0", add_patch),
            ev_completed("request_0"),
        ]),
        sse(vec![
            ev_apply_patch_function_call("request_1", update_patch),
            ev_completed("request_1"),
        ]),
        sse(vec![ev_completed("request_2")]),
    ];
    let server = start_mock_server().await;
    mount_sse_sequence(&server, response_streams).await;
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("-s")
        .arg("danger-full-access")
        .arg("foo")
        .assert()
        .success();
    let final_path = tmp_path.join("test.md");
    let contents = std::fs::read_to_string(&final_path)
        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
    assert_eq!(contents, "Final text\n");
    Ok(())
}
/// Same flow as `test_apply_patch_tool`, but both patches use the freeform
/// (custom tool call) format, including a context-anchored `@@ def method():`
/// hunk; the final file is compared against a checked-in fixture.
#[cfg(not(target_os = "windows"))]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
    use core_test_support::skip_if_no_network;
    use core_test_support::test_codex_exec::test_codex_exec;
    skip_if_no_network!(Ok(()));
    let test = test_codex_exec();
    let freeform_add_patch = r#"*** Begin Patch
*** Add File: app.py
+class BaseClass:
+  def method():
+    return False
*** End Patch"#;
    let freeform_update_patch = r#"*** Begin Patch
*** Update File: app.py
@@ def method():
-    return False
+
+    return True
*** End Patch"#;
    let response_streams = vec![
        sse(vec![
            ev_apply_patch_custom_tool_call("request_0", freeform_add_patch),
            ev_completed("request_0"),
        ]),
        sse(vec![
            ev_apply_patch_custom_tool_call("request_1", freeform_update_patch),
            ev_completed("request_1"),
        ]),
        sse(vec![ev_completed("request_2")]),
    ];
    let server = start_mock_server().await;
    mount_sse_sequence(&server, response_streams).await;
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("-s")
        .arg("danger-full-access")
        .arg("foo")
        .assert()
        .success();
    // Verify final file contents
    let final_path = test.cwd_path().join("app.py");
    let contents = std::fs::read_to_string(&final_path)
        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
    assert_eq!(
        contents,
        include_str!("../fixtures/apply_patch_freeform_final.txt")
    );
    Ok(())
}

View File

@@ -0,0 +1,30 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use core_test_support::responses::ev_completed;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex_exec::test_codex_exec;
use wiremock::matchers::header;
/// The mock server only matches requests carrying `Authorization: Bearer
/// dummy`, so a successful run proves the API key reached the request.
/// NOTE(review): the "dummy" key itself is presumably injected by
/// `test_codex_exec()` (e.g. via an API-key env var) — confirm in
/// core_test_support.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_uses_codex_api_key_env_var() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let server = start_mock_server().await;
    mount_sse_once_match(
        &server,
        header("Authorization", "Bearer dummy"),
        sse(vec![ev_completed("request_0")]),
    )
    .await;
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg("echo testing codex api key")
        .assert()
        .success();
    Ok(())
}

View File

@@ -0,0 +1,8 @@
// Aggregates all former standalone integration tests as modules, so they
// build as a single test binary. Modules are listed alphabetically.
mod apply_patch;
mod auth_env;
mod originator;
mod output_schema;
mod resume;
mod sandbox;
mod server_error_exit;

View File

@@ -0,0 +1,52 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use core_test_support::responses;
use core_test_support::test_codex_exec::test_codex_exec;
use wiremock::matchers::header;
/// Verify that requests sent by `codex-exec` carry the default
/// `Originator: codex_exec` header: the mock server only answers requests
/// with that header, so a zero exit code proves it was sent.
/// (The previous comment here described the server-error-exit test and was
/// a stale copy-paste.)
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn send_codex_exec_originator() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let server = responses::start_mock_server().await;
    let body = responses::sse(vec![
        responses::ev_response_created("response_1"),
        responses::ev_assistant_message("response_1", "Hello, world!"),
        responses::ev_completed("response_1"),
    ]);
    responses::mount_sse_once_match(&server, header("Originator", "codex_exec"), body).await;
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("tell me something")
        .assert()
        .code(0);
    Ok(())
}
/// Setting `CODEX_INTERNAL_ORIGINATOR_OVERRIDE` must replace the default
/// `Originator` header value; the mock server only matches the overridden
/// value, so success proves the override took effect.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn supports_originator_override() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let server = responses::start_mock_server().await;
    let body = responses::sse(vec![
        responses::ev_response_created("response_1"),
        responses::ev_assistant_message("response_1", "Hello, world!"),
        responses::ev_completed("response_1"),
    ]);
    responses::mount_sse_once_match(&server, header("Originator", "codex_exec_override"), body)
        .await;
    test.cmd_with_server(&server)
        .env("CODEX_INTERNAL_ORIGINATOR_OVERRIDE", "codex_exec_override")
        .arg("--skip-git-repo-check")
        .arg("tell me something")
        .assert()
        .code(0);
    Ok(())
}

View File

@@ -0,0 +1,63 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use core_test_support::responses;
use core_test_support::test_codex_exec::test_codex_exec;
use serde_json::Value;
use wiremock::matchers::any;
/// Passing `--output-schema <file>` must embed the JSON schema in the
/// request's `text.format` field as a strict `json_schema` named
/// `codex_output_schema`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let schema_contents = serde_json::json!({
        "type": "object",
        "properties": {
            "answer": { "type": "string" }
        },
        "required": ["answer"],
        "additionalProperties": false
    });
    // Write the schema to disk so it can be passed by path on the CLI.
    let schema_path = test.cwd_path().join("schema.json");
    std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?;
    let expected_schema: Value = schema_contents;
    let server = responses::start_mock_server().await;
    let body = responses::sse(vec![
        responses::ev_response_created("resp1"),
        responses::ev_assistant_message("m1", "fixture hello"),
        responses::ev_completed("resp1"),
    ]);
    let response_mock = responses::mount_sse_once_match(&server, any(), body).await;
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        // keep using -C in the test to exercise the flag as well
        .arg("-C")
        .arg(test.cwd_path())
        .arg("--output-schema")
        .arg(&schema_path)
        .arg("-m")
        .arg("gpt-5")
        .arg("tell me a joke")
        .assert()
        .success();
    // Inspect the single captured request and check the schema payload.
    let request = response_mock.single_request();
    let payload: Value = request.body_json();
    let text = payload.get("text").expect("request missing text field");
    let format = text
        .get("format")
        .expect("request missing text.format field");
    assert_eq!(
        format,
        &serde_json::json!({
            "name": "codex_output_schema",
            "type": "json_schema",
            "strict": true,
            "schema": expected_schema,
        })
    );
    Ok(())
}

View File

@@ -0,0 +1,257 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Context;
use core_test_support::test_codex_exec::test_codex_exec;
use serde_json::Value;
use std::path::Path;
use std::string::ToString;
use uuid::Uuid;
use walkdir::WalkDir;
/// Utility: scan the sessions dir for a rollout file that contains `marker`
/// in any response_item.message.content entry. Returns the absolute path.
fn find_session_file_containing_marker(
    sessions_dir: &std::path::Path,
    marker: &str,
) -> Option<std::path::PathBuf> {
    // A JSONL line matches when it parses as a response_item whose message
    // payload mentions the marker anywhere in its serialized content.
    let line_matches = |line: &str| -> bool {
        if line.trim().is_empty() {
            return false;
        }
        let Ok(item) = serde_json::from_str::<Value>(line) else {
            return false;
        };
        if item.get("type").and_then(|t| t.as_str()) != Some("response_item") {
            return false;
        }
        let Some(payload) = item.get("payload") else {
            return false;
        };
        payload.get("type").and_then(|t| t.as_str()) == Some("message")
            && payload
                .get("content")
                .map(ToString::to_string)
                .unwrap_or_default()
                .contains(marker)
    };
    // Walk every .jsonl file under the sessions dir, ignoring traversal and
    // read errors, and return the first file with a matching line.
    WalkDir::new(sessions_dir)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .filter(|entry| entry.file_name().to_string_lossy().ends_with(".jsonl"))
        .find_map(|entry| {
            let path = entry.path().to_path_buf();
            let content = std::fs::read_to_string(&path).ok()?;
            // Skip the first (SessionMeta) line and scan remaining entries.
            if content.lines().skip(1).any(|line| line_matches(line)) {
                Some(path)
            } else {
                None
            }
        })
}
/// Extract the conversation UUID from the first SessionMeta line in the rollout file.
fn extract_conversation_id(path: &std::path::Path) -> String {
let content = std::fs::read_to_string(path).unwrap();
let mut lines = content.lines();
let meta_line = lines.next().expect("missing meta line");
let meta: Value = serde_json::from_str(meta_line).expect("invalid meta json");
meta.get("payload")
.and_then(|p| p.get("id"))
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string()
}
/// End-to-end: `exec ... resume --last` must append to the rollout file that
/// the immediately preceding run created, not start a new one.
#[test]
fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> {
    let harness = test_codex_exec();
    let fixture =
        Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
    // 1) First run: create a session with a unique marker in the content.
    let first_marker = format!("resume-last-{}", Uuid::new_v4());
    harness
        .cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(format!("echo {first_marker}"))
        .assert()
        .success();
    // Locate the rollout file the first run produced.
    let sessions_dir = harness.home_path().join("sessions");
    let original_path = find_session_file_containing_marker(&sessions_dir, &first_marker)
        .expect("no session file found after first run");
    // 2) Second run: resume the most recent file with a new marker.
    let second_marker = format!("resume-last-2-{}", Uuid::new_v4());
    harness
        .cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(format!("echo {second_marker}"))
        .arg("resume")
        .arg("--last")
        .assert()
        .success();
    // Ensure the same file was updated and contains both markers.
    let resumed_path = find_session_file_containing_marker(&sessions_dir, &second_marker)
        .expect("no resumed session file containing marker2");
    assert_eq!(
        resumed_path, original_path,
        "resume --last should append to existing file"
    );
    let content = std::fs::read_to_string(&resumed_path)?;
    assert!(content.contains(&first_marker));
    assert!(content.contains(&second_marker));
    Ok(())
}
/// End-to-end: a second run invoked as `resume <conversation-id>` must append
/// to the rollout file created by the first run rather than starting a new
/// session file.
#[test]
fn exec_resume_by_id_appends_to_existing_file() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let fixture =
        Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
    // 1) First run: create a session
    // The unique marker lets us find the rollout file among any others.
    let marker = format!("resume-by-id-{}", Uuid::new_v4());
    let prompt = format!("echo {marker}");
    // CODEX_RS_SSE_FIXTURE replays model responses from the fixture file, so
    // OPENAI_BASE_URL points at a host that is never actually contacted.
    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(&prompt)
        .assert()
        .success();
    let sessions_dir = test.home_path().join("sessions");
    let path = find_session_file_containing_marker(&sessions_dir, &marker)
        .expect("no session file found after first run");
    // The conversation id comes from the SessionMeta line of the rollout file.
    let session_id = extract_conversation_id(&path);
    assert!(
        !session_id.is_empty(),
        "missing conversation id in meta line"
    );
    // 2) Resume by id
    let marker2 = format!("resume-by-id-2-{}", Uuid::new_v4());
    let prompt2 = format!("echo {marker2}");
    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(&prompt2)
        .arg("resume")
        .arg(&session_id)
        .assert()
        .success();
    // Both markers must now live in the same file: the resume appended.
    let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2)
        .expect("no resumed session file containing marker2");
    assert_eq!(
        resumed_path, path,
        "resume by id should append to existing file"
    );
    let content = std::fs::read_to_string(&resumed_path)?;
    assert!(content.contains(&marker));
    assert!(content.contains(&marker2));
    Ok(())
}
/// A resumed run must honor CLI configuration overrides (`--model`,
/// `--sandbox`) passed on the resume invocation, as reported in the CLI's
/// stderr summary, while still appending to the original rollout file.
#[test]
fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let fixture =
        Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
    // First run: create a session with explicit sandbox/model flags.
    let marker = format!("resume-config-{}", Uuid::new_v4());
    let prompt = format!("echo {marker}");
    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("--sandbox")
        .arg("workspace-write")
        .arg("--model")
        .arg("gpt-5")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(&prompt)
        .assert()
        .success();
    let sessions_dir = test.home_path().join("sessions");
    let path = find_session_file_containing_marker(&sessions_dir, &marker)
        .expect("no session file found after first run");
    // Resume with a DIFFERENT model (gpt-5-high); capture output so stderr can
    // be inspected for the override summary.
    let marker2 = format!("resume-config-2-{}", Uuid::new_v4());
    let prompt2 = format!("echo {marker2}");
    let output = test
        .cmd()
        .env("CODEX_RS_SSE_FIXTURE", &fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .arg("--skip-git-repo-check")
        .arg("--sandbox")
        .arg("workspace-write")
        .arg("--model")
        .arg("gpt-5-high")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
        .arg(&prompt2)
        .arg("resume")
        .arg("--last")
        .output()
        .context("resume run should succeed")?;
    assert!(output.status.success(), "resume run failed: {output:?}");
    let stderr = String::from_utf8(output.stderr)?;
    assert!(
        stderr.contains("model: gpt-5-high"),
        "stderr missing model override: {stderr}"
    );
    // On Windows the requested workspace-write sandbox is downgraded to
    // read-only, so the expected stderr note differs per platform.
    if cfg!(target_os = "windows") {
        assert!(
            stderr.contains("sandbox: read-only"),
            "stderr missing downgraded sandbox note: {stderr}"
        );
    } else {
        assert!(
            stderr.contains("sandbox: workspace-write"),
            "stderr missing sandbox override: {stderr}"
        );
    }
    // Regardless of overrides, the resume must append to the same file.
    let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2)
        .expect("no resumed session file containing marker2");
    assert_eq!(resumed_path, path, "resume should append to same file");
    let content = std::fs::read_to_string(&resumed_path)?;
    assert!(content.contains(&marker));
    assert!(content.contains(&marker2));
    Ok(())
}

View File

@@ -0,0 +1,322 @@
#![cfg(unix)]
use codex_core::protocol::SandboxPolicy;
use codex_core::spawn::StdioPolicy;
use std::collections::HashMap;
use std::future::Future;
use std::io;
use std::path::Path;
use std::path::PathBuf;
use std::process::ExitStatus;
use tokio::fs::create_dir_all;
use tokio::process::Child;
/// Platform shim: on macOS, sandboxed commands are spawned under Seatbelt.
/// Signature mirrors the Linux variant so tests stay platform-agnostic.
#[cfg(target_os = "macos")]
async fn spawn_command_under_sandbox(
    command: Vec<String>,
    command_cwd: PathBuf,
    sandbox_policy: &SandboxPolicy,
    sandbox_cwd: &Path,
    stdio_policy: StdioPolicy,
    env: HashMap<String, String>,
) -> std::io::Result<Child> {
    use codex_core::seatbelt::spawn_command_under_seatbelt;
    let spawn = spawn_command_under_seatbelt(
        command,
        command_cwd,
        sandbox_policy,
        sandbox_cwd,
        stdio_policy,
        env,
    );
    spawn.await
}
/// Platform shim: on Linux, sandboxed commands run via the `codex-exec`
/// helper binary (resolved from the cargo build) under the Linux sandbox.
#[cfg(target_os = "linux")]
async fn spawn_command_under_sandbox(
    command: Vec<String>,
    command_cwd: PathBuf,
    sandbox_policy: &SandboxPolicy,
    sandbox_cwd: &Path,
    stdio_policy: StdioPolicy,
    env: HashMap<String, String>,
) -> std::io::Result<Child> {
    use codex_core::landlock::spawn_command_under_linux_sandbox;
    let sandbox_helper_exe = assert_cmd::cargo::cargo_bin("codex-exec");
    spawn_command_under_linux_sandbox(
        sandbox_helper_exe,
        command,
        command_cwd,
        sandbox_policy,
        sandbox_cwd,
        stdio_policy,
        env,
    )
    .await
}
/// Regression check: Python's `multiprocessing.Lock` (backed by named
/// semaphores) must work inside the workspace-write sandbox. The child
/// asserts only on the process exit status of the spawned Python script.
#[tokio::test]
async fn python_multiprocessing_lock_works_under_sandbox() {
    core_test_support::skip_if_sandbox!();
    // macOS needs no extra writable roots for semaphores.
    #[cfg(target_os = "macos")]
    let writable_roots = Vec::<PathBuf>::new();
    // From https://man7.org/linux/man-pages/man7/sem_overview.7.html
    //
    // > On Linux, named semaphores are created in a virtual filesystem,
    // > normally mounted under /dev/shm.
    #[cfg(target_os = "linux")]
    let writable_roots = vec![PathBuf::from("/dev/shm")];
    let policy = SandboxPolicy::WorkspaceWrite {
        writable_roots,
        network_access: false,
        exclude_tmpdir_env_var: false,
        exclude_slash_tmp: false,
    };
    // Minimal script: child process acquires a lock shared with the parent.
    let python_code = r#"import multiprocessing
from multiprocessing import Lock, Process

def f(lock):
    with lock:
        print("Lock acquired in child process")

if __name__ == '__main__':
    lock = Lock()
    p = Process(target=f, args=(lock,))
    p.start()
    p.join()
"#;
    let command_cwd = std::env::current_dir().expect("should be able to get current dir");
    let sandbox_cwd = command_cwd.clone();
    let mut child = spawn_command_under_sandbox(
        vec![
            "python3".to_string(),
            "-c".to_string(),
            python_code.to_string(),
        ],
        command_cwd,
        &policy,
        sandbox_cwd.as_path(),
        StdioPolicy::Inherit,
        HashMap::new(),
    )
    .await
    .expect("should be able to spawn python under sandbox");
    let status = child.wait().await.expect("should wait for child process");
    assert!(status.success(), "python exited with {status:?}");
}
/// Verifies the sandbox distinguishes the command's working directory from
/// the sandbox policy's cwd: writes are allowed under the policy cwd but
/// denied in a command cwd that lies outside it.
#[tokio::test]
async fn sandbox_distinguishes_command_and_policy_cwds() {
    core_test_support::skip_if_sandbox!();
    // Two sibling dirs: one is the sandbox policy root, the other only the
    // command's cwd.
    let temp = tempfile::tempdir().expect("should be able to create temp dir");
    let sandbox_root = temp.path().join("sandbox");
    let command_root = temp.path().join("command");
    create_dir_all(&sandbox_root).await.expect("mkdir");
    create_dir_all(&command_root).await.expect("mkdir");
    // Canonicalize so path comparison is not confused by symlinks (e.g. /tmp
    // on macOS).
    let canonical_sandbox_root = tokio::fs::canonicalize(&sandbox_root)
        .await
        .expect("canonicalize sandbox root");
    let canonical_allowed_path = canonical_sandbox_root.join("allowed.txt");
    let disallowed_path = command_root.join("forbidden.txt");
    // Note writable_roots is empty: verify that `canonical_allowed_path` is
    // writable only because it is under the sandbox policy cwd, not because it
    // is under a writable root.
    let policy = SandboxPolicy::WorkspaceWrite {
        writable_roots: vec![],
        network_access: false,
        exclude_tmpdir_env_var: true,
        exclude_slash_tmp: true,
    };
    // Attempt to write inside the command cwd, which is outside of the sandbox policy cwd.
    let mut child = spawn_command_under_sandbox(
        vec![
            "bash".to_string(),
            "-lc".to_string(),
            "echo forbidden > forbidden.txt".to_string(),
        ],
        command_root.clone(),
        &policy,
        canonical_sandbox_root.as_path(),
        StdioPolicy::Inherit,
        HashMap::new(),
    )
    .await
    .expect("should spawn command writing to forbidden path");
    let status = child
        .wait()
        .await
        .expect("should wait for forbidden command");
    // The write must fail AND the file must not exist afterwards.
    assert!(
        !status.success(),
        "sandbox unexpectedly allowed writing to command cwd: {status:?}"
    );
    let forbidden_exists = tokio::fs::try_exists(&disallowed_path)
        .await
        .expect("try_exists failed");
    assert!(
        !forbidden_exists,
        "forbidden path should not have been created"
    );
    // Writing to the sandbox policy cwd after changing directories into it should succeed.
    let mut child = spawn_command_under_sandbox(
        vec![
            "/usr/bin/touch".to_string(),
            canonical_allowed_path.to_string_lossy().into_owned(),
        ],
        command_root,
        &policy,
        canonical_sandbox_root.as_path(),
        StdioPolicy::Inherit,
        HashMap::new(),
    )
    .await
    .expect("should spawn command writing to sandbox root");
    let status = child.wait().await.expect("should wait for allowed command");
    assert!(
        status.success(),
        "sandbox blocked allowed write: {status:?}"
    );
    let allowed_exists = tokio::fs::try_exists(&canonical_allowed_path)
        .await
        .expect("try_exists allowed failed");
    assert!(allowed_exists, "allowed path should exist");
}
/// Exercises AF_UNIX socketpairs end-to-end: a datagram pair via
/// write()/recvfrom() with a payload round-trip check, then a quick stream
/// pair via write()/recv(). Designed to be re-executed inside the sandbox by
/// `run_code_under_sandbox` to prove these syscalls are permitted.
fn unix_sock_body() {
    // SAFETY: all raw libc calls below operate on file descriptors created in
    // this block; buffer pointers and lengths refer to live local arrays, and
    // every fd is closed before returning.
    unsafe {
        let mut fds = [0i32; 2];
        let r = libc::socketpair(libc::AF_UNIX, libc::SOCK_DGRAM, 0, fds.as_mut_ptr());
        assert_eq!(
            r,
            0,
            "socketpair(AF_UNIX, SOCK_DGRAM) failed: {}",
            io::Error::last_os_error()
        );
        let msg = b"hello_unix";
        // write() from one end (generic write is allowed)
        let sent = libc::write(fds[0], msg.as_ptr() as *const libc::c_void, msg.len());
        assert!(sent >= 0, "write() failed: {}", io::Error::last_os_error());
        // recvfrom() on the other end. We don't need the address for socketpair,
        // so we pass null pointers for src address.
        let mut buf = [0u8; 64];
        let recvd = libc::recvfrom(
            fds[1],
            buf.as_mut_ptr() as *mut libc::c_void,
            buf.len(),
            0,
            std::ptr::null_mut(),
            std::ptr::null_mut(),
        );
        assert!(
            recvd >= 0,
            "recvfrom() failed: {}",
            io::Error::last_os_error()
        );
        // Verify the datagram payload arrived intact.
        let recvd_slice = &buf[..(recvd as usize)];
        assert_eq!(
            recvd_slice,
            &msg[..],
            "payload mismatch: sent {} bytes, got {} bytes",
            msg.len(),
            recvd
        );
        // Also exercise AF_UNIX stream socketpair quickly to ensure AF_UNIX in general works.
        let mut sfds = [0i32; 2];
        let sr = libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, sfds.as_mut_ptr());
        assert_eq!(
            sr,
            0,
            "socketpair(AF_UNIX, SOCK_STREAM) failed: {}",
            io::Error::last_os_error()
        );
        let snt2 = libc::write(sfds[0], msg.as_ptr() as *const libc::c_void, msg.len());
        assert!(
            snt2 >= 0,
            "write(stream) failed: {}",
            io::Error::last_os_error()
        );
        let mut b2 = [0u8; 64];
        let rcv2 = libc::recv(sfds[1], b2.as_mut_ptr() as *mut libc::c_void, b2.len(), 0);
        assert!(
            rcv2 >= 0,
            "recv(stream) failed: {}",
            io::Error::last_os_error()
        );
        // Clean up
        let _ = libc::close(sfds[0]);
        let _ = libc::close(sfds[1]);
        let _ = libc::close(fds[0]);
        let _ = libc::close(fds[1]);
    }
}
/// Re-executes this test inside the read-only sandbox and runs
/// `unix_sock_body` there, proving AF_UNIX socketpair + recvfrom are allowed.
#[tokio::test]
async fn allow_unix_socketpair_recvfrom() {
    let policy = SandboxPolicy::ReadOnly;
    run_code_under_sandbox("allow_unix_socketpair_recvfrom", &policy, || async {
        unix_sock_body()
    })
    .await
    .expect("should be able to reexec");
}
/// Env var marking that the current process is the sandboxed child
/// re-execution rather than the test parent.
const IN_SANDBOX_ENV_VAR: &str = "IN_SANDBOX";

/// Re-executes the current test binary under `policy`, filtered (via
/// `--exact`) to `test_selector`; when invoked as the sandboxed child it runs
/// `child_body` instead.
///
/// Returns `Ok(Some(status))` in the parent (the child's exit status) and
/// `Ok(None)` in the child after `child_body` completes.
#[expect(clippy::expect_used)]
pub async fn run_code_under_sandbox<F, Fut>(
    test_selector: &str,
    policy: &SandboxPolicy,
    child_body: F,
) -> io::Result<Option<ExitStatus>>
where
    F: FnOnce() -> Fut + Send + 'static,
    Fut: Future<Output = ()> + Send + 'static,
{
    if std::env::var(IN_SANDBOX_ENV_VAR).is_err() {
        // Parent branch: re-exec this test binary inside the sandbox.
        let exe = std::env::current_exe()?;
        let mut cmds = vec![exe.to_string_lossy().into_owned(), "--exact".into()];
        let mut stdio_policy = StdioPolicy::RedirectForShellTool;
        // Allow for us to pass forward --nocapture / use the right stdio policy.
        if std::env::args().any(|a| a == "--nocapture") {
            cmds.push("--nocapture".into());
            stdio_policy = StdioPolicy::Inherit;
        }
        cmds.push(test_selector.into());
        let command_cwd = std::env::current_dir().expect("should be able to get current dir");
        let sandbox_cwd = command_cwd.clone();
        let mut child = spawn_command_under_sandbox(
            cmds,
            command_cwd,
            policy,
            sandbox_cwd.as_path(),
            stdio_policy,
            // Use the shared constant (previously a duplicated "IN_SANDBOX"
            // string literal) so parent and child agree on the marker var.
            HashMap::from([(IN_SANDBOX_ENV_VAR.into(), "1".into())]),
        )
        .await?;
        let status = child.wait().await?;
        Ok(Some(status))
    } else {
        // Child branch: run the provided body.
        child_body().await;
        Ok(None)
    }
}

View File

@@ -0,0 +1,34 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use core_test_support::responses;
use core_test_support::test_codex_exec::test_codex_exec;
use wiremock::matchers::any;
/// Verify that when the server reports an error, `codex-exec` exits with a
/// non-zero status code so automation can detect failures.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exits_non_zero_when_server_reports_error() -> anyhow::Result<()> {
    let harness = test_codex_exec();
    // Serve an SSE stream whose only event is a `response.failed` error.
    let server = responses::start_mock_server().await;
    let failure_event = serde_json::json!({
        "type": "response.failed",
        "response": {
            "id": "resp_err_1",
            "error": {"code": "rate_limit_exceeded", "message": "synthetic server error"}
        }
    });
    responses::mount_sse_once_match(&server, any(), responses::sse(vec![failure_event])).await;
    // The CLI must surface the failure as exit code 1.
    harness
        .cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("tell me something")
        .arg("--experimental-json")
        .assert()
        .code(1);
    Ok(())
}