Phase 1: Repository & Infrastructure Setup

- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
  - Root: llmx-monorepo
  - CLI: @llmx/llmx
  - SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx

🤖 Generated with Claude Code
2025-11-11 14:01:52 +01:00
parent 052b052832
commit f237fe560d
1151 changed files with 41 additions and 35 deletions
--- a/llmx-rs/exec/Cargo.toml
+++ b/llmx-rs/exec/Cargo.toml
@@ -0,0 +1,62 @@
+[package]
+edition = "2024"
+name = "codex-exec"
+version = { workspace = true }
+
+[[bin]]
+name = "codex-exec"
+path = "src/main.rs"
+
+[lib]
+name = "codex_exec"
+path = "src/lib.rs"
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow = { workspace = true }
+clap = { workspace = true, features = ["derive"] }
+codex-arg0 = { workspace = true }
+codex-common = { workspace = true, features = [
+    "cli",
+    "elapsed",
+    "sandbox_summary",
+] }
+codex-core = { workspace = true }
+codex-ollama = { workspace = true }
+codex-protocol = { workspace = true }
+mcp-types = { workspace = true }
+opentelemetry-appender-tracing = { workspace = true }
+owo-colors = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+shlex = { workspace = true }
+supports-color = { workspace = true }
+tokio = { workspace = true, features = [
+    "io-std",
+    "macros",
+    "process",
+    "rt-multi-thread",
+    "signal",
+] }
+tracing = { workspace = true, features = ["log"] }
+tracing-subscriber = { workspace = true, features = ["env-filter"] }
+ts-rs = { workspace = true, features = [
+    "uuid-impl",
+    "serde-json-impl",
+    "no-serde-warnings",
+] }
+
+
+[dev-dependencies]
+assert_cmd = { workspace = true }
+core_test_support = { workspace = true }
+libc = { workspace = true }
+mcp-types = { workspace = true }
+predicates = { workspace = true }
+pretty_assertions = { workspace = true }
+tempfile = { workspace = true }
+uuid = { workspace = true }
+walkdir = { workspace = true }
+wiremock = { workspace = true }
--- a/llmx-rs/exec/src/cli.rs
+++ b/llmx-rs/exec/src/cli.rs
@@ -0,0 +1,109 @@
+use clap::Parser;
+use clap::ValueEnum;
+use codex_common::CliConfigOverrides;
+use std::path::PathBuf;
+
+// Command-line interface of the non-interactive exec binary.
+//
+// NOTE: with clap's derive API, `///` doc comments on this struct and its
+// fields become user-visible help text. Maintainer notes are therefore written
+// as plain `//` comments so that `--help` output stays unchanged.
+#[derive(Parser, Debug)]
+#[command(version)]
+pub struct Cli {
+    /// Action to perform. If omitted, runs a new non-interactive session.
+    #[command(subcommand)]
+    pub command: Option<Command>,
+
+    /// Optional image(s) to attach to the initial prompt.
+    #[arg(long = "image", short = 'i', value_name = "FILE", value_delimiter = ',', num_args = 1..)]
+    pub images: Vec<PathBuf>,
+
+    /// Model the agent should use.
+    #[arg(long, short = 'm')]
+    pub model: Option<String>,
+
+    // NOTE(review): `--oss` has no `///` doc comment, so it is listed in
+    // `--help` without a description. Presumably it selects a local
+    // open-source model provider (the crate depends on `codex-ollama`) —
+    // confirm and add help text.
+    #[arg(long = "oss", default_value_t = false)]
+    pub oss: bool,
+
+    /// Select the sandbox policy to use when executing model-generated shell
+    /// commands.
+    #[arg(long = "sandbox", short = 's', value_enum)]
+    pub sandbox_mode: Option<codex_common::SandboxModeCliArg>,
+
+    /// Configuration profile from config.toml to specify default options.
+    #[arg(long = "profile", short = 'p')]
+    pub config_profile: Option<String>,
+
+    /// Convenience alias for low-friction sandboxed automatic execution (-a on-failure, --sandbox workspace-write).
+    #[arg(long = "full-auto", default_value_t = false)]
+    pub full_auto: bool,
+
+    /// Skip all confirmation prompts and execute commands without sandboxing.
+    /// EXTREMELY DANGEROUS. Intended solely for running in environments that are externally sandboxed.
+    #[arg(
+        long = "dangerously-bypass-approvals-and-sandbox",
+        alias = "yolo",
+        default_value_t = false,
+        conflicts_with = "full_auto"
+    )]
+    pub dangerously_bypass_approvals_and_sandbox: bool,
+
+    /// Tell the agent to use the specified directory as its working root.
+    // Uses `#[arg]` like every other field; `#[clap]` is the legacy spelling.
+    #[arg(long = "cd", short = 'C', value_name = "DIR")]
+    pub cwd: Option<PathBuf>,
+
+    /// Allow running Codex outside a Git repository.
+    #[arg(long = "skip-git-repo-check", default_value_t = false)]
+    pub skip_git_repo_check: bool,
+
+    /// Path to a JSON Schema file describing the model's final response shape.
+    #[arg(long = "output-schema", value_name = "FILE")]
+    pub output_schema: Option<PathBuf>,
+
+    // Not parsed from the command line (`skip`); clap initialises it with
+    // `Default::default()`. Presumably filled in by the caller — confirm.
+    #[arg(skip)]
+    pub config_overrides: CliConfigOverrides,
+
+    /// Specifies color settings for use in the output.
+    #[arg(long = "color", value_enum, default_value_t = Color::Auto)]
+    pub color: Color,
+
+    /// Print events to stdout as JSONL.
+    #[arg(long = "json", alias = "experimental-json", default_value_t = false)]
+    pub json: bool,
+
+    /// Specifies file where the last message from the agent should be written.
+    #[arg(long = "output-last-message", short = 'o', value_name = "FILE")]
+    pub last_message_file: Option<PathBuf>,
+
+    /// Initial instructions for the agent. If not provided as an argument (or
+    /// if `-` is used), instructions are read from stdin.
+    #[arg(value_name = "PROMPT", value_hint = clap::ValueHint::Other)]
+    pub prompt: Option<String>,
+}
+
+// Subcommands accepted by `Cli`. The `///` comment on each variant is left
+// untouched because clap's derive API uses it as that subcommand's help text.
+#[derive(Debug, clap::Subcommand)]
+pub enum Command {
+    /// Resume a previous session by id or pick the most recent with --last.
+    Resume(ResumeArgs),
+}
+
+// Arguments of the `Command::Resume` subcommand. Maintainer notes use `//`
+// because `///` comments here are rendered as clap help text.
+//
+// NOTE(review): SESSION_ID and PROMPT are both optional positionals, so a lone
+// positional is presumably bound to `session_id` first. `--last <PROMPT>` would
+// then trip `conflicts_with = "session_id"` — confirm against clap's positional
+// assignment and how the caller reinterprets the arguments.
+#[derive(Parser, Debug)]
+pub struct ResumeArgs {
+    /// Conversation/session id (UUID). When provided, resumes this session.
+    /// If omitted, use --last to pick the most recent recorded session.
+    #[arg(value_name = "SESSION_ID")]
+    pub session_id: Option<String>,
+
+    /// Resume the most recent recorded session (newest) without specifying an id.
+    #[arg(long = "last", default_value_t = false, conflicts_with = "session_id")]
+    pub last: bool,
+
+    /// Prompt to send after resuming the session. If `-` is used, read from stdin.
+    #[arg(value_name = "PROMPT", value_hint = clap::ValueHint::Other)]
+    pub prompt: Option<String>,
+}
+
+// Value type of the `--color` option. Variant names are exposed on the command
+// line in kebab-case (`rename_all`), and `Auto` is the `Default` variant.
+// Plain `//` is used here because `///` on variants would become help text.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, ValueEnum)]
+#[value(rename_all = "kebab-case")]
+pub enum Color {
+    Always,
+    Never,
+    #[default]
+    Auto,
+}
--- a/llmx-rs/exec/src/event_processor.rs
+++ b/llmx-rs/exec/src/event_processor.rs
@@ -0,0 +1,45 @@
+use std::path::Path;
+
+use codex_core::config::Config;
+use codex_core::protocol::Event;
+use codex_core::protocol::SessionConfiguredEvent;
+
+/// Status reported by [`EventProcessor::process_event`] after handling one
+/// event.
+pub(crate) enum CodexStatus {
+    /// The event did not end the session.
+    Running,
+    /// The task finished; presumably the caller should now request shutdown —
+    /// confirm at the call site.
+    InitiateShutdown,
+    /// Shutdown has completed.
+    Shutdown,
+}
+
+/// Sink for agent events; each implementation decides how events are rendered.
+pub(crate) trait EventProcessor {
+    /// Print summary of effective configuration and user prompt.
+    fn print_config_summary(
+        &mut self,
+        config: &Config,
+        prompt: &str,
+        session_configured: &SessionConfiguredEvent,
+    );
+
+    /// Handle a single event emitted by the agent.
+    fn process_event(&mut self, event: Event) -> CodexStatus;
+
+    /// Hook for any closing output. The default implementation does nothing.
+    /// Presumably invoked once after event processing ends — confirm at the
+    /// call site.
+    fn print_final_output(&mut self) {}
+}
+
+/// Persists the agent's last message to `output_file`.
+///
+/// When `last_agent_message` is `None`, empty content is written instead and a
+/// warning naming the file is printed to stderr afterwards.
+pub(crate) fn handle_last_message(last_agent_message: Option<&str>, output_file: &Path) {
+    match last_agent_message {
+        Some(message) => write_last_message_file(message, Some(output_file)),
+        None => {
+            write_last_message_file("", Some(output_file));
+            eprintln!(
+                "Warning: no last agent message; wrote empty content to {}",
+                output_file.display()
+            );
+        }
+    }
+}
+
+/// Writes `contents` to `last_message_path`, doing nothing when no path is
+/// given. A failed write is reported on stderr rather than propagated.
+fn write_last_message_file(contents: &str, last_message_path: Option<&Path>) {
+    let Some(path) = last_message_path else {
+        return;
+    };
+
+    if let Err(e) = std::fs::write(path, contents) {
+        eprintln!("Failed to write last message file {path:?}: {e}");
+    }
+}
--- a/llmx-rs/exec/src/event_processor_with_human_output.rs
+++ b/llmx-rs/exec/src/event_processor_with_human_output.rs
@@ -0,0 +1,599 @@
+use codex_common::elapsed::format_duration;
+use codex_common::elapsed::format_elapsed;
+use codex_core::config::Config;
+use codex_core::protocol::AgentMessageEvent;
+use codex_core::protocol::AgentReasoningRawContentEvent;
+use codex_core::protocol::BackgroundEventEvent;
+use codex_core::protocol::DeprecationNoticeEvent;
+use codex_core::protocol::ErrorEvent;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::ExecCommandBeginEvent;
+use codex_core::protocol::ExecCommandEndEvent;
+use codex_core::protocol::FileChange;
+use codex_core::protocol::McpInvocation;
+use codex_core::protocol::McpToolCallBeginEvent;
+use codex_core::protocol::McpToolCallEndEvent;
+use codex_core::protocol::PatchApplyBeginEvent;
+use codex_core::protocol::PatchApplyEndEvent;
+use codex_core::protocol::SessionConfiguredEvent;
+use codex_core::protocol::StreamErrorEvent;
+use codex_core::protocol::TaskCompleteEvent;
+use codex_core::protocol::TurnAbortReason;
+use codex_core::protocol::TurnDiffEvent;
+use codex_core::protocol::WarningEvent;
+use codex_core::protocol::WebSearchEndEvent;
+use codex_protocol::num_format::format_with_separators;
+use owo_colors::OwoColorize;
+use owo_colors::Style;
+use shlex::try_join;
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::time::Instant;
+
+use crate::event_processor::CodexStatus;
+use crate::event_processor::EventProcessor;
+use crate::event_processor::handle_last_message;
+use codex_common::create_config_summary_entries;
+use codex_protocol::plan_tool::StepStatus;
+use codex_protocol::plan_tool::UpdatePlanArgs;
+
+/// Maximum number of output lines printed for a single exec command or MCP
+/// tool call result.
+///
+/// TODO: This should be configurable. When used in CI, users may not want to
+/// impose a limit so they can see the full transcript.
+const MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL: usize = 20;
+/// [`EventProcessor`] that renders events as human-readable, optionally
+/// ANSI-styled text.
+pub(crate) struct EventProcessorWithHumanOutput {
+    /// Patches for which a `PatchApplyBegin` event has been seen, keyed by
+    /// call id; the entry is removed when the matching `PatchApplyEnd` arrives.
+    call_id_to_patch: HashMap<String, PatchApplyBegin>,
+
+    // To ensure that --color=never is respected, ANSI escapes _must_ be added
+    // using .style() with one of these fields. If you need a new style, add a
+    // new field here.
+    bold: Style,
+    italic: Style,
+    dimmed: Style,
+
+    magenta: Style,
+    red: Style,
+    green: Style,
+    cyan: Style,
+    yellow: Style,
+
+    /// Whether to include `AgentReasoning` events in the output.
+    show_agent_reasoning: bool,
+    /// Whether to include `AgentReasoningRawContent` events in the output.
+    show_raw_agent_reasoning: bool,
+    /// File that receives the last agent message on `TaskComplete`, if set.
+    last_message_path: Option<PathBuf>,
+    /// Token usage info from the most recent `TokenCount` event.
+    last_total_token_usage: Option<codex_core::protocol::TokenUsageInfo>,
+    /// Last agent message from `TaskComplete`; printed by `print_final_output`.
+    final_message: Option<String>,
+}
+
+impl EventProcessorWithHumanOutput {
+    /// Builds a processor whose style fields carry formatting only when
+    /// `with_ansi` is true.
+    ///
+    /// * `with_ansi` - when false, every style field is the plain
+    ///   `Style::new()`.
+    /// * `config` - supplies `hide_agent_reasoning` and
+    ///   `show_raw_agent_reasoning`.
+    /// * `last_message_path` - stored as-is; used when the task completes.
+    pub(crate) fn create_with_ansi(
+        with_ansi: bool,
+        config: &Config,
+        last_message_path: Option<PathBuf>,
+    ) -> Self {
+        // Single point that decides between the requested style and the plain
+        // one, so the struct literal below exists only once.
+        let pick = |style: Style| if with_ansi { style } else { Style::new() };
+
+        Self {
+            call_id_to_patch: HashMap::new(),
+            bold: pick(Style::new().bold()),
+            italic: pick(Style::new().italic()),
+            dimmed: pick(Style::new().dimmed()),
+            magenta: pick(Style::new().magenta()),
+            red: pick(Style::new().red()),
+            green: pick(Style::new().green()),
+            cyan: pick(Style::new().cyan()),
+            yellow: pick(Style::new().yellow()),
+            show_agent_reasoning: !config.hide_agent_reasoning,
+            show_raw_agent_reasoning: config.show_raw_agent_reasoning,
+            last_message_path,
+            last_total_token_usage: None,
+            final_message: None,
+        }
+    }
+}
+
+/// Data recorded when a `PatchApplyBegin` event arrives, kept until the
+/// matching `PatchApplyEnd` so its summary line can be built.
+struct PatchApplyBegin {
+    /// When the begin event was processed; used to report elapsed time.
+    start_time: Instant,
+    /// Copied from the begin event and echoed in the end-of-patch label.
+    auto_approved: bool,
+}
+
+/// Prints a formatted line to stderr via `eprintln!`.
+///
+/// Despite the name, no timestamp is emitted: the `$self` argument is accepted
+/// but not used by the expansion.
+macro_rules! ts_msg {
+    ($self:ident, $($arg:tt)*) => {{
+        eprintln!($($arg)*);
+    }};
+}
+
+impl EventProcessor for EventProcessorWithHumanOutput {
+    /// Writes the session banner to stderr: the version line, one `key: value`
+    /// row per configuration summary entry plus the session id, and finally the
+    /// user prompt. This mirrors the information shown in the TUI welcome
+    /// screen.
+    fn print_config_summary(
+        &mut self,
+        config: &Config,
+        prompt: &str,
+        session_configured_event: &SessionConfiguredEvent,
+    ) {
+        ts_msg!(
+            self,
+            "OpenAI Codex v{} (research preview)\n--------",
+            env!("CARGO_PKG_VERSION")
+        );
+
+        let session_id = session_configured_event.session_id.to_string();
+        let mut summary = create_config_summary_entries(config);
+        summary.push(("session id", session_id));
+
+        for (key, value) in summary {
+            let label = format!("{key}:");
+            eprintln!("{} {}", label.style(self.bold), value);
+        }
+
+        eprintln!("--------");
+
+        // The prompt may have come from stdin and so never appeared in the
+        // terminal; echo it here so the transcript shows it before any events.
+        let speaker = "user".style(self.cyan);
+        ts_msg!(self, "{}\n{}", speaker, prompt);
+    }
+
+    /// Renders a single event to stderr and reports the resulting status.
+    ///
+    /// Returns `CodexStatus::InitiateShutdown` for `TaskComplete`,
+    /// `CodexStatus::Shutdown` for `ShutdownComplete`, and
+    /// `CodexStatus::Running` for every other event.
+    fn process_event(&mut self, event: Event) -> CodexStatus {
+        let Event { id: _, msg } = event;
+        match msg {
+            EventMsg::Error(ErrorEvent { message }) => {
+                let prefix = "ERROR:".style(self.red);
+                ts_msg!(self, "{prefix} {message}");
+            }
+            EventMsg::Warning(WarningEvent { message }) => {
+                ts_msg!(
+                    self,
+                    "{} {message}",
+                    "warning:".style(self.yellow).style(self.bold)
+                );
+            }
+            EventMsg::DeprecationNotice(DeprecationNoticeEvent { summary, details }) => {
+                ts_msg!(
+                    self,
+                    "{} {summary}",
+                    "deprecated:".style(self.magenta).style(self.bold)
+                );
+                if let Some(details) = details {
+                    ts_msg!(self, "  {}", details.style(self.dimmed));
+                }
+            }
+            EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
+                ts_msg!(self, "{}", message.style(self.dimmed));
+            }
+            EventMsg::StreamError(StreamErrorEvent { message }) => {
+                ts_msg!(self, "{}", message.style(self.dimmed));
+            }
+            EventMsg::TaskStarted(_) => {
+                // Ignore.
+            }
+            EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
+                // Write the last message to the configured file (if any), then
+                // keep it so `print_final_output` can print it.
+                let last_message = last_agent_message.as_deref();
+                if let Some(output_file) = self.last_message_path.as_deref() {
+                    handle_last_message(last_message, output_file);
+                }
+
+                self.final_message = last_agent_message;
+
+                return CodexStatus::InitiateShutdown;
+            }
+            EventMsg::TokenCount(ev) => {
+                self.last_total_token_usage = ev.info;
+            }
+
+            EventMsg::AgentReasoningSectionBreak(_) => {
+                if !self.show_agent_reasoning {
+                    return CodexStatus::Running;
+                }
+                eprintln!();
+            }
+            EventMsg::AgentReasoningRawContent(AgentReasoningRawContentEvent { text }) => {
+                if self.show_raw_agent_reasoning {
+                    ts_msg!(
+                        self,
+                        "{}\n{}",
+                        "thinking".style(self.italic).style(self.magenta),
+                        text,
+                    );
+                }
+            }
+            EventMsg::AgentMessage(AgentMessageEvent { message }) => {
+                ts_msg!(
+                    self,
+                    "{}\n{}",
+                    "codex".style(self.italic).style(self.magenta),
+                    message,
+                );
+            }
+            EventMsg::ExecCommandBegin(ExecCommandBeginEvent { command, cwd, .. }) => {
+                // `eprint!` leaves the line open; the title printed for
+                // `ExecCommandEnd` starts with a space and presumably completes
+                // this line.
+                eprint!(
+                    "{}\n{} in {}",
+                    "exec".style(self.italic).style(self.magenta),
+                    escape_command(&command).style(self.bold),
+                    cwd.to_string_lossy(),
+                );
+            }
+            EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+                aggregated_output,
+                duration,
+                exit_code,
+                ..
+            }) => {
+                let duration = format!(" in {}", format_duration(duration));
+
+                // Only the first MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL lines of
+                // the output are shown.
+                let truncated_output = aggregated_output
+                    .lines()
+                    .take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL)
+                    .collect::<Vec<_>>()
+                    .join("\n");
+                match exit_code {
+                    0 => {
+                        let title = format!(" succeeded{duration}:");
+                        ts_msg!(self, "{}", title.style(self.green));
+                    }
+                    _ => {
+                        let title = format!(" exited {exit_code}{duration}:");
+                        ts_msg!(self, "{}", title.style(self.red));
+                    }
+                }
+                eprintln!("{}", truncated_output.style(self.dimmed));
+            }
+            EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
+                call_id: _,
+                invocation,
+            }) => {
+                ts_msg!(
+                    self,
+                    "{} {}",
+                    "tool".style(self.magenta),
+                    format_mcp_invocation(&invocation).style(self.bold),
+                );
+            }
+            EventMsg::McpToolCallEnd(tool_call_end_event) => {
+                // Query success before the event is destructured (moved) below.
+                let is_success = tool_call_end_event.is_success();
+                let McpToolCallEndEvent {
+                    call_id: _,
+                    result,
+                    invocation,
+                    duration,
+                } = tool_call_end_event;
+
+                let duration = format!(" in {}", format_duration(duration));
+
+                let status_str = if is_success { "success" } else { "failed" };
+                let title_style = if is_success { self.green } else { self.red };
+                let title = format!(
+                    "{} {status_str}{duration}:",
+                    format_mcp_invocation(&invocation)
+                );
+
+                ts_msg!(self, "{}", title.style(title_style));
+
+                // Only an `Ok` result is printed; an `Err` result produces no
+                // output beyond the title line above.
+                if let Ok(res) = result {
+                    let val: serde_json::Value = res.into();
+                    let pretty =
+                        serde_json::to_string_pretty(&val).unwrap_or_else(|_| val.to_string());
+
+                    for line in pretty.lines().take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL) {
+                        eprintln!("{}", line.style(self.dimmed));
+                    }
+                }
+            }
+            EventMsg::WebSearchEnd(WebSearchEndEvent { call_id: _, query }) => {
+                ts_msg!(self, "🌐 Searched: {query}");
+            }
+            EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
+                call_id,
+                auto_approved,
+                changes,
+            }) => {
+                // Store metadata so we can calculate duration later when we
+                // receive the corresponding PatchApplyEnd event.
+                self.call_id_to_patch.insert(
+                    call_id,
+                    PatchApplyBegin {
+                        start_time: Instant::now(),
+                        auto_approved,
+                    },
+                );
+
+                ts_msg!(
+                    self,
+                    "{}",
+                    "file update".style(self.magenta).style(self.italic),
+                );
+
+                // Pretty-print the patch summary with colored diff markers so
+                // it's easy to scan in the terminal output.
+                for (path, change) in changes.iter() {
+                    match change {
+                        FileChange::Add { content } => {
+                            let header = format!(
+                                "{} {}",
+                                format_file_change(change),
+                                path.to_string_lossy()
+                            );
+                            eprintln!("{}", header.style(self.magenta));
+                            for line in content.lines() {
+                                eprintln!("{}", line.style(self.green));
+                            }
+                        }
+                        FileChange::Delete { content } => {
+                            let header = format!(
+                                "{} {}",
+                                format_file_change(change),
+                                path.to_string_lossy()
+                            );
+                            eprintln!("{}", header.style(self.magenta));
+                            for line in content.lines() {
+                                eprintln!("{}", line.style(self.red));
+                            }
+                        }
+                        FileChange::Update {
+                            unified_diff,
+                            move_path,
+                        } => {
+                            let header = if let Some(dest) = move_path {
+                                format!(
+                                    "{} {} -> {}",
+                                    format_file_change(change),
+                                    path.to_string_lossy(),
+                                    dest.to_string_lossy()
+                                )
+                            } else {
+                                format!("{} {}", format_file_change(change), path.to_string_lossy())
+                            };
+                            eprintln!("{}", header.style(self.magenta));
+
+                            // Colorize diff lines. We keep file header lines
+                            // (--- / +++) without extra coloring so they are
+                            // still readable.
+                            for diff_line in unified_diff.lines() {
+                                if diff_line.starts_with('+') && !diff_line.starts_with("+++") {
+                                    eprintln!("{}", diff_line.style(self.green));
+                                } else if diff_line.starts_with('-')
+                                    && !diff_line.starts_with("---")
+                                {
+                                    eprintln!("{}", diff_line.style(self.red));
+                                } else {
+                                    eprintln!("{diff_line}");
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            EventMsg::PatchApplyEnd(PatchApplyEndEvent {
+                call_id,
+                stdout,
+                stderr,
+                success,
+                ..
+            }) => {
+                let patch_begin = self.call_id_to_patch.remove(&call_id);
+
+                // Compute duration and summary label similar to exec commands.
+                // Without a recorded begin event there is no duration, and the
+                // label falls back to the call id.
+                let (duration, label) = if let Some(PatchApplyBegin {
+                    start_time,
+                    auto_approved,
+                }) = patch_begin
+                {
+                    (
+                        format!(" in {}", format_elapsed(start_time)),
+                        format!("apply_patch(auto_approved={auto_approved})"),
+                    )
+                } else {
+                    (String::new(), format!("apply_patch('{call_id}')"))
+                };
+
+                // The event only carries a success flag, so the exit code shown
+                // is synthesized: 0 with stdout on success, 1 with stderr
+                // otherwise.
+                let (exit_code, output, title_style) = if success {
+                    (0, stdout, self.green)
+                } else {
+                    (1, stderr, self.red)
+                };
+
+                let title = format!("{label} exited {exit_code}{duration}:");
+                ts_msg!(self, "{}", title.style(title_style));
+                for line in output.lines() {
+                    eprintln!("{}", line.style(self.dimmed));
+                }
+            }
+            EventMsg::TurnDiff(TurnDiffEvent { unified_diff }) => {
+                ts_msg!(
+                    self,
+                    "{}",
+                    "file update:".style(self.magenta).style(self.italic)
+                );
+                eprintln!("{unified_diff}");
+            }
+            EventMsg::AgentReasoning(agent_reasoning_event) => {
+                if self.show_agent_reasoning {
+                    ts_msg!(
+                        self,
+                        "{}\n{}",
+                        "thinking".style(self.italic).style(self.magenta),
+                        agent_reasoning_event.text,
+                    );
+                }
+            }
+            EventMsg::SessionConfigured(session_configured_event) => {
+                // Every field is named in the pattern (no `..`), so adding a
+                // field to the event type makes this fail to compile.
+                let SessionConfiguredEvent {
+                    session_id: conversation_id,
+                    model,
+                    reasoning_effort: _,
+                    history_log_id: _,
+                    history_entry_count: _,
+                    initial_messages: _,
+                    rollout_path: _,
+                } = session_configured_event;
+
+                ts_msg!(
+                    self,
+                    "{} {}",
+                    "codex session".style(self.magenta).style(self.bold),
+                    conversation_id.to_string().style(self.dimmed)
+                );
+
+                ts_msg!(self, "model: {}", model);
+                eprintln!();
+            }
+            EventMsg::PlanUpdate(plan_update_event) => {
+                let UpdatePlanArgs { explanation, plan } = plan_update_event;
+
+                // Header
+                ts_msg!(self, "{}", "Plan update".style(self.magenta));
+
+                // Optional explanation
+                if let Some(explanation) = explanation
+                    && !explanation.trim().is_empty()
+                {
+                    ts_msg!(self, "{}", explanation.style(self.italic));
+                }
+
+                // Pretty-print the plan items with simple status markers.
+                for item in plan {
+                    match item.status {
+                        StepStatus::Completed => {
+                            ts_msg!(self, "  {} {}", "✓".style(self.green), item.step);
+                        }
+                        StepStatus::InProgress => {
+                            ts_msg!(self, "  {} {}", "→".style(self.cyan), item.step);
+                        }
+                        StepStatus::Pending => {
+                            ts_msg!(
+                                self,
+                                "  {} {}",
+                                "•".style(self.dimmed),
+                                item.step.style(self.dimmed)
+                            );
+                        }
+                    }
+                }
+            }
+            EventMsg::ViewImageToolCall(view) => {
+                ts_msg!(
+                    self,
+                    "{} {}",
+                    "viewed image".style(self.magenta),
+                    view.path.display()
+                );
+            }
+            EventMsg::TurnAborted(abort_reason) => match abort_reason.reason {
+                TurnAbortReason::Interrupted => {
+                    ts_msg!(self, "task interrupted");
+                }
+                TurnAbortReason::Replaced => {
+                    ts_msg!(self, "task aborted: replaced by a new task");
+                }
+                TurnAbortReason::ReviewEnded => {
+                    ts_msg!(self, "task aborted: review ended");
+                }
+            },
+            EventMsg::ShutdownComplete => return CodexStatus::Shutdown,
+            // Events that produce no human-readable output. They are listed
+            // explicitly rather than matched with `_`.
+            EventMsg::WebSearchBegin(_)
+            | EventMsg::ExecApprovalRequest(_)
+            | EventMsg::ApplyPatchApprovalRequest(_)
+            | EventMsg::ExecCommandOutputDelta(_)
+            | EventMsg::GetHistoryEntryResponse(_)
+            | EventMsg::McpListToolsResponse(_)
+            | EventMsg::ListCustomPromptsResponse(_)
+            | EventMsg::RawResponseItem(_)
+            | EventMsg::UserMessage(_)
+            | EventMsg::EnteredReviewMode(_)
+            | EventMsg::ExitedReviewMode(_)
+            | EventMsg::AgentMessageDelta(_)
+            | EventMsg::AgentReasoningDelta(_)
+            | EventMsg::AgentReasoningRawContentDelta(_)
+            | EventMsg::ItemStarted(_)
+            | EventMsg::ItemCompleted(_)
+            | EventMsg::AgentMessageContentDelta(_)
+            | EventMsg::ReasoningContentDelta(_)
+            | EventMsg::ReasoningRawContentDelta(_)
+            | EventMsg::UndoCompleted(_)
+            | EventMsg::UndoStarted(_) => {}
+        }
+        CodexStatus::Running
+    }
+
+    /// Prints the closing output: the token usage total on stderr (when one was
+    /// recorded) followed by the final agent message on stdout (when present).
+    fn print_final_output(&mut self) {
+        if let Some(usage_info) = self.last_total_token_usage.as_ref() {
+            let heading = "tokens used".style(self.magenta).style(self.italic);
+            let total = format_with_separators(usage_info.total_token_usage.blended_total());
+            eprintln!("{}\n{}", heading, total);
+        }
+
+        let Some(message) = self.final_message.as_deref() else {
+            return;
+        };
+
+        // Unless the final message was piped to a file, the user sees it twice:
+        // on stderr during normal event processing and again here on stdout.
+        // The token summary printed above visually separates the two copies.
+        // A newline is appended only when the message does not already end
+        // with one.
+        #[allow(clippy::print_stdout)]
+        if message.ends_with('\n') {
+            print!("{message}");
+        } else {
+            println!("{message}");
+        }
+    }
+}
+
+/// Renders `command` as a single string using `shlex::try_join`; if that
+/// fails, the arguments are joined with single spaces instead.
+fn escape_command(command: &[String]) -> String {
+    let args = command.iter().map(String::as_str);
+    match try_join(args) {
+        Ok(joined) => joined,
+        Err(_) => command.join(" "),
+    }
+}
+
+/// Maps a file change to its one-letter status code: `A` for an added file,
+/// `D` for a deleted one, `R` for an update that also moves the file, and `M`
+/// for an update in place.
+fn format_file_change(change: &FileChange) -> &'static str {
+    match change {
+        FileChange::Add { .. } => "A",
+        FileChange::Delete { .. } => "D",
+        FileChange::Update { move_path, .. } => {
+            if move_path.is_some() {
+                "R"
+            } else {
+                "M"
+            }
+        }
+    }
+}
+
+/// Formats an MCP invocation as `server.tool(args)`.
+///
+/// `args` is the compact (single-line) JSON form of the invocation's
+/// arguments. When there are no arguments the parentheses are left empty,
+/// giving `server.tool()`.
+fn format_mcp_invocation(invocation: &McpInvocation) -> String {
+    // Compact JSON keeps the arguments on one line; fall back to the value's
+    // `Display` output if serialization fails.
+    let args_str = match invocation.arguments.as_ref() {
+        Some(value) => serde_json::to_string(value).unwrap_or_else(|_| value.to_string()),
+        None => String::new(),
+    };
+
+    // An empty `args_str` already yields `server.tool()`, so no separate
+    // branch for the empty case is needed.
+    format!("{}.{}({args_str})", invocation.server, invocation.tool)
+}
--- a/llmx-rs/exec/src/event_processor_with_jsonl_output.rs
+++ b/llmx-rs/exec/src/event_processor_with_jsonl_output.rs
@@ -0,0 +1,501 @@
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::sync::atomic::AtomicU64;
+
+use crate::event_processor::CodexStatus;
+use crate::event_processor::EventProcessor;
+use crate::event_processor::handle_last_message;
+use crate::exec_events::AgentMessageItem;
+use crate::exec_events::CommandExecutionItem;
+use crate::exec_events::CommandExecutionStatus;
+use crate::exec_events::ErrorItem;
+use crate::exec_events::FileChangeItem;
+use crate::exec_events::FileUpdateChange;
+use crate::exec_events::ItemCompletedEvent;
+use crate::exec_events::ItemStartedEvent;
+use crate::exec_events::ItemUpdatedEvent;
+use crate::exec_events::McpToolCallItem;
+use crate::exec_events::McpToolCallItemError;
+use crate::exec_events::McpToolCallItemResult;
+use crate::exec_events::McpToolCallStatus;
+use crate::exec_events::PatchApplyStatus;
+use crate::exec_events::PatchChangeKind;
+use crate::exec_events::ReasoningItem;
+use crate::exec_events::ThreadErrorEvent;
+use crate::exec_events::ThreadEvent;
+use crate::exec_events::ThreadItem;
+use crate::exec_events::ThreadItemDetails;
+use crate::exec_events::ThreadStartedEvent;
+use crate::exec_events::TodoItem;
+use crate::exec_events::TodoListItem;
+use crate::exec_events::TurnCompletedEvent;
+use crate::exec_events::TurnFailedEvent;
+use crate::exec_events::TurnStartedEvent;
+use crate::exec_events::Usage;
+use crate::exec_events::WebSearchItem;
+use codex_core::config::Config;
+use codex_core::protocol::AgentMessageEvent;
+use codex_core::protocol::AgentReasoningEvent;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::ExecCommandBeginEvent;
+use codex_core::protocol::ExecCommandEndEvent;
+use codex_core::protocol::FileChange;
+use codex_core::protocol::McpToolCallBeginEvent;
+use codex_core::protocol::McpToolCallEndEvent;
+use codex_core::protocol::PatchApplyBeginEvent;
+use codex_core::protocol::PatchApplyEndEvent;
+use codex_core::protocol::SessionConfiguredEvent;
+use codex_core::protocol::TaskCompleteEvent;
+use codex_core::protocol::TaskStartedEvent;
+use codex_core::protocol::WebSearchEndEvent;
+use codex_protocol::plan_tool::StepStatus;
+use codex_protocol::plan_tool::UpdatePlanArgs;
+use serde_json::Value as JsonValue;
+use tracing::error;
+use tracing::warn;
+
+/// Event processor that converts core protocol events into `ThreadEvent`s and prints each
+/// one to stdout as a single line of JSON.
+pub struct EventProcessorWithJsonOutput {
+    /// When set, handed to `handle_last_message` together with the final agent message
+    /// once a `TaskComplete` event is processed.
+    last_message_path: Option<PathBuf>,
+    /// Counter used to mint `item_N` identifiers.
+    next_event_id: AtomicU64,
+    // Tracks running commands by call_id, including the associated item id.
+    running_commands: HashMap<String, RunningCommand>,
+    /// Patch applications that have begun but not yet ended, keyed by call_id.
+    running_patch_applies: HashMap<String, PatchApplyBeginEvent>,
+    // Tracks the todo list for the current turn (at most one per turn).
+    running_todo_list: Option<RunningTodoList>,
+    /// Most recent total token usage seen in a `TokenCount` event; reported when the turn
+    /// completes.
+    last_total_token_usage: Option<codex_core::protocol::TokenUsage>,
+    /// MCP tool calls that have begun but not yet ended, keyed by call_id.
+    running_mcp_tool_calls: HashMap<String, RunningMcpToolCall>,
+    /// Last `Error` event seen since the turn started; if present at task completion the
+    /// turn is reported as failed.
+    last_critical_error: Option<ThreadErrorEvent>,
+}
+
+/// Bookkeeping for a command whose begin event has been seen but whose end event has not.
+#[derive(Debug, Clone)]
+struct RunningCommand {
+    /// The command line as it was rendered for the `item.started` event.
+    command: String,
+    /// Item id minted at begin; reused for the matching `item.completed` event.
+    item_id: String,
+}
+
+/// The to-do list item that is currently open for the turn.
+#[derive(Debug, Clone)]
+struct RunningTodoList {
+    /// Item id minted when the list was first emitted; reused for updates and completion.
+    item_id: String,
+    /// Latest snapshot of the list entries.
+    items: Vec<TodoItem>,
+}
+
+/// Bookkeeping for an MCP tool call whose begin event has been seen but whose end event
+/// has not.
+#[derive(Debug, Clone)]
+struct RunningMcpToolCall {
+    /// Name of the MCP server, copied from the begin event's invocation.
+    server: String,
+    /// Name of the tool, copied from the begin event's invocation.
+    tool: String,
+    /// Item id minted at begin; reused for the matching `item.completed` event.
+    item_id: String,
+    /// Invocation arguments; `JsonValue::Null` when the invocation carried none.
+    arguments: JsonValue,
+}
+
+impl EventProcessorWithJsonOutput {
+    /// Creates a processor with no in-flight items and the item counter at zero.
+    ///
+    /// `last_message_path` is stored unchanged and consulted when a `TaskComplete` event is
+    /// processed.
+    pub fn new(last_message_path: Option<PathBuf>) -> Self {
+        let next_event_id = AtomicU64::new(0);
+        Self {
+            last_message_path,
+            next_event_id,
+            running_commands: HashMap::default(),
+            running_patch_applies: HashMap::default(),
+            running_mcp_tool_calls: HashMap::default(),
+            running_todo_list: None,
+            last_total_token_usage: None,
+            last_critical_error: None,
+        }
+    }
+
+    /// Translates one core protocol event into zero or more JSONL thread events.
+    ///
+    /// Most variants are forwarded to a dedicated `handle_*` method. `TokenCount` only
+    /// records the latest total usage, while `WebSearchBegin` and every variant not listed
+    /// here produce no output.
+    pub fn collect_thread_events(&mut self, event: &Event) -> Vec<ThreadEvent> {
+        match &event.msg {
+            // Session and turn lifecycle.
+            EventMsg::SessionConfigured(payload) => self.handle_session_configured(payload),
+            EventMsg::TaskStarted(payload) => self.handle_task_started(payload),
+            EventMsg::TaskComplete(_) => self.handle_task_complete(),
+            EventMsg::TokenCount(payload) => {
+                if let Some(info) = payload.info.as_ref() {
+                    self.last_total_token_usage = Some(info.total_token_usage.clone());
+                }
+                Vec::new()
+            }
+
+            // Items produced while the agent works.
+            EventMsg::AgentMessage(payload) => self.handle_agent_message(payload),
+            EventMsg::AgentReasoning(payload) => self.handle_reasoning_event(payload),
+            EventMsg::ExecCommandBegin(payload) => self.handle_exec_command_begin(payload),
+            EventMsg::ExecCommandEnd(payload) => self.handle_exec_command_end(payload),
+            EventMsg::McpToolCallBegin(payload) => self.handle_mcp_tool_call_begin(payload),
+            EventMsg::McpToolCallEnd(payload) => self.handle_mcp_tool_call_end(payload),
+            EventMsg::PatchApplyBegin(payload) => self.handle_patch_apply_begin(payload),
+            EventMsg::PatchApplyEnd(payload) => self.handle_patch_apply_end(payload),
+            EventMsg::WebSearchBegin(_) => Vec::new(),
+            EventMsg::WebSearchEnd(payload) => self.handle_web_search_end(payload),
+            EventMsg::PlanUpdate(payload) => self.handle_plan_update(payload),
+
+            // Errors and warnings.
+            EventMsg::Error(payload) => {
+                // Remember the error so task completion can report the turn as failed.
+                let error = ThreadErrorEvent {
+                    message: payload.message.clone(),
+                };
+                self.last_critical_error = Some(error.clone());
+                vec![ThreadEvent::Error(error)]
+            }
+            EventMsg::StreamError(payload) => {
+                let error = ThreadErrorEvent {
+                    message: payload.message.clone(),
+                };
+                vec![ThreadEvent::Error(error)]
+            }
+            EventMsg::Warning(payload) => {
+                // Warnings are surfaced as already-completed error items.
+                let id = self.get_next_item_id();
+                let details = ThreadItemDetails::Error(ErrorItem {
+                    message: payload.message.clone(),
+                });
+                let item = ThreadItem { id, details };
+                vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
+            }
+
+            _ => Vec::new(),
+        }
+    }
+
+    /// Returns a fresh item id of the form `item_N` and advances the counter by one.
+    fn get_next_item_id(&self) -> String {
+        let sequence = self
+            .next_event_id
+            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
+        format!("item_{sequence}")
+    }
+
+    /// Emits `thread.started`, using the stringified session id as the thread id.
+    fn handle_session_configured(&self, payload: &SessionConfiguredEvent) -> Vec<ThreadEvent> {
+        let thread_id = payload.session_id.to_string();
+        let started = ThreadStartedEvent { thread_id };
+        vec![ThreadEvent::ThreadStarted(started)]
+    }
+
+    /// Emits a completed web-search item carrying the query from the end event.
+    fn handle_web_search_end(&self, ev: &WebSearchEndEvent) -> Vec<ThreadEvent> {
+        let id = self.get_next_item_id();
+        let search = WebSearchItem {
+            query: ev.query.clone(),
+        };
+        let item = ThreadItem {
+            id,
+            details: ThreadItemDetails::WebSearch(search),
+        };
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
+    }
+
+    /// Emits a completed agent-message item containing the message text.
+    fn handle_agent_message(&self, payload: &AgentMessageEvent) -> Vec<ThreadEvent> {
+        let id = self.get_next_item_id();
+        let message = AgentMessageItem {
+            text: payload.message.clone(),
+        };
+        let item = ThreadItem {
+            id,
+            details: ThreadItemDetails::AgentMessage(message),
+        };
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
+    }
+
+    /// Emits a completed reasoning item containing the reasoning text.
+    fn handle_reasoning_event(&self, ev: &AgentReasoningEvent) -> Vec<ThreadEvent> {
+        let id = self.get_next_item_id();
+        let reasoning = ReasoningItem {
+            text: ev.text.clone(),
+        };
+        let item = ThreadItem {
+            id,
+            details: ThreadItemDetails::Reasoning(reasoning),
+        };
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
+    }
+    /// Records a newly started command and emits its `item.started` event.
+    ///
+    /// The command is rendered with `shlex::try_join`. If that fails, the arguments are
+    /// joined with single spaces instead and a warning is logged; the item is still
+    /// emitted. The rendered command and the minted item id are remembered under the
+    /// event's `call_id` so the matching end event can complete the same item.
+    fn handle_exec_command_begin(&mut self, ev: &ExecCommandBeginEvent) -> Vec<ThreadEvent> {
+        let item_id = self.get_next_item_id();
+
+        let command_string = shlex::try_join(ev.command.iter().map(String::as_str))
+            .unwrap_or_else(|e| {
+                // The item is NOT skipped on failure, so the log must say what really happens.
+                warn!(
+                    call_id = ev.call_id,
+                    "Failed to stringify command: {e:?}; falling back to space-joined arguments"
+                );
+                ev.command.join(" ")
+            });
+
+        self.running_commands.insert(
+            ev.call_id.clone(),
+            RunningCommand {
+                command: command_string.clone(),
+                item_id: item_id.clone(),
+            },
+        );
+
+        let item = ThreadItem {
+            id: item_id,
+            details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
+                command: command_string,
+                aggregated_output: String::new(),
+                exit_code: None,
+                status: CommandExecutionStatus::InProgress,
+            }),
+        };
+
+        vec![ThreadEvent::ItemStarted(ItemStartedEvent { item })]
+    }
+
+    /// Records a newly started MCP tool call and emits its `item.started` event.
+    ///
+    /// Missing invocation arguments are represented as `JsonValue::Null`. The call is
+    /// remembered under the event's `call_id` so the end event can complete the same item.
+    fn handle_mcp_tool_call_begin(&mut self, ev: &McpToolCallBeginEvent) -> Vec<ThreadEvent> {
+        let running = RunningMcpToolCall {
+            item_id: self.get_next_item_id(),
+            server: ev.invocation.server.clone(),
+            tool: ev.invocation.tool.clone(),
+            arguments: ev.invocation.arguments.clone().unwrap_or(JsonValue::Null),
+        };
+
+        let details = ThreadItemDetails::McpToolCall(McpToolCallItem {
+            server: running.server.clone(),
+            tool: running.tool.clone(),
+            arguments: running.arguments.clone(),
+            result: None,
+            error: None,
+            status: McpToolCallStatus::InProgress,
+        });
+        let item = ThreadItem {
+            id: running.item_id.clone(),
+            details,
+        };
+
+        self.running_mcp_tool_calls
+            .insert(ev.call_id.clone(), running);
+
+        vec![ThreadEvent::ItemStarted(ItemStartedEvent { item })]
+    }
+
+    /// Emits `item.completed` for a finished MCP tool call.
+    ///
+    /// The server, tool, arguments and item id come from the recorded begin event. If no
+    /// begin was recorded for this `call_id`, a warning is logged and a new item is
+    /// synthesized from the end event's own invocation. Exactly one of `result` / `error`
+    /// is populated, depending on whether the event's result is `Ok` or `Err`.
+    fn handle_mcp_tool_call_end(&mut self, ev: &McpToolCallEndEvent) -> Vec<ThreadEvent> {
+        let status = if ev.is_success() {
+            McpToolCallStatus::Completed
+        } else {
+            McpToolCallStatus::Failed
+        };
+
+        let running = match self.running_mcp_tool_calls.remove(&ev.call_id) {
+            Some(running) => running,
+            None => {
+                warn!(
+                    call_id = ev.call_id,
+                    "Received McpToolCallEnd without begin; synthesizing new item"
+                );
+                RunningMcpToolCall {
+                    server: ev.invocation.server.clone(),
+                    tool: ev.invocation.tool.clone(),
+                    item_id: self.get_next_item_id(),
+                    arguments: ev.invocation.arguments.clone().unwrap_or(JsonValue::Null),
+                }
+            }
+        };
+
+        let outcome = ev.result.as_ref();
+        let result = outcome.ok().map(|value| McpToolCallItemResult {
+            content: value.content.clone(),
+            structured_content: value.structured_content.clone(),
+        });
+        let error = outcome.err().map(|message| McpToolCallItemError {
+            message: message.clone(),
+        });
+
+        let details = ThreadItemDetails::McpToolCall(McpToolCallItem {
+            server: running.server,
+            tool: running.tool,
+            arguments: running.arguments,
+            result,
+            error,
+            status,
+        });
+        let item = ThreadItem {
+            id: running.item_id,
+            details,
+        };
+
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
+    }
+
+    /// Remembers the begin event under its `call_id`; nothing is emitted until the patch
+    /// application ends.
+    fn handle_patch_apply_begin(&mut self, ev: &PatchApplyBeginEvent) -> Vec<ThreadEvent> {
+        let call_id = ev.call_id.clone();
+        self.running_patch_applies.insert(call_id, ev.clone());
+        Vec::new()
+    }
+
+    /// Converts a core `FileChange` into the corresponding JSONL `PatchChangeKind`,
+    /// discarding any payload the change carries.
+    fn map_change_kind(&self, kind: &FileChange) -> PatchChangeKind {
+        match kind {
+            FileChange::Update { .. } => PatchChangeKind::Update,
+            FileChange::Delete { .. } => PatchChangeKind::Delete,
+            FileChange::Add { .. } => PatchChangeKind::Add,
+        }
+    }
+
+    /// Emits a completed file-change item for a finished patch application.
+    ///
+    /// The list of changes is taken from the begin event recorded under the same `call_id`;
+    /// `ev.success` decides between `Completed` and `Failed`. An end event without a
+    /// recorded begin is logged and dropped, matching how unmatched command end events are
+    /// treated.
+    fn handle_patch_apply_end(&mut self, ev: &PatchApplyEndEvent) -> Vec<ThreadEvent> {
+        let Some(running_patch_apply) = self.running_patch_applies.remove(&ev.call_id) else {
+            warn!(
+                call_id = ev.call_id,
+                "PatchApplyEnd without matching PatchApplyBegin; skipping item.completed"
+            );
+            return Vec::new();
+        };
+
+        let status = if ev.success {
+            PatchApplyStatus::Completed
+        } else {
+            PatchApplyStatus::Failed
+        };
+
+        let changes = running_patch_apply
+            .changes
+            .iter()
+            .map(|(path, change)| FileUpdateChange {
+                // Lossy conversion keeps non-UTF-8 paths recognizable instead of
+                // collapsing them to an empty string.
+                path: path.to_string_lossy().into_owned(),
+                kind: self.map_change_kind(change),
+            })
+            .collect();
+
+        let item = ThreadItem {
+            id: self.get_next_item_id(),
+            details: ThreadItemDetails::FileChange(FileChangeItem { changes, status }),
+        };
+
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
+    }
+
+    /// Emits `item.completed` for a finished command, reusing the item id minted at begin.
+    ///
+    /// An end event whose `call_id` has no recorded begin is logged and dropped. A zero
+    /// exit code maps to `Completed`; anything else maps to `Failed`.
+    fn handle_exec_command_end(&mut self, ev: &ExecCommandEndEvent) -> Vec<ThreadEvent> {
+        let running = match self.running_commands.remove(&ev.call_id) {
+            Some(running) => running,
+            None => {
+                warn!(
+                    call_id = ev.call_id,
+                    "ExecCommandEnd without matching ExecCommandBegin; skipping item.completed"
+                );
+                return Vec::new();
+            }
+        };
+
+        let status = match ev.exit_code {
+            0 => CommandExecutionStatus::Completed,
+            _ => CommandExecutionStatus::Failed,
+        };
+
+        let details = ThreadItemDetails::CommandExecution(CommandExecutionItem {
+            command: running.command,
+            aggregated_output: ev.aggregated_output.clone(),
+            exit_code: Some(ev.exit_code),
+            status,
+        });
+        let item = ThreadItem {
+            id: running.item_id,
+            details,
+        };
+
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
+    }
+
+    /// Converts plan steps into to-do entries, preserving order.
+    ///
+    /// An entry is marked completed only when its step status is `StepStatus::Completed`.
+    fn todo_items_from_plan(&self, args: &UpdatePlanArgs) -> Vec<TodoItem> {
+        let mut todos = Vec::with_capacity(args.plan.len());
+        for entry in &args.plan {
+            todos.push(TodoItem {
+                text: entry.step.clone(),
+                completed: matches!(entry.status, StepStatus::Completed),
+            });
+        }
+        todos
+    }
+
+    /// Emits the to-do list derived from a plan update.
+    ///
+    /// The first plan update opens a new to-do item (`item.started`). While a list is
+    /// already open, its stored entries are replaced and `item.updated` is emitted under
+    /// the same item id.
+    fn handle_plan_update(&mut self, args: &UpdatePlanArgs) -> Vec<ThreadEvent> {
+        let todos = self.todo_items_from_plan(args);
+
+        match self.running_todo_list.as_mut() {
+            Some(open_list) => {
+                open_list.items = todos.clone();
+                let item = ThreadItem {
+                    id: open_list.item_id.clone(),
+                    details: ThreadItemDetails::TodoList(TodoListItem { items: todos }),
+                };
+                vec![ThreadEvent::ItemUpdated(ItemUpdatedEvent { item })]
+            }
+            None => {
+                let item_id = self.get_next_item_id();
+                self.running_todo_list = Some(RunningTodoList {
+                    item_id: item_id.clone(),
+                    items: todos.clone(),
+                });
+                let item = ThreadItem {
+                    id: item_id,
+                    details: ThreadItemDetails::TodoList(TodoListItem { items: todos }),
+                };
+                vec![ThreadEvent::ItemStarted(ItemStartedEvent { item })]
+            }
+        }
+    }
+
+    fn handle_task_started(&mut self, _: &TaskStartedEvent) -> Vec<ThreadEvent> {
+        self.last_critical_error = None;
+        vec![ThreadEvent::TurnStarted(TurnStartedEvent {})]
+    }
+
+    /// Closes the turn.
+    ///
+    /// If a to-do list is still open it is completed first. The turn then ends with
+    /// `turn.failed` when an error was recorded during the turn, or with `turn.completed`
+    /// carrying the last seen token usage (all zeros if none was seen).
+    fn handle_task_complete(&mut self) -> Vec<ThreadEvent> {
+        let usage = self
+            .last_total_token_usage
+            .as_ref()
+            .map(|totals| Usage {
+                input_tokens: totals.input_tokens,
+                cached_input_tokens: totals.cached_input_tokens,
+                output_tokens: totals.output_tokens,
+            })
+            .unwrap_or_default();
+
+        let mut events = Vec::new();
+
+        if let Some(RunningTodoList { item_id, items }) = self.running_todo_list.take() {
+            let item = ThreadItem {
+                id: item_id,
+                details: ThreadItemDetails::TodoList(TodoListItem { items }),
+            };
+            events.push(ThreadEvent::ItemCompleted(ItemCompletedEvent { item }));
+        }
+
+        let closing = match self.last_critical_error.take() {
+            Some(error) => ThreadEvent::TurnFailed(TurnFailedEvent { error }),
+            None => ThreadEvent::TurnCompleted(TurnCompletedEvent { usage }),
+        };
+        events.push(closing);
+
+        events
+    }
+}
+
+impl EventProcessor for EventProcessorWithJsonOutput {
+    /// Reports the session configuration by running it through `process_event` as a
+    /// synthetic `SessionConfigured` event with an empty event id. The config and prompt
+    /// arguments are unused.
+    fn print_config_summary(&mut self, _: &Config, _: &str, ev: &SessionConfiguredEvent) {
+        let synthetic = Event {
+            id: String::new(),
+            msg: EventMsg::SessionConfigured(ev.clone()),
+        };
+        self.process_event(synthetic);
+    }
+
+    /// Prints every thread event derived from `event` to stdout, one JSON document per
+    /// line; serialization failures are logged and skipped.
+    ///
+    /// On `TaskComplete` the final agent message is passed to `handle_last_message` when an
+    /// output path was configured, and shutdown is requested. Every other event leaves the
+    /// processor running.
+    #[allow(clippy::print_stdout)]
+    fn process_event(&mut self, event: Event) -> CodexStatus {
+        for thread_event in self.collect_thread_events(&event) {
+            match serde_json::to_string(&thread_event) {
+                Ok(line) => println!("{line}"),
+                Err(e) => error!("Failed to serialize event: {e:?}"),
+            }
+        }
+
+        match event.msg {
+            EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
+                if let Some(output_file) = self.last_message_path.as_deref() {
+                    handle_last_message(last_agent_message.as_deref(), output_file);
+                }
+                CodexStatus::InitiateShutdown
+            }
+            _ => CodexStatus::Running,
+        }
+    }
+}
--- a/llmx-rs/exec/src/exec_events.rs
+++ b/llmx-rs/exec/src/exec_events.rs
@@ -0,0 +1,246 @@
+use mcp_types::ContentBlock as McpContentBlock;
+use serde::Deserialize;
+use serde::Serialize;
+use serde_json::Value as JsonValue;
+use ts_rs::TS;
+
+/// Top-level JSONL events emitted by codex exec
+///
+/// Serialized with a `type` field that holds the dotted event name given by each variant's
+/// `rename` (for example `thread.started` or `item.completed`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+#[serde(tag = "type")]
+pub enum ThreadEvent {
+    /// Emitted when a new thread is started as the first event.
+    #[serde(rename = "thread.started")]
+    ThreadStarted(ThreadStartedEvent),
+    /// Emitted when a turn is started by sending a new prompt to the model.
+    /// A turn encompasses all events that happen while agent is processing the prompt.
+    #[serde(rename = "turn.started")]
+    TurnStarted(TurnStartedEvent),
+    /// Emitted when a turn is completed. Typically right after the assistant's response.
+    #[serde(rename = "turn.completed")]
+    TurnCompleted(TurnCompletedEvent),
+    /// Indicates that a turn failed with an error.
+    #[serde(rename = "turn.failed")]
+    TurnFailed(TurnFailedEvent),
+    /// Emitted when a new item is added to the thread. Typically the item will be in an "in progress" state.
+    #[serde(rename = "item.started")]
+    ItemStarted(ItemStartedEvent),
+    /// Emitted when an item is updated.
+    #[serde(rename = "item.updated")]
+    ItemUpdated(ItemUpdatedEvent),
+    /// Signals that an item has reached a terminal state—either success or failure.
+    #[serde(rename = "item.completed")]
+    ItemCompleted(ItemCompletedEvent),
+    /// Represents an unrecoverable error emitted directly by the event stream.
+    #[serde(rename = "error")]
+    Error(ThreadErrorEvent),
+}
+
+/// Payload of the `thread.started` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct ThreadStartedEvent {
+    /// The identifier of the new thread. Can be used to resume the thread later.
+    pub thread_id: String,
+}
+
+/// Payload of the `turn.started` event; it currently carries no fields.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS, Default)]
+
+pub struct TurnStartedEvent {}
+
+/// Payload of the `turn.completed` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct TurnCompletedEvent {
+    /// Token usage reported when the turn completed.
+    pub usage: Usage,
+}
+
+/// Payload of the `turn.failed` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct TurnFailedEvent {
+    /// The error that caused the turn to fail.
+    pub error: ThreadErrorEvent,
+}
+
+/// Describes the usage of tokens during a turn.
+///
+/// The `Default` value has every counter at zero.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS, Default)]
+pub struct Usage {
+    /// The number of input tokens used during the turn.
+    pub input_tokens: i64,
+    /// The number of cached input tokens used during the turn.
+    pub cached_input_tokens: i64,
+    /// The number of output tokens used during the turn.
+    pub output_tokens: i64,
+}
+
+/// Payload of the `item.started` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct ItemStartedEvent {
+    /// The item that was just added to the thread.
+    pub item: ThreadItem,
+}
+
+/// Payload of the `item.completed` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct ItemCompletedEvent {
+    /// The item in its terminal state.
+    pub item: ThreadItem,
+}
+
+/// Payload of the `item.updated` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct ItemUpdatedEvent {
+    /// The item with its updated contents.
+    pub item: ThreadItem,
+}
+
+/// Fatal error emitted by the stream.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct ThreadErrorEvent {
+    /// Human-readable description of the error.
+    pub message: String,
+}
+
+/// Canonical representation of a thread item and its domain-specific payload.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct ThreadItem {
+    /// Identifier of the item; started, updated and completed events for the same item
+    /// share it.
+    pub id: String,
+    /// Type-specific payload. Flattened, so its `type` tag and fields appear next to `id`
+    /// rather than nested under a `details` key.
+    #[serde(flatten)]
+    pub details: ThreadItemDetails,
+}
+
+/// Typed payloads for each supported thread item type.
+///
+/// Serialized with a `type` field holding the snake_case variant name
+/// (for example `agent_message` or `command_execution`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ThreadItemDetails {
+    /// Response from the agent.
+    /// Either a natural-language response or a JSON string when structured output is requested.
+    AgentMessage(AgentMessageItem),
+    /// Agent's reasoning summary.
+    Reasoning(ReasoningItem),
+    /// Tracks a command executed by the agent. The item starts when the command is
+    /// spawned, and completes when the process exits with an exit code.
+    CommandExecution(CommandExecutionItem),
+    /// Represents a set of file changes by the agent. The item is emitted only as a
+    /// completed event once the patch succeeds or fails.
+    FileChange(FileChangeItem),
+    /// Represents a call to an MCP tool. The item starts when the invocation is
+    /// dispatched and completes when the MCP server reports success or failure.
+    McpToolCall(McpToolCallItem),
+    /// Captures a web search request. It starts when the search is kicked off
+    /// and completes when results are returned to the agent.
+    WebSearch(WebSearchItem),
+    /// Tracks the agent's running to-do list. It starts when the plan is first
+    /// issued, updates as steps change state, and completes when the turn ends.
+    TodoList(TodoListItem),
+    /// Describes a non-fatal error surfaced as an item.
+    Error(ErrorItem),
+}
+
+/// Response from the agent.
+/// Either a natural-language response or a JSON string when structured output is requested.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct AgentMessageItem {
+    /// The message text.
+    pub text: String,
+}
+
+/// Agent's reasoning summary.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct ReasoningItem {
+    /// The reasoning text.
+    pub text: String,
+}
+
+/// The status of a command execution.
+///
+/// Serialized in snake_case (`in_progress`, `completed`, `failed`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default, TS)]
+#[serde(rename_all = "snake_case")]
+pub enum CommandExecutionStatus {
+    /// The command has not finished yet. This is the default status.
+    #[default]
+    InProgress,
+    /// The command finished successfully.
+    Completed,
+    /// The command finished unsuccessfully.
+    Failed,
+}
+
+/// A command executed by the agent.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct CommandExecutionItem {
+    /// The command line, rendered as a single string.
+    pub command: String,
+    /// Output collected from the command.
+    pub aggregated_output: String,
+    /// Exit code of the process, or `None` when it is not available.
+    pub exit_code: Option<i32>,
+    /// Current status of the command.
+    pub status: CommandExecutionStatus,
+}
+
+/// A single file change within a `FileChangeItem`.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct FileUpdateChange {
+    /// Path of the affected file.
+    pub path: String,
+    /// Whether the file was added, deleted or updated.
+    pub kind: PatchChangeKind,
+}
+
+/// The status of a file change.
+///
+/// Serialized in snake_case (`completed`, `failed`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+#[serde(rename_all = "snake_case")]
+pub enum PatchApplyStatus {
+    /// The patch was applied successfully.
+    Completed,
+    /// The patch could not be applied.
+    Failed,
+}
+
+/// A set of file changes by the agent.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct FileChangeItem {
+    /// The individual file changes that make up the patch.
+    pub changes: Vec<FileUpdateChange>,
+    /// Whether applying the patch succeeded or failed.
+    pub status: PatchApplyStatus,
+}
+
+/// Indicates the type of the file change.
+///
+/// Serialized in snake_case (`add`, `delete`, `update`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+#[serde(rename_all = "snake_case")]
+pub enum PatchChangeKind {
+    /// The file was added.
+    Add,
+    /// The file was deleted.
+    Delete,
+    /// The file was updated.
+    Update,
+}
+
+/// The status of an MCP tool call.
+///
+/// Serialized in snake_case (`in_progress`, `completed`, `failed`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default, TS)]
+#[serde(rename_all = "snake_case")]
+pub enum McpToolCallStatus {
+    /// The call has not finished yet. This is the default status.
+    #[default]
+    InProgress,
+    /// The call finished successfully.
+    Completed,
+    /// The call finished unsuccessfully.
+    Failed,
+}
+
+/// Result payload produced by an MCP tool invocation.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct McpToolCallItemResult {
+    /// Content blocks returned by the tool.
+    pub content: Vec<McpContentBlock>,
+    /// Structured content returned by the tool, if any.
+    pub structured_content: Option<JsonValue>,
+}
+
+/// Error details reported by a failed MCP tool invocation.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct McpToolCallItemError {
+    /// Human-readable description of the failure.
+    pub message: String,
+}
+
+/// A call to an MCP tool.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct McpToolCallItem {
+    /// Name of the MCP server handling the call.
+    pub server: String,
+    /// Name of the tool being invoked.
+    pub tool: String,
+    /// Arguments passed to the tool. Falls back to the default JSON value when the field
+    /// is absent during deserialization.
+    #[serde(default)]
+    pub arguments: JsonValue,
+    /// Result of the call, when one is available.
+    pub result: Option<McpToolCallItemResult>,
+    /// Error details, when the call failed.
+    pub error: Option<McpToolCallItemError>,
+    /// Current status of the call.
+    pub status: McpToolCallStatus,
+}
+
+/// A web search request.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct WebSearchItem {
+    /// The search query.
+    pub query: String,
+}
+
+/// An error notification.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct ErrorItem {
+    /// Human-readable description of the problem.
+    pub message: String,
+}
+
+/// An item in agent's to-do list.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct TodoItem {
+    /// Description of the step.
+    pub text: String,
+    /// Whether the step has been completed.
+    pub completed: bool,
+}
+
+/// The agent's to-do list as a whole.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
+pub struct TodoListItem {
+    /// The entries of the list, in order.
+    pub items: Vec<TodoItem>,
+}
--- a/llmx-rs/exec/src/lib.rs
+++ b/llmx-rs/exec/src/lib.rs
@@ -0,0 +1,427 @@
+// - In the default output mode, it is paramount that the only thing written to
+//   stdout is the final message (if any).
+// - In --json mode, stdout must be valid JSONL, one event per line.
+// For both modes, any other output must be written to stderr.
+#![deny(clippy::print_stdout)]
+
+mod cli;
+mod event_processor;
+mod event_processor_with_human_output;
+pub mod event_processor_with_jsonl_output;
+pub mod exec_events;
+
+pub use cli::Cli;
+use codex_core::AuthManager;
+use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID;
+use codex_core::ConversationManager;
+use codex_core::NewConversation;
+use codex_core::auth::enforce_login_restrictions;
+use codex_core::config::Config;
+use codex_core::config::ConfigOverrides;
+use codex_core::git_info::get_git_repo_root;
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::Op;
+use codex_core::protocol::SessionSource;
+use codex_ollama::DEFAULT_OSS_MODEL;
+use codex_protocol::config_types::SandboxMode;
+use codex_protocol::user_input::UserInput;
+use event_processor_with_human_output::EventProcessorWithHumanOutput;
+use event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
+use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge;
+use serde_json::Value;
+use std::io::IsTerminal;
+use std::io::Read;
+use std::path::PathBuf;
+use supports_color::Stream;
+use tracing::debug;
+use tracing::error;
+use tracing::info;
+use tracing_subscriber::EnvFilter;
+use tracing_subscriber::prelude::*;
+
+use crate::cli::Command as ExecCommand;
+use crate::event_processor::CodexStatus;
+use crate::event_processor::EventProcessor;
+use codex_core::default_client::set_default_originator;
+use codex_core::find_conversation_path_by_id_str;
+
+/// Runs one non-interactive session from parsed CLI arguments.
+///
+/// In order, this function:
+/// 1. resolves the prompt (subcommand argument, parent argument, or stdin),
+/// 2. loads the optional output schema and the configuration,
+/// 3. installs the tracing subscriber (stderr formatter plus an optional
+///    OpenTelemetry bridge),
+/// 4. starts a new conversation or resumes an existing one,
+/// 5. submits the prompt (and any images) as a single user turn, and
+/// 6. feeds every received event to the selected [`EventProcessor`] until it
+///    reports shutdown.
+///
+/// # Errors
+///
+/// Returns an error when loading the configuration, preparing the OSS model,
+/// resolving the resume path, creating/resuming the conversation, or
+/// submitting an operation fails.
+///
+/// Note that several failure paths do not return at all: they print a message
+/// to stderr and call `std::process::exit(1)` (missing prompt, invalid `-c`
+/// overrides, login restrictions, OTEL setup failure, failed git-repo check,
+/// and an error event seen during the run).
+pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
+    // A failure to set the originator is only logged; it does not abort the run.
+    if let Err(err) = set_default_originator("codex_exec".to_string()) {
+        tracing::warn!(?err, "Failed to set codex exec originator override {err:?}");
+    }
+
+    let Cli {
+        command,
+        images,
+        model: model_cli_arg,
+        oss,
+        config_profile,
+        full_auto,
+        dangerously_bypass_approvals_and_sandbox,
+        cwd,
+        skip_git_repo_check,
+        color,
+        last_message_file,
+        json: json_mode,
+        sandbox_mode: sandbox_mode_cli_arg,
+        prompt,
+        output_schema: output_schema_path,
+        config_overrides,
+    } = cli;
+
+    // Determine the prompt source (parent or subcommand) and read from stdin if needed.
+    let prompt_arg = match &command {
+        // Allow prompt before the subcommand by falling back to the parent-level prompt
+        // when the Resume subcommand did not provide its own prompt.
+        Some(ExecCommand::Resume(args)) => args.prompt.clone().or(prompt),
+        None => prompt,
+    };
+
+    let prompt = match prompt_arg {
+        Some(p) if p != "-" => p,
+        // Either `-` was passed or no positional arg.
+        maybe_dash => {
+            // When no arg (None) **and** stdin is a TTY, bail out early – unless the
+            // user explicitly forced reading via `-`.
+            let force_stdin = matches!(maybe_dash.as_deref(), Some("-"));
+
+            if std::io::stdin().is_terminal() && !force_stdin {
+                eprintln!(
+                    "No prompt provided. Either specify one as an argument or pipe the prompt into stdin."
+                );
+                std::process::exit(1);
+            }
+
+            // Ensure the user knows we are waiting on stdin, as they may
+            // have gotten into this state by mistake. If so, and they are not
+            // writing to stdin, Codex will hang indefinitely, so this should
+            // help them debug in that case.
+            if !force_stdin {
+                eprintln!("Reading prompt from stdin...");
+            }
+            let mut buffer = String::new();
+            if let Err(e) = std::io::stdin().read_to_string(&mut buffer) {
+                eprintln!("Failed to read prompt from stdin: {e}");
+                std::process::exit(1);
+            } else if buffer.trim().is_empty() {
+                // Whitespace-only input counts as "no prompt".
+                eprintln!("No prompt provided via stdin.");
+                std::process::exit(1);
+            }
+            // The untrimmed stdin contents are used as the prompt.
+            buffer
+        }
+    };
+
+    // Exits the process if the schema file cannot be read or parsed.
+    let output_schema = load_output_schema(output_schema_path);
+
+    // Decide independently for stdout and stderr whether ANSI colors are used.
+    let (stdout_with_ansi, stderr_with_ansi) = match color {
+        cli::Color::Always => (true, true),
+        cli::Color::Never => (false, false),
+        cli::Color::Auto => (
+            supports_color::on_cached(Stream::Stdout).is_some(),
+            supports_color::on_cached(Stream::Stderr).is_some(),
+        ),
+    };
+
+    // Build fmt layer (existing logging) to compose with OTEL layer.
+    let default_level = "error";
+
+    // Build env_filter separately and attach via with_filter.
+    let env_filter = EnvFilter::try_from_default_env()
+        .or_else(|_| EnvFilter::try_new(default_level))
+        .unwrap_or_else(|_| EnvFilter::new(default_level));
+
+    // Logs go to stderr so that stdout stays reserved for the run's output.
+    let fmt_layer = tracing_subscriber::fmt::layer()
+        .with_ansi(stderr_with_ansi)
+        .with_writer(std::io::stderr)
+        .with_filter(env_filter);
+
+    // Precedence: `full_auto` wins over the bypass flag, which wins over an
+    // explicitly passed sandbox mode.
+    let sandbox_mode = if full_auto {
+        Some(SandboxMode::WorkspaceWrite)
+    } else if dangerously_bypass_approvals_and_sandbox {
+        Some(SandboxMode::DangerFullAccess)
+    } else {
+        sandbox_mode_cli_arg.map(Into::<SandboxMode>::into)
+    };
+
+    // When using `--oss`, let the bootstrapper pick the model (defaulting to
+    // gpt-oss:20b) and ensure it is present locally. Also, force the built‑in
+    // `oss` model provider.
+    let model = if let Some(model) = model_cli_arg {
+        Some(model)
+    } else if oss {
+        Some(DEFAULT_OSS_MODEL.to_owned())
+    } else {
+        None // No model specified, will use the default.
+    };
+
+    let model_provider = if oss {
+        Some(BUILT_IN_OSS_MODEL_PROVIDER_ID.to_string())
+    } else {
+        None // No specific model provider override.
+    };
+
+    // Load configuration and determine approval policy
+    let overrides = ConfigOverrides {
+        model,
+        review_model: None,
+        config_profile,
+        // Default to never ask for approvals in headless mode. Feature flags can override.
+        approval_policy: Some(AskForApproval::Never),
+        sandbox_mode,
+        // Prefer the canonical path; fall back to the path as given if
+        // canonicalization fails.
+        cwd: cwd.map(|p| p.canonicalize().unwrap_or(p)),
+        model_provider,
+        codex_linux_sandbox_exe,
+        base_instructions: None,
+        developer_instructions: None,
+        compact_prompt: None,
+        include_apply_patch_tool: None,
+        show_raw_agent_reasoning: oss.then_some(true),
+        tools_web_search_request: None,
+        experimental_sandbox_command_assessment: None,
+        additional_writable_roots: Vec::new(),
+    };
+    // Parse `-c` overrides.
+    let cli_kv_overrides = match config_overrides.parse_overrides() {
+        Ok(v) => v,
+        Err(e) => {
+            eprintln!("Error parsing -c overrides: {e}");
+            std::process::exit(1);
+        }
+    };
+
+    let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides).await?;
+
+    if let Err(err) = enforce_login_restrictions(&config).await {
+        eprintln!("{err}");
+        std::process::exit(1);
+    }
+
+    let otel = codex_core::otel_init::build_provider(&config, env!("CARGO_PKG_VERSION"));
+
+    #[allow(clippy::print_stderr)]
+    let otel = match otel {
+        Ok(otel) => otel,
+        Err(e) => {
+            eprintln!("Could not create otel exporter: {e}");
+            std::process::exit(1);
+        }
+    };
+
+    // Install the global subscriber. The result of `try_init` is deliberately
+    // discarded in both branches, so a failed installation is not treated as
+    // an error here.
+    if let Some(provider) = otel.as_ref() {
+        let otel_layer = OpenTelemetryTracingBridge::new(&provider.logger).with_filter(
+            tracing_subscriber::filter::filter_fn(codex_core::otel_init::codex_export_filter),
+        );
+
+        let _ = tracing_subscriber::registry()
+            .with(fmt_layer)
+            .with(otel_layer)
+            .try_init();
+    } else {
+        let _ = tracing_subscriber::registry().with(fmt_layer).try_init();
+    }
+
+    // JSON mode gets the JSONL processor; every other case gets the
+    // human-readable one.
+    let mut event_processor: Box<dyn EventProcessor> = match json_mode {
+        true => Box::new(EventProcessorWithJsonOutput::new(last_message_file.clone())),
+        _ => Box::new(EventProcessorWithHumanOutput::create_with_ansi(
+            stdout_with_ansi,
+            &config,
+            last_message_file.clone(),
+        )),
+    };
+
+    if oss {
+        codex_ollama::ensure_oss_ready(&config)
+            .await
+            .map_err(|e| anyhow::anyhow!("OSS setup failed: {e}"))?;
+    }
+
+    // Snapshot the per-turn settings from the loaded config; they are passed
+    // along with the initial user turn below.
+    let default_cwd = config.cwd.to_path_buf();
+    let default_approval_policy = config.approval_policy;
+    let default_sandbox_policy = config.sandbox_policy.clone();
+    let default_model = config.model.clone();
+    let default_effort = config.model_reasoning_effort;
+    let default_summary = config.model_reasoning_summary;
+
+    // Refuse to run when no git repository root is found for the working
+    // directory, unless the check was explicitly skipped.
+    if !skip_git_repo_check && get_git_repo_root(&default_cwd).is_none() {
+        eprintln!("Not inside a trusted directory and --skip-git-repo-check was not specified.");
+        std::process::exit(1);
+    }
+
+    let auth_manager = AuthManager::shared(
+        config.codex_home.clone(),
+        true,
+        config.cli_auth_credentials_store_mode,
+    );
+    let conversation_manager = ConversationManager::new(auth_manager.clone(), SessionSource::Exec);
+
+    // Handle resume subcommand by resolving a rollout path and using explicit resume API.
+    // If no rollout path can be resolved, a fresh conversation is started instead.
+    let NewConversation {
+        conversation_id: _,
+        conversation,
+        session_configured,
+    } = if let Some(ExecCommand::Resume(args)) = command {
+        let resume_path = resolve_resume_path(&config, &args).await?;
+
+        if let Some(path) = resume_path {
+            conversation_manager
+                .resume_conversation_from_rollout(config.clone(), path, auth_manager.clone())
+                .await?
+        } else {
+            conversation_manager
+                .new_conversation(config.clone())
+                .await?
+        }
+    } else {
+        conversation_manager
+            .new_conversation(config.clone())
+            .await?
+    };
+    // Print the effective configuration and prompt so users can see what Codex
+    // is using.
+    event_processor.print_config_summary(&config, &prompt, &session_configured);
+
+    info!("Codex initialized with event: {session_configured:?}");
+
+    // Background task: forwards conversation events into `tx` and reacts to
+    // Ctrl-C. The main loop below consumes them from `rx`.
+    let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<Event>();
+    {
+        let conversation = conversation.clone();
+        tokio::spawn(async move {
+            loop {
+                tokio::select! {
+                    _ = tokio::signal::ctrl_c() => {
+                        tracing::debug!("Keyboard interrupt");
+                        // Immediately notify Codex to abort any in‑flight task.
+                        conversation.submit(Op::Interrupt).await.ok();
+
+                        // Exit the inner loop and return to the main input prompt. The codex
+                        // will emit a `TurnInterrupted` (Error) event which is drained later.
+                        // NOTE(review): this `break` ends the forwarding task, so `tx` is
+                        // dropped and no later event is forwarded from here — confirm that
+                        // the "drained later" statement above still holds.
+                        break;
+                    }
+                    res = conversation.next_event() => match res {
+                        Ok(event) => {
+                            debug!("Received event: {event:?}");
+
+                            // Inspect the event before it is moved into the channel.
+                            let is_shutdown_complete = matches!(event.msg, EventMsg::ShutdownComplete);
+                            if let Err(e) = tx.send(event) {
+                                error!("Error sending event: {e:?}");
+                                break;
+                            }
+                            if is_shutdown_complete {
+                                info!("Received shutdown event, exiting event loop.");
+                                break;
+                            }
+                        },
+                        Err(e) => {
+                            error!("Error receiving event: {e:?}");
+                            break;
+                        }
+                    }
+                }
+            }
+        });
+    }
+
+    // Package images and prompt into a single user input turn.
+    // Images come first, followed by the prompt text.
+    let mut items: Vec<UserInput> = images
+        .into_iter()
+        .map(|path| UserInput::LocalImage { path })
+        .collect();
+    items.push(UserInput::Text { text: prompt });
+    let initial_prompt_task_id = conversation
+        .submit(Op::UserTurn {
+            items,
+            cwd: default_cwd,
+            approval_policy: default_approval_policy,
+            sandbox_policy: default_sandbox_policy,
+            model: default_model,
+            effort: default_effort,
+            summary: default_summary,
+            final_output_json_schema: output_schema,
+        })
+        .await?;
+    info!("Sent prompt with event ID: {initial_prompt_task_id}");
+
+    // Run the loop until the task is complete.
+    // Track whether a fatal error was reported by the server so we can
+    // exit with a non-zero status for automation-friendly signaling.
+    let mut error_seen = false;
+    while let Some(event) = rx.recv().await {
+        if matches!(event.msg, EventMsg::Error(_)) {
+            error_seen = true;
+        }
+        let shutdown: CodexStatus = event_processor.process_event(event);
+        match shutdown {
+            CodexStatus::Running => continue,
+            // Ask the conversation to shut down, then keep consuming events.
+            CodexStatus::InitiateShutdown => {
+                conversation.submit(Op::Shutdown).await?;
+            }
+            CodexStatus::Shutdown => {
+                break;
+            }
+        }
+    }
+    // The final output is printed before a possible non-zero exit.
+    event_processor.print_final_output();
+    if error_seen {
+        std::process::exit(1);
+    }
+
+    Ok(())
+}
+
+/// Determines which rollout file, if any, a resume request refers to.
+///
+/// * When `args.last` is set, the first entry returned by
+///   `RolloutRecorder::list_conversations` (filtered by the configured model
+///   provider) is used. A listing failure is logged and treated as "nothing
+///   to resume" rather than as an error.
+/// * Otherwise, when a session id was supplied, it is looked up below
+///   `config.codex_home`; a lookup failure is propagated to the caller.
+/// * With neither, `Ok(None)` is returned.
+async fn resolve_resume_path(
+    config: &Config,
+    args: &crate::cli::ResumeArgs,
+) -> anyhow::Result<Option<PathBuf>> {
+    if !args.last {
+        // No "last" request: resolve by explicit session id, if one was given.
+        let Some(session_id) = args.session_id.as_deref() else {
+            return Ok(None);
+        };
+        let located = find_conversation_path_by_id_str(&config.codex_home, session_id).await?;
+        return Ok(located);
+    }
+
+    let provider_filter = vec![config.model_provider_id.clone()];
+    let listing = codex_core::RolloutRecorder::list_conversations(
+        &config.codex_home,
+        1,
+        None,
+        &[],
+        Some(provider_filter.as_slice()),
+        &config.model_provider_id,
+    )
+    .await;
+
+    let newest = match listing {
+        Ok(page) => page.items.first().map(|entry| entry.path.clone()),
+        Err(e) => {
+            // Best effort: report the problem and behave as if nothing was found.
+            error!("Error listing conversations: {e}");
+            None
+        }
+    };
+    Ok(newest)
+}
+
+/// Reads and parses the JSON output schema at `path`.
+///
+/// Returns `None` when no path was given. If the file cannot be read or does
+/// not contain valid JSON, a message is printed to stderr and the process
+/// exits with status 1, so this function never reports failure to its caller.
+fn load_output_schema(path: Option<PathBuf>) -> Option<Value> {
+    let schema_path = path?;
+
+    let raw_schema = std::fs::read_to_string(&schema_path).unwrap_or_else(|err| {
+        eprintln!(
+            "Failed to read output schema file {}: {err}",
+            schema_path.display()
+        );
+        std::process::exit(1);
+    });
+
+    let parsed = serde_json::from_str::<Value>(&raw_schema).unwrap_or_else(|err| {
+        eprintln!(
+            "Output schema file {} is not valid JSON: {err}",
+            schema_path.display()
+        );
+        std::process::exit(1);
+    });
+
+    Some(parsed)
+}
--- a/llmx-rs/exec/src/main.rs
+++ b/llmx-rs/exec/src/main.rs
@@ -0,0 +1,40 @@
+//! Entry-point for the `codex-exec` binary.
+//!
+//! When this CLI is invoked normally, it parses the standard `codex-exec` CLI
+//! options and launches the non-interactive Codex agent. However, if it is
+//! invoked with arg0 as `codex-linux-sandbox`, we instead treat the invocation
+//! as a request to run the logic for the standalone `codex-linux-sandbox`
+//! executable (i.e., parse any -s args and then run a *sandboxed* command under
+//! Landlock + seccomp).
+//!
+//! This allows us to ship a completely separate set of functionality as part
+//! of the `codex-exec` binary.
+use clap::Parser;
+use codex_arg0::arg0_dispatch_or_else;
+use codex_common::CliConfigOverrides;
+use codex_exec::Cli;
+use codex_exec::run_main;
+
+/// Root argument parser of the binary.
+///
+/// Both fields are flattened into a single clap parser: the root-level config
+/// overrides and the regular [`Cli`] arguments.
+#[derive(Parser, Debug)]
+struct TopCli {
+    // Config overrides given at the root level of the command line.
+    #[clap(flatten)]
+    config_overrides: CliConfigOverrides,
+
+    // The arguments handed on to `run_main`.
+    #[clap(flatten)]
+    inner: Cli,
+}
+
+/// Binary entry point: dispatches on arg0 and otherwise parses the CLI and
+/// runs the non-interactive agent via [`run_main`].
+fn main() -> anyhow::Result<()> {
+    arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move {
+        let TopCli {
+            config_overrides: root_overrides,
+            inner: mut cli,
+        } = TopCli::parse();
+
+        // Put the root-level overrides in front of the ones already held by the
+        // inner CLI struct, so downstream logic only has to look at `cli`.
+        cli.config_overrides
+            .raw_overrides
+            .splice(0..0, root_overrides.raw_overrides);
+
+        run_main(cli, codex_linux_sandbox_exe).await?;
+        Ok(())
+    })
+}
--- a/llmx-rs/exec/tests/all.rs
+++ b/llmx-rs/exec/tests/all.rs
@@ -0,0 +1,5 @@
+// Single integration test binary that aggregates all test modules.
+// The submodules live in `tests/suite/`.
+mod suite;
+
+mod event_processor_with_json_output;
--- a/llmx-rs/exec/tests/event_processor_with_json_output.rs
+++ b/llmx-rs/exec/tests/event_processor_with_json_output.rs
@@ -0,0 +1,939 @@
+use codex_core::protocol::AgentMessageEvent;
+use codex_core::protocol::AgentReasoningEvent;
+use codex_core::protocol::ErrorEvent;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::ExecCommandBeginEvent;
+use codex_core::protocol::ExecCommandEndEvent;
+use codex_core::protocol::FileChange;
+use codex_core::protocol::McpInvocation;
+use codex_core::protocol::McpToolCallBeginEvent;
+use codex_core::protocol::McpToolCallEndEvent;
+use codex_core::protocol::PatchApplyBeginEvent;
+use codex_core::protocol::PatchApplyEndEvent;
+use codex_core::protocol::SessionConfiguredEvent;
+use codex_core::protocol::WarningEvent;
+use codex_core::protocol::WebSearchEndEvent;
+use codex_exec::event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
+use codex_exec::exec_events::AgentMessageItem;
+use codex_exec::exec_events::CommandExecutionItem;
+use codex_exec::exec_events::CommandExecutionStatus;
+use codex_exec::exec_events::ErrorItem;
+use codex_exec::exec_events::ItemCompletedEvent;
+use codex_exec::exec_events::ItemStartedEvent;
+use codex_exec::exec_events::ItemUpdatedEvent;
+use codex_exec::exec_events::McpToolCallItem;
+use codex_exec::exec_events::McpToolCallItemError;
+use codex_exec::exec_events::McpToolCallItemResult;
+use codex_exec::exec_events::McpToolCallStatus;
+use codex_exec::exec_events::PatchApplyStatus;
+use codex_exec::exec_events::PatchChangeKind;
+use codex_exec::exec_events::ReasoningItem;
+use codex_exec::exec_events::ThreadErrorEvent;
+use codex_exec::exec_events::ThreadEvent;
+use codex_exec::exec_events::ThreadItem;
+use codex_exec::exec_events::ThreadItemDetails;
+use codex_exec::exec_events::ThreadStartedEvent;
+use codex_exec::exec_events::TodoItem as ExecTodoItem;
+use codex_exec::exec_events::TodoListItem as ExecTodoListItem;
+use codex_exec::exec_events::TurnCompletedEvent;
+use codex_exec::exec_events::TurnFailedEvent;
+use codex_exec::exec_events::TurnStartedEvent;
+use codex_exec::exec_events::Usage;
+use codex_exec::exec_events::WebSearchItem;
+use codex_protocol::plan_tool::PlanItemArg;
+use codex_protocol::plan_tool::StepStatus;
+use codex_protocol::plan_tool::UpdatePlanArgs;
+use mcp_types::CallToolResult;
+use mcp_types::ContentBlock;
+use mcp_types::TextContent;
+use pretty_assertions::assert_eq;
+use serde_json::json;
+use std::path::PathBuf;
+use std::time::Duration;
+
+/// Test helper: wraps `msg` in an [`Event`] carrying the given id.
+fn event(id: &str, msg: EventMsg) -> Event {
+    let id = id.to_owned();
+    Event { id, msg }
+}
+
+/// A `SessionConfigured` event must yield exactly one `ThreadStarted` event
+/// whose thread id is the session id.
+#[test]
+fn session_configured_produces_thread_started_event() {
+    let mut processor = EventProcessorWithJsonOutput::new(None);
+
+    let configured = SessionConfiguredEvent {
+        session_id: codex_protocol::ConversationId::from_string(
+            "67e55044-10b1-426f-9247-bb680e5fe0c8",
+        )
+        .unwrap(),
+        model: "codex-mini-latest".to_string(),
+        reasoning_effort: None,
+        history_log_id: 0,
+        history_entry_count: 0,
+        initial_messages: None,
+        rollout_path: PathBuf::from("/tmp/rollout.json"),
+    };
+    let incoming = event("e1", EventMsg::SessionConfigured(configured));
+
+    let emitted = processor.collect_thread_events(&incoming);
+
+    let expected = vec![ThreadEvent::ThreadStarted(ThreadStartedEvent {
+        thread_id: "67e55044-10b1-426f-9247-bb680e5fe0c8".to_string(),
+    })];
+    assert_eq!(emitted, expected);
+}
+
+/// A `TaskStarted` event must yield exactly one `TurnStarted` event.
+#[test]
+fn task_started_produces_turn_started_event() {
+    let mut processor = EventProcessorWithJsonOutput::new(None);
+
+    let task_started = EventMsg::TaskStarted(codex_core::protocol::TaskStartedEvent {
+        model_context_window: Some(32_000),
+    });
+    let emitted = processor.collect_thread_events(&event("t1", task_started));
+
+    let expected = vec![ThreadEvent::TurnStarted(TurnStartedEvent {})];
+    assert_eq!(emitted, expected);
+}
+
+/// A `WebSearchEnd` event must yield a completed web-search item that carries
+/// the query and the first item id.
+#[test]
+fn web_search_end_emits_item_completed() {
+    let mut processor = EventProcessorWithJsonOutput::new(None);
+    let query = "rust async await".to_string();
+
+    let search_end = EventMsg::WebSearchEnd(WebSearchEndEvent {
+        call_id: "call-123".to_string(),
+        query: query.clone(),
+    });
+    let emitted = processor.collect_thread_events(&event("w1", search_end));
+
+    let expected_item = ThreadItem {
+        id: "item_0".to_string(),
+        details: ThreadItemDetails::WebSearch(WebSearchItem { query }),
+    };
+    assert_eq!(
+        emitted,
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
+            item: expected_item,
+        })]
+    );
+}
+
+/// Within one turn, plan updates map onto a single to-do list item: the first
+/// update starts it, the next one updates it, and task completion completes
+/// it (followed by the turn-completed event).
+#[test]
+fn plan_update_emits_todo_list_started_updated_and_completed() {
+    let mut processor = EventProcessorWithJsonOutput::new(None);
+
+    // Two-step plan; only the status of the first step varies between updates.
+    let plan_update = |id: &str, step_one_status: StepStatus| {
+        event(
+            id,
+            EventMsg::PlanUpdate(UpdatePlanArgs {
+                explanation: None,
+                plan: vec![
+                    PlanItemArg {
+                        step: "step one".to_string(),
+                        status: step_one_status,
+                    },
+                    PlanItemArg {
+                        step: "step two".to_string(),
+                        status: StepStatus::InProgress,
+                    },
+                ],
+            }),
+        )
+    };
+    // Expected to-do list item; only the first entry's flag varies.
+    let todo_list = |step_one_completed: bool| ThreadItem {
+        id: "item_0".to_string(),
+        details: ThreadItemDetails::TodoList(ExecTodoListItem {
+            items: vec![
+                ExecTodoItem {
+                    text: "step one".to_string(),
+                    completed: step_one_completed,
+                },
+                ExecTodoItem {
+                    text: "step two".to_string(),
+                    completed: false,
+                },
+            ],
+        }),
+    };
+
+    // First plan update => item.started (todo_list)
+    let started = processor.collect_thread_events(&plan_update("p1", StepStatus::Pending));
+    assert_eq!(
+        started,
+        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
+            item: todo_list(false),
+        })]
+    );
+
+    // Second plan update in same turn => item.updated (same id)
+    let updated = processor.collect_thread_events(&plan_update("p2", StepStatus::Completed));
+    assert_eq!(
+        updated,
+        vec![ThreadEvent::ItemUpdated(ItemUpdatedEvent {
+            item: todo_list(true),
+        })]
+    );
+
+    // Task completes => item.completed (same id, latest state)
+    let task_complete = event(
+        "p3",
+        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
+            last_agent_message: None,
+        }),
+    );
+    let completed = processor.collect_thread_events(&task_complete);
+    assert_eq!(
+        completed,
+        vec![
+            ThreadEvent::ItemCompleted(ItemCompletedEvent {
+                item: todo_list(true),
+            }),
+            ThreadEvent::TurnCompleted(TurnCompletedEvent {
+                usage: Usage::default(),
+            }),
+        ]
+    );
+}
+
+/// A successful MCP tool call produces an `ItemStarted` event on begin and an
+/// `ItemCompleted` event with the same item id on end.
+#[test]
+fn mcp_tool_call_begin_and_end_emit_item_events() {
+    let mut processor = EventProcessorWithJsonOutput::new(None);
+    let invocation = McpInvocation {
+        server: "server_a".to_string(),
+        tool: "tool_x".to_string(),
+        arguments: Some(json!({ "key": "value" })),
+    };
+    // Expected item; only the result and the status differ between phases.
+    let tool_call_item = |result: Option<McpToolCallItemResult>, status: McpToolCallStatus| {
+        ThreadItem {
+            id: "item_0".to_string(),
+            details: ThreadItemDetails::McpToolCall(McpToolCallItem {
+                server: "server_a".to_string(),
+                tool: "tool_x".to_string(),
+                arguments: json!({ "key": "value" }),
+                result,
+                error: None,
+                status,
+            }),
+        }
+    };
+
+    let begin_msg = EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
+        call_id: "call-1".to_string(),
+        invocation: invocation.clone(),
+    });
+    let on_begin = processor.collect_thread_events(&event("m1", begin_msg));
+    assert_eq!(
+        on_begin,
+        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
+            item: tool_call_item(None, McpToolCallStatus::InProgress),
+        })]
+    );
+
+    let end_msg = EventMsg::McpToolCallEnd(McpToolCallEndEvent {
+        call_id: "call-1".to_string(),
+        invocation,
+        duration: Duration::from_secs(1),
+        result: Ok(CallToolResult {
+            content: Vec::new(),
+            is_error: None,
+            structured_content: None,
+        }),
+    });
+    let on_end = processor.collect_thread_events(&event("m2", end_msg));
+    let empty_result = McpToolCallItemResult {
+        content: Vec::new(),
+        structured_content: None,
+    };
+    assert_eq!(
+        on_end,
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
+            item: tool_call_item(Some(empty_result), McpToolCallStatus::Completed),
+        })]
+    );
+}
+
+/// An MCP tool call whose result is `Err` completes the item with the
+/// `Failed` status, no result, and the error message attached.
+#[test]
+fn mcp_tool_call_failure_sets_failed_status() {
+    let mut processor = EventProcessorWithJsonOutput::new(None);
+    let invocation = McpInvocation {
+        server: "server_b".to_string(),
+        tool: "tool_y".to_string(),
+        arguments: Some(json!({ "param": 42 })),
+    };
+
+    // The begin event only sets up state; its output is not inspected here.
+    let begin_msg = EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
+        call_id: "call-2".to_string(),
+        invocation: invocation.clone(),
+    });
+    processor.collect_thread_events(&event("m3", begin_msg));
+
+    let end_msg = EventMsg::McpToolCallEnd(McpToolCallEndEvent {
+        call_id: "call-2".to_string(),
+        invocation,
+        duration: Duration::from_millis(5),
+        result: Err("tool exploded".to_string()),
+    });
+    let emitted = processor.collect_thread_events(&event("m4", end_msg));
+
+    let failed_call = McpToolCallItem {
+        server: "server_b".to_string(),
+        tool: "tool_y".to_string(),
+        arguments: json!({ "param": 42 }),
+        result: None,
+        error: Some(McpToolCallItemError {
+            message: "tool exploded".to_string(),
+        }),
+        status: McpToolCallStatus::Failed,
+    };
+    assert_eq!(
+        emitted,
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
+            item: ThreadItem {
+                id: "item_0".to_string(),
+                details: ThreadItemDetails::McpToolCall(failed_call),
+            },
+        })]
+    );
+}
+
+/// Missing invocation arguments are reported as JSON `null`, and both the
+/// content blocks and the structured content of a successful result are
+/// passed through unchanged.
+#[test]
+fn mcp_tool_call_defaults_arguments_and_preserves_structured_content() {
+    let mut processor = EventProcessorWithJsonOutput::new(None);
+    let invocation = McpInvocation {
+        server: "server_c".to_string(),
+        tool: "tool_z".to_string(),
+        arguments: None,
+    };
+    // Builds a fresh copy of the single text block used on both sides.
+    let done_block = || {
+        ContentBlock::TextContent(TextContent {
+            annotations: None,
+            text: "done".to_string(),
+            r#type: "text".to_string(),
+        })
+    };
+    // Expected item; only the result and the status differ between phases.
+    let tool_call_item = |result: Option<McpToolCallItemResult>, status: McpToolCallStatus| {
+        ThreadItem {
+            id: "item_0".to_string(),
+            details: ThreadItemDetails::McpToolCall(McpToolCallItem {
+                server: "server_c".to_string(),
+                tool: "tool_z".to_string(),
+                arguments: serde_json::Value::Null,
+                result,
+                error: None,
+                status,
+            }),
+        }
+    };
+
+    let begin_msg = EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
+        call_id: "call-3".to_string(),
+        invocation: invocation.clone(),
+    });
+    let on_begin = processor.collect_thread_events(&event("m5", begin_msg));
+    assert_eq!(
+        on_begin,
+        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
+            item: tool_call_item(None, McpToolCallStatus::InProgress),
+        })]
+    );
+
+    let end_msg = EventMsg::McpToolCallEnd(McpToolCallEndEvent {
+        call_id: "call-3".to_string(),
+        invocation,
+        duration: Duration::from_millis(10),
+        result: Ok(CallToolResult {
+            content: vec![done_block()],
+            is_error: None,
+            structured_content: Some(json!({ "status": "ok" })),
+        }),
+    });
+    let on_end = processor.collect_thread_events(&event("m6", end_msg));
+    let expected_result = McpToolCallItemResult {
+        content: vec![done_block()],
+        structured_content: Some(json!({ "status": "ok" })),
+    };
+    assert_eq!(
+        on_end,
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
+            item: tool_call_item(Some(expected_result), McpToolCallStatus::Completed),
+        })]
+    );
+}
+
+#[test]
+fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+
+    // First turn: start + complete
+    let start = event(
+        "t1",
+        EventMsg::PlanUpdate(UpdatePlanArgs {
+            explanation: None,
+            plan: vec![PlanItemArg {
+                step: "only".to_string(),
+                status: StepStatus::Pending,
+            }],
+        }),
+    );
+    let _ = ep.collect_thread_events(&start);
+    let complete = event(
+        "t2",
+        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
+            last_agent_message: None,
+        }),
+    );
+    let _ = ep.collect_thread_events(&complete);
+
+    // Second turn: a new todo list should have a new id
+    let start_again = event(
+        "t3",
+        EventMsg::PlanUpdate(UpdatePlanArgs {
+            explanation: None,
+            plan: vec![PlanItemArg {
+                step: "again".to_string(),
+                status: StepStatus::Pending,
+            }],
+        }),
+    );
+    let out = ep.collect_thread_events(&start_again);
+
+    match &out[0] {
+        ThreadEvent::ItemStarted(ItemStartedEvent { item }) => {
+            assert_eq!(&item.id, "item_1");
+        }
+        other => panic!("unexpected event: {other:?}"),
+    }
+}
+
+#[test]
+fn agent_reasoning_produces_item_completed_reasoning() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+    let ev = event(
+        "e1",
+        EventMsg::AgentReasoning(AgentReasoningEvent {
+            text: "thinking...".to_string(),
+        }),
+    );
+    let out = ep.collect_thread_events(&ev);
+    assert_eq!(
+        out,
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
+            item: ThreadItem {
+                id: "item_0".to_string(),
+                details: ThreadItemDetails::Reasoning(ReasoningItem {
+                    text: "thinking...".to_string(),
+                }),
+            },
+        })]
+    );
+}
+
+#[test]
+fn agent_message_produces_item_completed_agent_message() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+    let ev = event(
+        "e1",
+        EventMsg::AgentMessage(AgentMessageEvent {
+            message: "hello".to_string(),
+        }),
+    );
+    let out = ep.collect_thread_events(&ev);
+    assert_eq!(
+        out,
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
+            item: ThreadItem {
+                id: "item_0".to_string(),
+                details: ThreadItemDetails::AgentMessage(AgentMessageItem {
+                    text: "hello".to_string(),
+                }),
+            },
+        })]
+    );
+}
+
+#[test]
+fn error_event_produces_error() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+    let out = ep.collect_thread_events(&event(
+        "e1",
+        EventMsg::Error(codex_core::protocol::ErrorEvent {
+            message: "boom".to_string(),
+        }),
+    ));
+    assert_eq!(
+        out,
+        vec![ThreadEvent::Error(ThreadErrorEvent {
+            message: "boom".to_string(),
+        })]
+    );
+}
+
+#[test]
+fn warning_event_produces_error_item() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+    let out = ep.collect_thread_events(&event(
+        "e1",
+        EventMsg::Warning(WarningEvent {
+            message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(),
+        }),
+    ));
+    assert_eq!(
+        out,
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
+            item: ThreadItem {
+                id: "item_0".to_string(),
+                details: ThreadItemDetails::Error(ErrorItem {
+                    message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(),
+                }),
+            },
+        })]
+    );
+}
+
+#[test]
+fn stream_error_event_produces_error() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+    let out = ep.collect_thread_events(&event(
+        "e1",
+        EventMsg::StreamError(codex_core::protocol::StreamErrorEvent {
+            message: "retrying".to_string(),
+        }),
+    ));
+    assert_eq!(
+        out,
+        vec![ThreadEvent::Error(ThreadErrorEvent {
+            message: "retrying".to_string(),
+        })]
+    );
+}
+
+#[test]
+fn error_followed_by_task_complete_produces_turn_failed() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+
+    let error_event = event(
+        "e1",
+        EventMsg::Error(ErrorEvent {
+            message: "boom".to_string(),
+        }),
+    );
+    assert_eq!(
+        ep.collect_thread_events(&error_event),
+        vec![ThreadEvent::Error(ThreadErrorEvent {
+            message: "boom".to_string(),
+        })]
+    );
+
+    let complete_event = event(
+        "e2",
+        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
+            last_agent_message: None,
+        }),
+    );
+    assert_eq!(
+        ep.collect_thread_events(&complete_event),
+        vec![ThreadEvent::TurnFailed(TurnFailedEvent {
+            error: ThreadErrorEvent {
+                message: "boom".to_string(),
+            },
+        })]
+    );
+}
+
+#[test]
+fn exec_command_end_success_produces_completed_command_item() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+
+    // Begin -> item.started (item_0) with InProgress status
+    let begin = event(
+        "c1",
+        EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
+            call_id: "1".to_string(),
+            command: vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()],
+            cwd: std::env::current_dir().unwrap(),
+            parsed_cmd: Vec::new(),
+            is_user_shell_command: false,
+        }),
+    );
+    let out_begin = ep.collect_thread_events(&begin);
+    assert_eq!(
+        out_begin,
+        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
+            item: ThreadItem {
+                id: "item_0".to_string(),
+                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
+                    command: "bash -lc 'echo hi'".to_string(),
+                    aggregated_output: String::new(),
+                    exit_code: None,
+                    status: CommandExecutionStatus::InProgress,
+                }),
+            },
+        })]
+    );
+
+    // End (success) -> item.completed (item_0)
+    let end_ok = event(
+        "c2",
+        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+            call_id: "1".to_string(),
+            stdout: String::new(),
+            stderr: String::new(),
+            aggregated_output: "hi\n".to_string(),
+            exit_code: 0,
+            duration: Duration::from_millis(5),
+            formatted_output: String::new(),
+        }),
+    );
+    let out_ok = ep.collect_thread_events(&end_ok);
+    assert_eq!(
+        out_ok,
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
+            item: ThreadItem {
+                id: "item_0".to_string(),
+                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
+                    command: "bash -lc 'echo hi'".to_string(),
+                    aggregated_output: "hi\n".to_string(),
+                    exit_code: Some(0),
+                    status: CommandExecutionStatus::Completed,
+                }),
+            },
+        })]
+    );
+}
+
+#[test]
+fn exec_command_end_failure_produces_failed_command_item() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+
+    // Begin -> item.started (item_0) with InProgress status
+    let begin = event(
+        "c1",
+        EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
+            call_id: "2".to_string(),
+            command: vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()],
+            cwd: std::env::current_dir().unwrap(),
+            parsed_cmd: Vec::new(),
+            is_user_shell_command: false,
+        }),
+    );
+    assert_eq!(
+        ep.collect_thread_events(&begin),
+        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
+            item: ThreadItem {
+                id: "item_0".to_string(),
+                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
+                    command: "sh -c 'exit 1'".to_string(),
+                    aggregated_output: String::new(),
+                    exit_code: None,
+                    status: CommandExecutionStatus::InProgress,
+                }),
+            },
+        })]
+    );
+
+    // End (failure) -> item.completed (item_0)
+    let end_fail = event(
+        "c2",
+        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+            call_id: "2".to_string(),
+            stdout: String::new(),
+            stderr: String::new(),
+            aggregated_output: String::new(),
+            exit_code: 1,
+            duration: Duration::from_millis(2),
+            formatted_output: String::new(),
+        }),
+    );
+    let out_fail = ep.collect_thread_events(&end_fail);
+    assert_eq!(
+        out_fail,
+        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
+            item: ThreadItem {
+                id: "item_0".to_string(),
+                details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
+                    command: "sh -c 'exit 1'".to_string(),
+                    aggregated_output: String::new(),
+                    exit_code: Some(1),
+                    status: CommandExecutionStatus::Failed,
+                }),
+            },
+        })]
+    );
+}
+
+#[test]
+fn exec_command_end_without_begin_is_ignored() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+
+    // End event arrives without a prior Begin; should produce no thread events.
+    let end_only = event(
+        "c1",
+        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+            call_id: "no-begin".to_string(),
+            stdout: String::new(),
+            stderr: String::new(),
+            aggregated_output: String::new(),
+            exit_code: 0,
+            duration: Duration::from_millis(1),
+            formatted_output: String::new(),
+        }),
+    );
+    let out = ep.collect_thread_events(&end_only);
+    assert!(out.is_empty());
+}
+
+#[test]
+fn patch_apply_success_produces_item_completed_patchapply() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+
+    // Prepare a patch with multiple kinds of changes
+    let mut changes = std::collections::HashMap::new();
+    changes.insert(
+        PathBuf::from("a/added.txt"),
+        FileChange::Add {
+            content: "+hello".to_string(),
+        },
+    );
+    changes.insert(
+        PathBuf::from("b/deleted.txt"),
+        FileChange::Delete {
+            content: "-goodbye".to_string(),
+        },
+    );
+    changes.insert(
+        PathBuf::from("c/modified.txt"),
+        FileChange::Update {
+            unified_diff: "--- c/modified.txt\n+++ c/modified.txt\n@@\n-old\n+new\n".to_string(),
+            move_path: Some(PathBuf::from("c/renamed.txt")),
+        },
+    );
+
+    // Begin -> no output
+    let begin = event(
+        "p1",
+        EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
+            call_id: "call-1".to_string(),
+            auto_approved: true,
+            changes: changes.clone(),
+        }),
+    );
+    let out_begin = ep.collect_thread_events(&begin);
+    assert!(out_begin.is_empty());
+
+    // End (success) -> item.completed (item_0)
+    let end = event(
+        "p2",
+        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
+            call_id: "call-1".to_string(),
+            stdout: "applied 3 changes".to_string(),
+            stderr: String::new(),
+            success: true,
+        }),
+    );
+    let out_end = ep.collect_thread_events(&end);
+    assert_eq!(out_end.len(), 1);
+
+    // Validate structure without relying on HashMap iteration order
+    match &out_end[0] {
+        ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => {
+            assert_eq!(&item.id, "item_0");
+            match &item.details {
+                ThreadItemDetails::FileChange(file_update) => {
+                    assert_eq!(file_update.status, PatchApplyStatus::Completed);
+
+                    let mut actual: Vec<(String, PatchChangeKind)> = file_update
+                        .changes
+                        .iter()
+                        .map(|c| (c.path.clone(), c.kind.clone()))
+                        .collect();
+                    actual.sort_by(|a, b| a.0.cmp(&b.0));
+
+                    let mut expected = vec![
+                        ("a/added.txt".to_string(), PatchChangeKind::Add),
+                        ("b/deleted.txt".to_string(), PatchChangeKind::Delete),
+                        ("c/modified.txt".to_string(), PatchChangeKind::Update),
+                    ];
+                    expected.sort_by(|a, b| a.0.cmp(&b.0));
+
+                    assert_eq!(actual, expected);
+                }
+                other => panic!("unexpected details: {other:?}"),
+            }
+        }
+        other => panic!("unexpected event: {other:?}"),
+    }
+}
+
+#[test]
+fn patch_apply_failure_produces_item_completed_patchapply_failed() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+
+    let mut changes = std::collections::HashMap::new();
+    changes.insert(
+        PathBuf::from("file.txt"),
+        FileChange::Update {
+            unified_diff: "--- file.txt\n+++ file.txt\n@@\n-old\n+new\n".to_string(),
+            move_path: None,
+        },
+    );
+
+    // Begin -> no output
+    let begin = event(
+        "p1",
+        EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
+            call_id: "call-2".to_string(),
+            auto_approved: false,
+            changes: changes.clone(),
+        }),
+    );
+    assert!(ep.collect_thread_events(&begin).is_empty());
+
+    // End (failure) -> item.completed (item_0) with Failed status
+    let end = event(
+        "p2",
+        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
+            call_id: "call-2".to_string(),
+            stdout: String::new(),
+            stderr: "failed to apply".to_string(),
+            success: false,
+        }),
+    );
+    let out_end = ep.collect_thread_events(&end);
+    assert_eq!(out_end.len(), 1);
+
+    match &out_end[0] {
+        ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => {
+            assert_eq!(&item.id, "item_0");
+            match &item.details {
+                ThreadItemDetails::FileChange(file_update) => {
+                    assert_eq!(file_update.status, PatchApplyStatus::Failed);
+                    assert_eq!(file_update.changes.len(), 1);
+                    assert_eq!(file_update.changes[0].path, "file.txt".to_string());
+                    assert_eq!(file_update.changes[0].kind, PatchChangeKind::Update);
+                }
+                other => panic!("unexpected details: {other:?}"),
+            }
+        }
+        other => panic!("unexpected event: {other:?}"),
+    }
+}
+
+#[test]
+fn task_complete_produces_turn_completed_with_usage() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+
+    // First, feed a TokenCount event with known totals.
+    let usage = codex_core::protocol::TokenUsage {
+        input_tokens: 1200,
+        cached_input_tokens: 200,
+        output_tokens: 345,
+        reasoning_output_tokens: 0,
+        total_tokens: 0,
+    };
+    let info = codex_core::protocol::TokenUsageInfo {
+        total_token_usage: usage.clone(),
+        last_token_usage: usage,
+        model_context_window: None,
+    };
+    let token_count_event = event(
+        "e1",
+        EventMsg::TokenCount(codex_core::protocol::TokenCountEvent {
+            info: Some(info),
+            rate_limits: None,
+        }),
+    );
+    assert!(ep.collect_thread_events(&token_count_event).is_empty());
+
+    // Then TaskComplete should produce turn.completed with the captured usage.
+    let complete_event = event(
+        "e2",
+        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
+            last_agent_message: Some("done".to_string()),
+        }),
+    );
+    let out = ep.collect_thread_events(&complete_event);
+    assert_eq!(
+        out,
+        vec![ThreadEvent::TurnCompleted(TurnCompletedEvent {
+            usage: Usage {
+                input_tokens: 1200,
+                cached_input_tokens: 200,
+                output_tokens: 345,
+            },
+        })]
+    );
+}
--- a/llmx-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
+++ b/llmx-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
@@ -0,0 +1,4 @@
+class BaseClass:
+  def method():
+
+    return True
--- a/llmx-rs/exec/tests/fixtures/cli_responses_fixture.sse
+++ b/llmx-rs/exec/tests/fixtures/cli_responses_fixture.sse
@@ -0,0 +1,10 @@
+event: response.created
+data: {"type":"response.created","response":{"id":"resp1"}}
+
+event: response.output_item.done
+data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"fixture hello"}]}}
+
+event: response.completed
+data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
+
+
--- a/llmx-rs/exec/tests/suite/apply_patch.rs
+++ b/llmx-rs/exec/tests/suite/apply_patch.rs
@@ -0,0 +1,151 @@
+#![allow(clippy::expect_used, clippy::unwrap_used, unused_imports)]
+
+use anyhow::Context;
+use assert_cmd::prelude::*;
+use codex_core::CODEX_APPLY_PATCH_ARG1;
+use core_test_support::responses::ev_apply_patch_custom_tool_call;
+use core_test_support::responses::ev_apply_patch_function_call;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::mount_sse_sequence;
+use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
+use std::fs;
+use std::process::Command;
+use tempfile::tempdir;
+
+/// While we may add an `apply-patch` subcommand to the `codex` CLI multitool
+/// at some point, we must ensure that the smaller `codex-exec` CLI can still
+/// emulate the `apply_patch` CLI.
+#[test]
+fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    let relative_path = "source.txt";
+    let absolute_path = tmp.path().join(relative_path);
+    fs::write(&absolute_path, "original content\n")?;
+
+    Command::cargo_bin("codex-exec")
+        .context("should find binary for codex-exec")?
+        .arg(CODEX_APPLY_PATCH_ARG1)
+        .arg(
+            r#"*** Begin Patch
+*** Update File: source.txt
+@@
+-original content
++modified by apply_patch
+*** End Patch"#,
+        )
+        .current_dir(tmp.path())
+        .assert()
+        .success()
+        .stdout("Success. Updated the following files:\nM source.txt\n")
+        .stderr(predicates::str::is_empty());
+    assert_eq!(
+        fs::read_to_string(absolute_path)?,
+        "modified by apply_patch\n"
+    );
+    Ok(())
+}
+
+#[cfg(not(target_os = "windows"))]
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn test_apply_patch_tool() -> anyhow::Result<()> {
+    use core_test_support::skip_if_no_network;
+    use core_test_support::test_codex_exec::test_codex_exec;
+
+    skip_if_no_network!(Ok(()));
+
+    let test = test_codex_exec();
+    let tmp_path = test.cwd_path().to_path_buf();
+    let add_patch = r#"*** Begin Patch
+*** Add File: test.md
++Hello world
+*** End Patch"#;
+    let update_patch = r#"*** Begin Patch
+*** Update File: test.md
+@@
+-Hello world
++Final text
+*** End Patch"#;
+    let response_streams = vec![
+        sse(vec![
+            ev_apply_patch_custom_tool_call("request_0", add_patch),
+            ev_completed("request_0"),
+        ]),
+        sse(vec![
+            ev_apply_patch_function_call("request_1", update_patch),
+            ev_completed("request_1"),
+        ]),
+        sse(vec![ev_completed("request_2")]),
+    ];
+    let server = start_mock_server().await;
+    mount_sse_sequence(&server, response_streams).await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("-s")
+        .arg("danger-full-access")
+        .arg("foo")
+        .assert()
+        .success();
+
+    let final_path = tmp_path.join("test.md");
+    let contents = std::fs::read_to_string(&final_path)
+        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
+    assert_eq!(contents, "Final text\n");
+    Ok(())
+}
+
+#[cfg(not(target_os = "windows"))]
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
+    use core_test_support::skip_if_no_network;
+    use core_test_support::test_codex_exec::test_codex_exec;
+
+    skip_if_no_network!(Ok(()));
+
+    let test = test_codex_exec();
+    let freeform_add_patch = r#"*** Begin Patch
+*** Add File: app.py
++class BaseClass:
++  def method():
++    return False
+*** End Patch"#;
+    let freeform_update_patch = r#"*** Begin Patch
+*** Update File: app.py
+@@  def method():
+-    return False
++
++    return True
+*** End Patch"#;
+    let response_streams = vec![
+        sse(vec![
+            ev_apply_patch_custom_tool_call("request_0", freeform_add_patch),
+            ev_completed("request_0"),
+        ]),
+        sse(vec![
+            ev_apply_patch_custom_tool_call("request_1", freeform_update_patch),
+            ev_completed("request_1"),
+        ]),
+        sse(vec![ev_completed("request_2")]),
+    ];
+    let server = start_mock_server().await;
+    mount_sse_sequence(&server, response_streams).await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("-s")
+        .arg("danger-full-access")
+        .arg("foo")
+        .assert()
+        .success();
+
+    // Verify final file contents
+    let final_path = test.cwd_path().join("app.py");
+    let contents = std::fs::read_to_string(&final_path)
+        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
+    assert_eq!(
+        contents,
+        include_str!("../fixtures/apply_patch_freeform_final.txt")
+    );
+    Ok(())
+}
--- a/llmx-rs/exec/tests/suite/auth_env.rs
+++ b/llmx-rs/exec/tests/suite/auth_env.rs
@@ -0,0 +1,30 @@
+#![allow(clippy::unwrap_used, clippy::expect_used)]
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::mount_sse_once_match;
+use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
+use core_test_support::test_codex_exec::test_codex_exec;
+use wiremock::matchers::header;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn exec_uses_codex_api_key_env_var() -> anyhow::Result<()> {
+    let test = test_codex_exec();
+    let server = start_mock_server().await;
+
+    mount_sse_once_match(
+        &server,
+        header("Authorization", "Bearer dummy"),
+        sse(vec![ev_completed("request_0")]),
+    )
+    .await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("-C")
+        .arg(env!("CARGO_MANIFEST_DIR"))
+        .arg("echo testing codex api key")
+        .assert()
+        .success();
+
+    Ok(())
+}
--- a/llmx-rs/exec/tests/suite/mod.rs
+++ b/llmx-rs/exec/tests/suite/mod.rs
@@ -0,0 +1,8 @@
+// Aggregates all former standalone integration tests as modules.
+mod apply_patch;
+mod auth_env;
+mod originator;
+mod output_schema;
+mod resume;
+mod sandbox;
+mod server_error_exit;
--- a/llmx-rs/exec/tests/suite/originator.rs
+++ b/llmx-rs/exec/tests/suite/originator.rs
@@ -0,0 +1,52 @@
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::expect_used, clippy::unwrap_used)]
+
+use core_test_support::responses;
+use core_test_support::test_codex_exec::test_codex_exec;
+use wiremock::matchers::header;
+
+/// Verify that `codex-exec` sends the `Originator: codex_exec` header by default: the
+/// mocked response is mounted only for requests matching that header, and the run must exit 0.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn send_codex_exec_originator() -> anyhow::Result<()> {
+    let test = test_codex_exec();
+
+    let server = responses::start_mock_server().await;
+    let body = responses::sse(vec![
+        responses::ev_response_created("response_1"),
+        responses::ev_assistant_message("response_1", "Hello, world!"),
+        responses::ev_completed("response_1"),
+    ]);
+    responses::mount_sse_once_match(&server, header("Originator", "codex_exec"), body).await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("tell me something")
+        .assert()
+        .code(0);
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn supports_originator_override() -> anyhow::Result<()> {
+    let test = test_codex_exec();
+
+    let server = responses::start_mock_server().await;
+    let body = responses::sse(vec![
+        responses::ev_response_created("response_1"),
+        responses::ev_assistant_message("response_1", "Hello, world!"),
+        responses::ev_completed("response_1"),
+    ]);
+    responses::mount_sse_once_match(&server, header("Originator", "codex_exec_override"), body)
+        .await;
+
+    test.cmd_with_server(&server)
+        .env("CODEX_INTERNAL_ORIGINATOR_OVERRIDE", "codex_exec_override")
+        .arg("--skip-git-repo-check")
+        .arg("tell me something")
+        .assert()
+        .code(0);
+
+    Ok(())
+}
--- a/llmx-rs/exec/tests/suite/output_schema.rs
+++ b/llmx-rs/exec/tests/suite/output_schema.rs
@@ -0,0 +1,63 @@
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::expect_used, clippy::unwrap_used)]
+
+use core_test_support::responses;
+use core_test_support::test_codex_exec::test_codex_exec;
+use serde_json::Value;
+use wiremock::matchers::any;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
+    let test = test_codex_exec();
+
+    let schema_contents = serde_json::json!({
+        "type": "object",
+        "properties": {
+            "answer": { "type": "string" }
+        },
+        "required": ["answer"],
+        "additionalProperties": false
+    });
+    let schema_path = test.cwd_path().join("schema.json");
+    std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?;
+    let expected_schema: Value = schema_contents;
+
+    let server = responses::start_mock_server().await;
+    let body = responses::sse(vec![
+        responses::ev_response_created("resp1"),
+        responses::ev_assistant_message("m1", "fixture hello"),
+        responses::ev_completed("resp1"),
+    ]);
+    let response_mock = responses::mount_sse_once_match(&server, any(), body).await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        // keep using -C in the test to exercise the flag as well
+        .arg("-C")
+        .arg(test.cwd_path())
+        .arg("--output-schema")
+        .arg(&schema_path)
+        .arg("-m")
+        .arg("gpt-5")
+        .arg("tell me a joke")
+        .assert()
+        .success();
+
+    let request = response_mock.single_request();
+    let payload: Value = request.body_json();
+    let text = payload.get("text").expect("request missing text field");
+    let format = text
+        .get("format")
+        .expect("request missing text.format field");
+    assert_eq!(
+        format,
+        &serde_json::json!({
+            "name": "codex_output_schema",
+            "type": "json_schema",
+            "strict": true,
+            "schema": expected_schema,
+        })
+    );
+
+    Ok(())
+}
--- a/llmx-rs/exec/tests/suite/resume.rs
+++ b/llmx-rs/exec/tests/suite/resume.rs
@@ -0,0 +1,257 @@
+#![allow(clippy::unwrap_used, clippy::expect_used)]
+use anyhow::Context;
+use core_test_support::test_codex_exec::test_codex_exec;
+use serde_json::Value;
+use std::path::Path;
+use std::string::ToString;
+use uuid::Uuid;
+use walkdir::WalkDir;
+
+/// Searches `sessions_dir` recursively for a `.jsonl` rollout file that
+/// mentions `marker`.
+///
+/// A file matches when any line after the first (the meta line) parses as JSON
+/// with `type == "response_item"` and a `payload` of `type == "message"` whose
+/// serialized `content` contains `marker`. Unreadable entries, blank lines and
+/// lines that are not valid JSON are skipped. Returns the path of the first
+/// match in walk order, or `None` when nothing matches.
+fn find_session_file_containing_marker(
+    sessions_dir: &std::path::Path,
+    marker: &str,
+) -> Option<std::path::PathBuf> {
+    // True for a `response_item` whose message payload content holds the marker.
+    let mentions_marker = |item: &Value| {
+        item.get("type").and_then(Value::as_str) == Some("response_item")
+            && item.get("payload").is_some_and(|payload| {
+                payload.get("type").and_then(Value::as_str) == Some("message")
+                    && payload
+                        .get("content")
+                        .map(ToString::to_string)
+                        .unwrap_or_default()
+                        .contains(marker)
+            })
+    };
+
+    WalkDir::new(sessions_dir)
+        .into_iter()
+        .filter_map(Result::ok)
+        .filter(|entry| {
+            entry.file_type().is_file()
+                && entry.file_name().to_string_lossy().ends_with(".jsonl")
+        })
+        .find(|entry| {
+            let Ok(content) = std::fs::read_to_string(entry.path()) else {
+                return false;
+            };
+            // Skip the first meta line and scan the remaining JSONL entries.
+            content
+                .lines()
+                .skip(1)
+                .filter(|line| !line.trim().is_empty())
+                .filter_map(|line| serde_json::from_str::<Value>(line).ok())
+                .any(|item| mentions_marker(&item))
+        })
+        .map(|entry| entry.path().to_path_buf())
+}
+
+/// Reads the first line of the rollout file at `path`, parses it as JSON and
+/// returns the string found at `payload.id`.
+///
+/// Returns an empty string when that field is absent or not a string.
+///
+/// # Panics
+/// Panics if the file cannot be read, is empty, or its first line is not
+/// valid JSON.
+fn extract_conversation_id(path: &std::path::Path) -> String {
+    let rollout = std::fs::read_to_string(path).unwrap();
+    let first_line = rollout.lines().next().expect("missing meta line");
+    let meta = serde_json::from_str::<Value>(first_line).expect("invalid meta json");
+    let id = meta
+        .get("payload")
+        .and_then(|payload| payload.get("id"))
+        .and_then(Value::as_str);
+    match id {
+        Some(id) => id.to_string(),
+        None => String::new(),
+    }
+}
+
+/// Runs the CLI twice, first as a fresh session and then with
+/// `resume --last`, and checks that the second prompt lands in the rollout
+/// file created by the first run.
+#[test]
+fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> {
+    let harness = test_codex_exec();
+    let manifest_dir = env!("CARGO_MANIFEST_DIR");
+    let sse_fixture = Path::new(manifest_dir).join("tests/fixtures/cli_responses_fixture.sse");
+
+    // First run: start a session whose prompt carries a unique marker.
+    let first_marker = format!("resume-last-{}", Uuid::new_v4());
+    let first_prompt = format!("echo {first_marker}");
+    harness
+        .cmd()
+        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
+        .env("OPENAI_BASE_URL", "http://unused.local")
+        .args(["--skip-git-repo-check", "-C", manifest_dir])
+        .arg(&first_prompt)
+        .assert()
+        .success();
+
+    // Locate the rollout file that recorded the first marker.
+    let sessions_dir = harness.home_path().join("sessions");
+    let original_path = find_session_file_containing_marker(&sessions_dir, &first_marker)
+        .expect("no session file found after first run");
+
+    // Second run: resume the most recent session with a different marker.
+    let second_marker = format!("resume-last-2-{}", Uuid::new_v4());
+    let second_prompt = format!("echo {second_marker}");
+    harness
+        .cmd()
+        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
+        .env("OPENAI_BASE_URL", "http://unused.local")
+        .args(["--skip-git-repo-check", "-C", manifest_dir])
+        .arg(&second_prompt)
+        .args(["resume", "--last"])
+        .assert()
+        .success();
+
+    // The same file must now hold both markers.
+    let resumed_path = find_session_file_containing_marker(&sessions_dir, &second_marker)
+        .expect("no resumed session file containing marker2");
+    assert_eq!(
+        resumed_path, original_path,
+        "resume --last should append to existing file"
+    );
+    let rollout = std::fs::read_to_string(&resumed_path)?;
+    assert!(rollout.contains(&first_marker));
+    assert!(rollout.contains(&second_marker));
+    Ok(())
+}
+
+/// Runs the CLI once to create a session, reads the conversation id from the
+/// rollout's meta line, then runs `resume <id>` and checks that the second
+/// prompt is appended to the same rollout file.
+#[test]
+fn exec_resume_by_id_appends_to_existing_file() -> anyhow::Result<()> {
+    let harness = test_codex_exec();
+    let manifest_dir = env!("CARGO_MANIFEST_DIR");
+    let sse_fixture = Path::new(manifest_dir).join("tests/fixtures/cli_responses_fixture.sse");
+
+    // First run: create a session tagged with a unique marker.
+    let first_marker = format!("resume-by-id-{}", Uuid::new_v4());
+    let first_prompt = format!("echo {first_marker}");
+    harness
+        .cmd()
+        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
+        .env("OPENAI_BASE_URL", "http://unused.local")
+        .args(["--skip-git-repo-check", "-C", manifest_dir])
+        .arg(&first_prompt)
+        .assert()
+        .success();
+
+    // Recover the rollout file and the conversation id stored in its meta line.
+    let sessions_dir = harness.home_path().join("sessions");
+    let original_path = find_session_file_containing_marker(&sessions_dir, &first_marker)
+        .expect("no session file found after first run");
+    let conversation_id = extract_conversation_id(&original_path);
+    assert!(
+        !conversation_id.is_empty(),
+        "missing conversation id in meta line"
+    );
+
+    // Second run: resume that conversation explicitly by id.
+    let second_marker = format!("resume-by-id-2-{}", Uuid::new_v4());
+    let second_prompt = format!("echo {second_marker}");
+    harness
+        .cmd()
+        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
+        .env("OPENAI_BASE_URL", "http://unused.local")
+        .args(["--skip-git-repo-check", "-C", manifest_dir])
+        .arg(&second_prompt)
+        .arg("resume")
+        .arg(&conversation_id)
+        .assert()
+        .success();
+
+    // The same file must now hold both markers.
+    let resumed_path = find_session_file_containing_marker(&sessions_dir, &second_marker)
+        .expect("no resumed session file containing marker2");
+    assert_eq!(
+        resumed_path, original_path,
+        "resume by id should append to existing file"
+    );
+    let rollout = std::fs::read_to_string(&resumed_path)?;
+    assert!(rollout.contains(&first_marker));
+    assert!(rollout.contains(&second_marker));
+    Ok(())
+}
+
+/// Checks that `--model` and `--sandbox` given on a `resume --last` invocation
+/// are reported on stderr by the resumed run, and that the resumed run still
+/// appends to the rollout file created by the first run.
+#[test]
+fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
+    let harness = test_codex_exec();
+    let manifest_dir = env!("CARGO_MANIFEST_DIR");
+    let sse_fixture = Path::new(manifest_dir).join("tests/fixtures/cli_responses_fixture.sse");
+
+    // First run: create the session with `gpt-5`.
+    let first_marker = format!("resume-config-{}", Uuid::new_v4());
+    let first_prompt = format!("echo {first_marker}");
+    harness
+        .cmd()
+        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
+        .env("OPENAI_BASE_URL", "http://unused.local")
+        .args([
+            "--skip-git-repo-check",
+            "--sandbox",
+            "workspace-write",
+            "--model",
+            "gpt-5",
+            "-C",
+            manifest_dir,
+        ])
+        .arg(&first_prompt)
+        .assert()
+        .success();
+
+    let sessions_dir = harness.home_path().join("sessions");
+    let original_path = find_session_file_containing_marker(&sessions_dir, &first_marker)
+        .expect("no session file found after first run");
+
+    // Second run: resume with a different model and capture the output.
+    let second_marker = format!("resume-config-2-{}", Uuid::new_v4());
+    let second_prompt = format!("echo {second_marker}");
+    let resumed = harness
+        .cmd()
+        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
+        .env("OPENAI_BASE_URL", "http://unused.local")
+        .args([
+            "--skip-git-repo-check",
+            "--sandbox",
+            "workspace-write",
+            "--model",
+            "gpt-5-high",
+            "-C",
+            manifest_dir,
+        ])
+        .arg(&second_prompt)
+        .args(["resume", "--last"])
+        .output()
+        .context("resume run should succeed")?;
+
+    assert!(resumed.status.success(), "resume run failed: {resumed:?}");
+
+    let stderr = String::from_utf8(resumed.stderr)?;
+    assert!(
+        stderr.contains("model: gpt-5-high"),
+        "stderr missing model override: {stderr}"
+    );
+    // The Windows branch expects `read-only` to be reported instead of the
+    // requested `workspace-write`.
+    let (sandbox_line, failure_note) = if cfg!(target_os = "windows") {
+        (
+            "sandbox: read-only",
+            "stderr missing downgraded sandbox note",
+        )
+    } else {
+        (
+            "sandbox: workspace-write",
+            "stderr missing sandbox override",
+        )
+    };
+    assert!(stderr.contains(sandbox_line), "{failure_note}: {stderr}");
+
+    let resumed_path = find_session_file_containing_marker(&sessions_dir, &second_marker)
+        .expect("no resumed session file containing marker2");
+    assert_eq!(
+        resumed_path, original_path,
+        "resume should append to same file"
+    );
+
+    let rollout = std::fs::read_to_string(&resumed_path)?;
+    assert!(rollout.contains(&first_marker));
+    assert!(rollout.contains(&second_marker));
+    Ok(())
+}
--- a/llmx-rs/exec/tests/suite/sandbox.rs
+++ b/llmx-rs/exec/tests/suite/sandbox.rs
@@ -0,0 +1,322 @@
+#![cfg(unix)]
+use codex_core::protocol::SandboxPolicy;
+use codex_core::spawn::StdioPolicy;
+use std::collections::HashMap;
+use std::future::Future;
+use std::io;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::ExitStatus;
+use tokio::fs::create_dir_all;
+use tokio::process::Child;
+
+/// macOS variant of the platform-neutral sandbox launcher used by the tests
+/// in this file.
+///
+/// Every argument is forwarded unchanged to
+/// `codex_core::seatbelt::spawn_command_under_seatbelt`; the spawned child (or
+/// the spawn error) is returned as-is.
+#[cfg(target_os = "macos")]
+async fn spawn_command_under_sandbox(
+    command: Vec<String>,
+    command_cwd: PathBuf,
+    sandbox_policy: &SandboxPolicy,
+    sandbox_cwd: &Path,
+    stdio_policy: StdioPolicy,
+    env: HashMap<String, String>,
+) -> io::Result<Child> {
+    codex_core::seatbelt::spawn_command_under_seatbelt(
+        command,
+        command_cwd,
+        sandbox_policy,
+        sandbox_cwd,
+        stdio_policy,
+        env,
+    )
+    .await
+}
+
+/// Linux variant of the platform-neutral sandbox launcher used by the tests
+/// in this file.
+///
+/// Resolves the `codex-exec` binary built by Cargo and passes it, followed by
+/// the unchanged arguments, to
+/// `codex_core::landlock::spawn_command_under_linux_sandbox`.
+#[cfg(target_os = "linux")]
+async fn spawn_command_under_sandbox(
+    command: Vec<String>,
+    command_cwd: PathBuf,
+    sandbox_policy: &SandboxPolicy,
+    sandbox_cwd: &Path,
+    stdio_policy: StdioPolicy,
+    env: HashMap<String, String>,
+) -> io::Result<Child> {
+    let sandbox_exe = assert_cmd::cargo::cargo_bin("codex-exec");
+    codex_core::landlock::spawn_command_under_linux_sandbox(
+        sandbox_exe,
+        command,
+        command_cwd,
+        sandbox_policy,
+        sandbox_cwd,
+        stdio_policy,
+        env,
+    )
+    .await
+}
+
+/// Runs a short Python script that hands a `multiprocessing.Lock` to a child
+/// process, under a workspace-write sandbox, and expects a successful exit.
+#[tokio::test]
+async fn python_multiprocessing_lock_works_under_sandbox() {
+    core_test_support::skip_if_sandbox!();
+
+    // From https://man7.org/linux/man-pages/man7/sem_overview.7.html
+    //
+    // > On Linux, named semaphores are created in a virtual filesystem,
+    // > normally mounted under /dev/shm.
+    //
+    // Hence /dev/shm is made writable on Linux; macOS gets no extra roots.
+    let writable_roots: Vec<PathBuf> = if cfg!(target_os = "linux") {
+        vec![PathBuf::from("/dev/shm")]
+    } else {
+        Vec::new()
+    };
+
+    let policy = SandboxPolicy::WorkspaceWrite {
+        writable_roots,
+        network_access: false,
+        exclude_tmpdir_env_var: false,
+        exclude_slash_tmp: false,
+    };
+
+    let script = r#"import multiprocessing
+from multiprocessing import Lock, Process
+
+def f(lock):
+    with lock:
+        print("Lock acquired in child process")
+
+if __name__ == '__main__':
+    lock = Lock()
+    p = Process(target=f, args=(lock,))
+    p.start()
+    p.join()
+"#;
+    let argv = Vec::from(["python3", "-c", script].map(String::from));
+
+    // The command and the sandbox policy share the current directory here.
+    let cwd = std::env::current_dir().expect("should be able to get current dir");
+    let mut python = spawn_command_under_sandbox(
+        argv,
+        cwd.clone(),
+        &policy,
+        cwd.as_path(),
+        StdioPolicy::Inherit,
+        HashMap::new(),
+    )
+    .await
+    .expect("should be able to spawn python under sandbox");
+
+    let status = python.wait().await.expect("should wait for child process");
+    assert!(status.success(), "python exited with {status:?}");
+}
+
+/// Checks that write permission follows the sandbox policy cwd, not the
+/// directory the command runs in.
+///
+/// Two sibling directories are created. A write attempted from the command
+/// cwd (outside the policy cwd) must fail and leave no file behind, while a
+/// write targeting the policy cwd must succeed.
+#[tokio::test]
+async fn sandbox_distinguishes_command_and_policy_cwds() {
+    core_test_support::skip_if_sandbox!();
+    let scratch = tempfile::tempdir().expect("should be able to create temp dir");
+    let policy_dir = scratch.path().join("sandbox");
+    let command_dir = scratch.path().join("command");
+    for dir in [&policy_dir, &command_dir] {
+        create_dir_all(dir).await.expect("mkdir");
+    }
+    let policy_cwd = tokio::fs::canonicalize(&policy_dir)
+        .await
+        .expect("canonicalize sandbox root");
+    let allowed_file = policy_cwd.join("allowed.txt");
+    let forbidden_file = command_dir.join("forbidden.txt");
+
+    // Note writable_roots is empty: verify that `allowed_file` is writable
+    // only because it is under the sandbox policy cwd, not because it is under
+    // a writable root.
+    let policy = SandboxPolicy::WorkspaceWrite {
+        writable_roots: Vec::new(),
+        network_access: false,
+        exclude_tmpdir_env_var: true,
+        exclude_slash_tmp: true,
+    };
+
+    // Attempt to write inside the command cwd, which is outside of the
+    // sandbox policy cwd.
+    let forbidden_argv =
+        Vec::from(["bash", "-lc", "echo forbidden > forbidden.txt"].map(String::from));
+    let mut forbidden_child = spawn_command_under_sandbox(
+        forbidden_argv,
+        command_dir.clone(),
+        &policy,
+        policy_cwd.as_path(),
+        StdioPolicy::Inherit,
+        HashMap::new(),
+    )
+    .await
+    .expect("should spawn command writing to forbidden path");
+
+    let status = forbidden_child
+        .wait()
+        .await
+        .expect("should wait for forbidden command");
+    assert!(
+        !status.success(),
+        "sandbox unexpectedly allowed writing to command cwd: {status:?}"
+    );
+    let forbidden_exists = tokio::fs::try_exists(&forbidden_file)
+        .await
+        .expect("try_exists failed");
+    assert!(
+        !forbidden_exists,
+        "forbidden path should not have been created"
+    );
+
+    // Writing to a file under the sandbox policy cwd should succeed, even
+    // though the command itself still runs from the command cwd.
+    let allowed_argv = vec![
+        String::from("/usr/bin/touch"),
+        allowed_file.to_string_lossy().into_owned(),
+    ];
+    let mut allowed_child = spawn_command_under_sandbox(
+        allowed_argv,
+        command_dir,
+        &policy,
+        policy_cwd.as_path(),
+        StdioPolicy::Inherit,
+        HashMap::new(),
+    )
+    .await
+    .expect("should spawn command writing to sandbox root");
+
+    let status = allowed_child
+        .wait()
+        .await
+        .expect("should wait for allowed command");
+    assert!(
+        status.success(),
+        "sandbox blocked allowed write: {status:?}"
+    );
+    let allowed_exists = tokio::fs::try_exists(&allowed_file)
+        .await
+        .expect("try_exists allowed failed");
+    assert!(allowed_exists, "allowed path should exist");
+}
+
+/// Exercises `AF_UNIX` socket pairs through raw `libc` calls.
+///
+/// Creates a datagram socket pair, writes a short message on one end and reads
+/// it back with `recvfrom()` on the other, checking the payload. It then
+/// repeats a write/`recv()` round trip on a stream socket pair (checking only
+/// that the calls succeed) and closes all four descriptors.
+///
+/// # Panics
+/// Panics if any of the system calls reports failure or the datagram payload
+/// differs from what was sent. Descriptors are not closed on that path.
+fn unix_sock_body() {
+    // SAFETY: every pointer handed to libc comes from a live local array or
+    // slice and is paired with that same buffer's length; `socketpair` gets
+    // arrays of exactly two `i32`s; the source-address arguments of
+    // `recvfrom` are null. File descriptors are only used after the
+    // corresponding `socketpair` call was asserted to return 0.
+    unsafe {
+        let mut fds = [0i32; 2];
+        let r = libc::socketpair(libc::AF_UNIX, libc::SOCK_DGRAM, 0, fds.as_mut_ptr());
+        assert_eq!(
+            r,
+            0,
+            "socketpair(AF_UNIX, SOCK_DGRAM) failed: {}",
+            io::Error::last_os_error()
+        );
+
+        let msg = b"hello_unix";
+        // write() from one end (generic write is allowed)
+        let sent = libc::write(fds[0], msg.as_ptr() as *const libc::c_void, msg.len());
+        assert!(sent >= 0, "write() failed: {}", io::Error::last_os_error());
+
+        // recvfrom() on the other end. We don’t need the address for socketpair,
+        // so we pass null pointers for src address.
+        let mut buf = [0u8; 64];
+        let recvd = libc::recvfrom(
+            fds[1],
+            buf.as_mut_ptr() as *mut libc::c_void,
+            buf.len(),
+            0,
+            std::ptr::null_mut(),
+            std::ptr::null_mut(),
+        );
+        assert!(
+            recvd >= 0,
+            "recvfrom() failed: {}",
+            io::Error::last_os_error()
+        );
+
+        // `recvd` was asserted non-negative above, so the cast to usize cannot
+        // wrap a negative value.
+        let recvd_slice = &buf[..(recvd as usize)];
+        assert_eq!(
+            recvd_slice,
+            &msg[..],
+            "payload mismatch: sent {} bytes, got {} bytes",
+            msg.len(),
+            recvd
+        );
+
+        // Also exercise AF_UNIX stream socketpair quickly to ensure AF_UNIX in general works.
+        let mut sfds = [0i32; 2];
+        let sr = libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, sfds.as_mut_ptr());
+        assert_eq!(
+            sr,
+            0,
+            "socketpair(AF_UNIX, SOCK_STREAM) failed: {}",
+            io::Error::last_os_error()
+        );
+        let snt2 = libc::write(sfds[0], msg.as_ptr() as *const libc::c_void, msg.len());
+        assert!(
+            snt2 >= 0,
+            "write(stream) failed: {}",
+            io::Error::last_os_error()
+        );
+        // Only the return code is checked for the stream pair; the received
+        // bytes are not compared with `msg`.
+        let mut b2 = [0u8; 64];
+        let rcv2 = libc::recv(sfds[1], b2.as_mut_ptr() as *mut libc::c_void, b2.len(), 0);
+        assert!(
+            rcv2 >= 0,
+            "recv(stream) failed: {}",
+            io::Error::last_os_error()
+        );
+
+        // Clean up
+        let _ = libc::close(sfds[0]);
+        let _ = libc::close(sfds[1]);
+        let _ = libc::close(fds[0]);
+        let _ = libc::close(fds[1]);
+    }
+}
+
+/// Re-executes this test inside a read-only sandbox and runs
+/// `unix_sock_body` there.
+///
+/// In the parent process `run_code_under_sandbox` returns the child's exit
+/// status, which is asserted to be successful so that a failure inside the
+/// sandboxed child actually fails this test. In the re-executed child it
+/// returns `None` after running the body, and there is nothing more to check.
+#[tokio::test]
+async fn allow_unix_socketpair_recvfrom() {
+    let child_status = run_code_under_sandbox(
+        "allow_unix_socketpair_recvfrom",
+        &SandboxPolicy::ReadOnly,
+        || async { unix_sock_body() },
+    )
+    .await
+    .expect("should be able to reexec");
+
+    // Previously the status was discarded, so a panicking child went unnoticed.
+    if let Some(status) = child_status {
+        assert!(
+            status.success(),
+            "sandboxed child process failed: {status:?}"
+        );
+    }
+}
+
+/// Name of the environment variable that `run_code_under_sandbox` inspects to
+/// tell the re-executed child (variable set) from the launching parent
+/// (variable unset).
+const IN_SANDBOX_ENV_VAR: &str = "IN_SANDBOX";
+
+/// Runs `child_body` inside a sandboxed re-execution of the current test
+/// binary.
+///
+/// The function behaves differently depending on `IN_SANDBOX_ENV_VAR`:
+///
+/// * **Unset (parent):** re-launches the current executable under `policy`
+///   with `--exact <test_selector>` and the marker variable set to `1`, waits
+///   for it and returns `Ok(Some(status))`. `--nocapture` is forwarded, and
+///   the child's stdio is inherited, when the parent itself was started with
+///   `--nocapture`.
+/// * **Set (child):** awaits `child_body` and returns `Ok(None)`.
+///
+/// Callers should check the returned status; this function does not.
+///
+/// # Errors
+/// Returns the I/O error from resolving the current executable or working
+/// directory, from spawning the sandboxed process, or from waiting on it.
+pub async fn run_code_under_sandbox<F, Fut>(
+    test_selector: &str,
+    policy: &SandboxPolicy,
+    child_body: F,
+) -> io::Result<Option<ExitStatus>>
+where
+    F: FnOnce() -> Fut + Send + 'static,
+    Fut: Future<Output = ()> + Send + 'static,
+{
+    if std::env::var(IN_SANDBOX_ENV_VAR).is_ok() {
+        // Child branch: run the provided body.
+        child_body().await;
+        return Ok(None);
+    }
+
+    // Parent branch: build the command line that re-runs exactly one test.
+    let exe = std::env::current_exe()?;
+    let mut cmds = vec![exe.to_string_lossy().into_owned(), "--exact".into()];
+    let mut stdio_policy = StdioPolicy::RedirectForShellTool;
+    // Allow for us to pass forward --nocapture / use the right stdio policy.
+    if std::env::args().any(|a| a == "--nocapture") {
+        cmds.push("--nocapture".into());
+        stdio_policy = StdioPolicy::Inherit;
+    }
+    cmds.push(test_selector.into());
+
+    // The command and the sandbox policy share the current directory.
+    let command_cwd = std::env::current_dir()?;
+    let sandbox_cwd = command_cwd.clone();
+    let mut child = spawn_command_under_sandbox(
+        cmds,
+        command_cwd,
+        policy,
+        sandbox_cwd.as_path(),
+        stdio_policy,
+        // Use the shared constant so the parent sets the same variable the
+        // child checks.
+        HashMap::from([(IN_SANDBOX_ENV_VAR.into(), "1".into())]),
+    )
+    .await?;
+
+    let status = child.wait().await?;
+    Ok(Some(status))
+}
--- a/llmx-rs/exec/tests/suite/server_error_exit.rs
+++ b/llmx-rs/exec/tests/suite/server_error_exit.rs
@@ -0,0 +1,34 @@
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::expect_used, clippy::unwrap_used)]
+
+use core_test_support::responses;
+use core_test_support::test_codex_exec::test_codex_exec;
+use wiremock::matchers::any;
+
+/// A `response.failed` event from the server must make `codex-exec` terminate
+/// with exit code 1, so that scripts driving it can detect the failure.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn exits_non_zero_when_server_reports_error() -> anyhow::Result<()> {
+    let harness = test_codex_exec();
+
+    // The mocked Responses API stream consists of a single `response.failed`
+    // event carrying a synthetic error.
+    let failure_event = serde_json::json!({
+        "type": "response.failed",
+        "response": {
+            "id": "resp_err_1",
+            "error": {"code": "rate_limit_exceeded", "message": "synthetic server error"}
+        }
+    });
+    let mock_server = responses::start_mock_server().await;
+    let sse_body = responses::sse(vec![failure_event]);
+    responses::mount_sse_once_match(&mock_server, any(), sse_body).await;
+
+    harness
+        .cmd_with_server(&mock_server)
+        .args([
+            "--skip-git-repo-check",
+            "tell me something",
+            "--experimental-json",
+        ])
+        .assert()
+        .code(1);
+
+    Ok(())
+}