Phase 1: Repository & Infrastructure Setup

- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
  - Root: llmx-monorepo
  - CLI: @llmx/llmx
  - SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx

🤖 Generated with Claude Code
This commit is contained in:
Sebastian Krüger
2025-11-11 14:01:52 +01:00
parent 052b052832
commit f237fe560d
1151 changed files with 41 additions and 35 deletions

62
llmx-rs/exec/Cargo.toml Normal file
View File

@@ -0,0 +1,62 @@
[package]
edition = "2024"
name = "codex-exec"
version = { workspace = true }
[[bin]]
name = "codex-exec"
path = "src/main.rs"
[lib]
name = "codex_exec"
path = "src/lib.rs"
[lints]
workspace = true
[dependencies]
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
codex-arg0 = { workspace = true }
codex-common = { workspace = true, features = [
"cli",
"elapsed",
"sandbox_summary",
] }
codex-core = { workspace = true }
codex-ollama = { workspace = true }
codex-protocol = { workspace = true }
mcp-types = { workspace = true }
opentelemetry-appender-tracing = { workspace = true }
owo-colors = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
shlex = { workspace = true }
supports-color = { workspace = true }
tokio = { workspace = true, features = [
"io-std",
"macros",
"process",
"rt-multi-thread",
"signal",
] }
tracing = { workspace = true, features = ["log"] }
tracing-subscriber = { workspace = true, features = ["env-filter"] }
ts-rs = { workspace = true, features = [
"uuid-impl",
"serde-json-impl",
"no-serde-warnings",
] }
[dev-dependencies]
assert_cmd = { workspace = true }
core_test_support = { workspace = true }
libc = { workspace = true }
mcp-types = { workspace = true }
predicates = { workspace = true }
pretty_assertions = { workspace = true }
tempfile = { workspace = true }
uuid = { workspace = true }
walkdir = { workspace = true }
wiremock = { workspace = true }

109
llmx-rs/exec/src/cli.rs Normal file
View File

@@ -0,0 +1,109 @@
use clap::Parser;
use clap::ValueEnum;
use codex_common::CliConfigOverrides;
use std::path::PathBuf;
#[derive(Parser, Debug)]
#[command(version)]
pub struct Cli {
/// Action to perform. If omitted, runs a new non-interactive session.
#[command(subcommand)]
pub command: Option<Command>,
/// Optional image(s) to attach to the initial prompt.
#[arg(long = "image", short = 'i', value_name = "FILE", value_delimiter = ',', num_args = 1..)]
pub images: Vec<PathBuf>,
/// Model the agent should use.
#[arg(long, short = 'm')]
pub model: Option<String>,
#[arg(long = "oss", default_value_t = false)]
pub oss: bool,
/// Select the sandbox policy to use when executing model-generated shell
/// commands.
#[arg(long = "sandbox", short = 's', value_enum)]
pub sandbox_mode: Option<codex_common::SandboxModeCliArg>,
/// Configuration profile from config.toml to specify default options.
#[arg(long = "profile", short = 'p')]
pub config_profile: Option<String>,
/// Convenience alias for low-friction sandboxed automatic execution (-a on-failure, --sandbox workspace-write).
#[arg(long = "full-auto", default_value_t = false)]
pub full_auto: bool,
/// Skip all confirmation prompts and execute commands without sandboxing.
/// EXTREMELY DANGEROUS. Intended solely for running in environments that are externally sandboxed.
#[arg(
long = "dangerously-bypass-approvals-and-sandbox",
alias = "yolo",
default_value_t = false,
conflicts_with = "full_auto"
)]
pub dangerously_bypass_approvals_and_sandbox: bool,
/// Tell the agent to use the specified directory as its working root.
#[clap(long = "cd", short = 'C', value_name = "DIR")]
pub cwd: Option<PathBuf>,
/// Allow running Codex outside a Git repository.
#[arg(long = "skip-git-repo-check", default_value_t = false)]
pub skip_git_repo_check: bool,
/// Path to a JSON Schema file describing the model's final response shape.
#[arg(long = "output-schema", value_name = "FILE")]
pub output_schema: Option<PathBuf>,
#[clap(skip)]
pub config_overrides: CliConfigOverrides,
/// Specifies color settings for use in the output.
#[arg(long = "color", value_enum, default_value_t = Color::Auto)]
pub color: Color,
/// Print events to stdout as JSONL.
#[arg(long = "json", alias = "experimental-json", default_value_t = false)]
pub json: bool,
/// Specifies file where the last message from the agent should be written.
#[arg(long = "output-last-message", short = 'o', value_name = "FILE")]
pub last_message_file: Option<PathBuf>,
/// Initial instructions for the agent. If not provided as an argument (or
/// if `-` is used), instructions are read from stdin.
#[arg(value_name = "PROMPT", value_hint = clap::ValueHint::Other)]
pub prompt: Option<String>,
}
#[derive(Debug, clap::Subcommand)]
pub enum Command {
/// Resume a previous session by id or pick the most recent with --last.
Resume(ResumeArgs),
}
#[derive(Parser, Debug)]
pub struct ResumeArgs {
/// Conversation/session id (UUID). When provided, resumes this session.
/// If omitted, use --last to pick the most recent recorded session.
#[arg(value_name = "SESSION_ID")]
pub session_id: Option<String>,
/// Resume the most recent recorded session (newest) without specifying an id.
#[arg(long = "last", default_value_t = false, conflicts_with = "session_id")]
pub last: bool,
/// Prompt to send after resuming the session. If `-` is used, read from stdin.
#[arg(value_name = "PROMPT", value_hint = clap::ValueHint::Other)]
pub prompt: Option<String>,
}
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, ValueEnum)]
#[value(rename_all = "kebab-case")]
pub enum Color {
Always,
Never,
#[default]
Auto,
}

View File

@@ -0,0 +1,45 @@
use std::path::Path;
use codex_core::config::Config;
use codex_core::protocol::Event;
use codex_core::protocol::SessionConfiguredEvent;
pub(crate) enum CodexStatus {
Running,
InitiateShutdown,
Shutdown,
}
pub(crate) trait EventProcessor {
/// Print summary of effective configuration and user prompt.
fn print_config_summary(
&mut self,
config: &Config,
prompt: &str,
session_configured: &SessionConfiguredEvent,
);
/// Handle a single event emitted by the agent.
fn process_event(&mut self, event: Event) -> CodexStatus;
fn print_final_output(&mut self) {}
}
pub(crate) fn handle_last_message(last_agent_message: Option<&str>, output_file: &Path) {
let message = last_agent_message.unwrap_or_default();
write_last_message_file(message, Some(output_file));
if last_agent_message.is_none() {
eprintln!(
"Warning: no last agent message; wrote empty content to {}",
output_file.display()
);
}
}
fn write_last_message_file(contents: &str, last_message_path: Option<&Path>) {
if let Some(path) = last_message_path
&& let Err(e) = std::fs::write(path, contents)
{
eprintln!("Failed to write last message file {path:?}: {e}");
}
}

View File

@@ -0,0 +1,599 @@
use codex_common::elapsed::format_duration;
use codex_common::elapsed::format_elapsed;
use codex_core::config::Config;
use codex_core::protocol::AgentMessageEvent;
use codex_core::protocol::AgentReasoningRawContentEvent;
use codex_core::protocol::BackgroundEventEvent;
use codex_core::protocol::DeprecationNoticeEvent;
use codex_core::protocol::ErrorEvent;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandBeginEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::FileChange;
use codex_core::protocol::McpInvocation;
use codex_core::protocol::McpToolCallBeginEvent;
use codex_core::protocol::McpToolCallEndEvent;
use codex_core::protocol::PatchApplyBeginEvent;
use codex_core::protocol::PatchApplyEndEvent;
use codex_core::protocol::SessionConfiguredEvent;
use codex_core::protocol::StreamErrorEvent;
use codex_core::protocol::TaskCompleteEvent;
use codex_core::protocol::TurnAbortReason;
use codex_core::protocol::TurnDiffEvent;
use codex_core::protocol::WarningEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_protocol::num_format::format_with_separators;
use owo_colors::OwoColorize;
use owo_colors::Style;
use shlex::try_join;
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Instant;
use crate::event_processor::CodexStatus;
use crate::event_processor::EventProcessor;
use crate::event_processor::handle_last_message;
use codex_common::create_config_summary_entries;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
/// This should be configurable. When used in CI, users may not want to impose
/// a limit so they can see the full transcript.
const MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL: usize = 20;
pub(crate) struct EventProcessorWithHumanOutput {
call_id_to_patch: HashMap<String, PatchApplyBegin>,
// To ensure that --color=never is respected, ANSI escapes _must_ be added
// using .style() with one of these fields. If you need a new style, add a
// new field here.
bold: Style,
italic: Style,
dimmed: Style,
magenta: Style,
red: Style,
green: Style,
cyan: Style,
yellow: Style,
/// Whether to include `AgentReasoning` events in the output.
show_agent_reasoning: bool,
show_raw_agent_reasoning: bool,
last_message_path: Option<PathBuf>,
last_total_token_usage: Option<codex_core::protocol::TokenUsageInfo>,
final_message: Option<String>,
}
impl EventProcessorWithHumanOutput {
pub(crate) fn create_with_ansi(
with_ansi: bool,
config: &Config,
last_message_path: Option<PathBuf>,
) -> Self {
let call_id_to_patch = HashMap::new();
if with_ansi {
Self {
call_id_to_patch,
bold: Style::new().bold(),
italic: Style::new().italic(),
dimmed: Style::new().dimmed(),
magenta: Style::new().magenta(),
red: Style::new().red(),
green: Style::new().green(),
cyan: Style::new().cyan(),
yellow: Style::new().yellow(),
show_agent_reasoning: !config.hide_agent_reasoning,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
last_message_path,
last_total_token_usage: None,
final_message: None,
}
} else {
Self {
call_id_to_patch,
bold: Style::new(),
italic: Style::new(),
dimmed: Style::new(),
magenta: Style::new(),
red: Style::new(),
green: Style::new(),
cyan: Style::new(),
yellow: Style::new(),
show_agent_reasoning: !config.hide_agent_reasoning,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
last_message_path,
last_total_token_usage: None,
final_message: None,
}
}
}
}
struct PatchApplyBegin {
start_time: Instant,
auto_approved: bool,
}
/// Timestamped helper. The timestamp is styled with self.dimmed.
macro_rules! ts_msg {
($self:ident, $($arg:tt)*) => {{
eprintln!($($arg)*);
}};
}
impl EventProcessor for EventProcessorWithHumanOutput {
/// Print a concise summary of the effective configuration that will be used
/// for the session. This mirrors the information shown in the TUI welcome
/// screen.
fn print_config_summary(
&mut self,
config: &Config,
prompt: &str,
session_configured_event: &SessionConfiguredEvent,
) {
const VERSION: &str = env!("CARGO_PKG_VERSION");
ts_msg!(
self,
"OpenAI Codex v{} (research preview)\n--------",
VERSION
);
let mut entries = create_config_summary_entries(config);
entries.push((
"session id",
session_configured_event.session_id.to_string(),
));
for (key, value) in entries {
eprintln!("{} {}", format!("{key}:").style(self.bold), value);
}
eprintln!("--------");
// Echo the prompt that will be sent to the agent so it is visible in the
// transcript/logs before any events come in. Note the prompt may have been
// read from stdin, so it may not be visible in the terminal otherwise.
ts_msg!(self, "{}\n{}", "user".style(self.cyan), prompt);
}
fn process_event(&mut self, event: Event) -> CodexStatus {
let Event { id: _, msg } = event;
match msg {
EventMsg::Error(ErrorEvent { message }) => {
let prefix = "ERROR:".style(self.red);
ts_msg!(self, "{prefix} {message}");
}
EventMsg::Warning(WarningEvent { message }) => {
ts_msg!(
self,
"{} {message}",
"warning:".style(self.yellow).style(self.bold)
);
}
EventMsg::DeprecationNotice(DeprecationNoticeEvent { summary, details }) => {
ts_msg!(
self,
"{} {summary}",
"deprecated:".style(self.magenta).style(self.bold)
);
if let Some(details) = details {
ts_msg!(self, " {}", details.style(self.dimmed));
}
}
EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
ts_msg!(self, "{}", message.style(self.dimmed));
}
EventMsg::StreamError(StreamErrorEvent { message }) => {
ts_msg!(self, "{}", message.style(self.dimmed));
}
EventMsg::TaskStarted(_) => {
// Ignore.
}
EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
let last_message = last_agent_message.as_deref();
if let Some(output_file) = self.last_message_path.as_deref() {
handle_last_message(last_message, output_file);
}
self.final_message = last_agent_message;
return CodexStatus::InitiateShutdown;
}
EventMsg::TokenCount(ev) => {
self.last_total_token_usage = ev.info;
}
EventMsg::AgentReasoningSectionBreak(_) => {
if !self.show_agent_reasoning {
return CodexStatus::Running;
}
eprintln!();
}
EventMsg::AgentReasoningRawContent(AgentReasoningRawContentEvent { text }) => {
if self.show_raw_agent_reasoning {
ts_msg!(
self,
"{}\n{}",
"thinking".style(self.italic).style(self.magenta),
text,
);
}
}
EventMsg::AgentMessage(AgentMessageEvent { message }) => {
ts_msg!(
self,
"{}\n{}",
"codex".style(self.italic).style(self.magenta),
message,
);
}
EventMsg::ExecCommandBegin(ExecCommandBeginEvent { command, cwd, .. }) => {
eprint!(
"{}\n{} in {}",
"exec".style(self.italic).style(self.magenta),
escape_command(&command).style(self.bold),
cwd.to_string_lossy(),
);
}
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
aggregated_output,
duration,
exit_code,
..
}) => {
let duration = format!(" in {}", format_duration(duration));
let truncated_output = aggregated_output
.lines()
.take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL)
.collect::<Vec<_>>()
.join("\n");
match exit_code {
0 => {
let title = format!(" succeeded{duration}:");
ts_msg!(self, "{}", title.style(self.green));
}
_ => {
let title = format!(" exited {exit_code}{duration}:");
ts_msg!(self, "{}", title.style(self.red));
}
}
eprintln!("{}", truncated_output.style(self.dimmed));
}
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
call_id: _,
invocation,
}) => {
ts_msg!(
self,
"{} {}",
"tool".style(self.magenta),
format_mcp_invocation(&invocation).style(self.bold),
);
}
EventMsg::McpToolCallEnd(tool_call_end_event) => {
let is_success = tool_call_end_event.is_success();
let McpToolCallEndEvent {
call_id: _,
result,
invocation,
duration,
} = tool_call_end_event;
let duration = format!(" in {}", format_duration(duration));
let status_str = if is_success { "success" } else { "failed" };
let title_style = if is_success { self.green } else { self.red };
let title = format!(
"{} {status_str}{duration}:",
format_mcp_invocation(&invocation)
);
ts_msg!(self, "{}", title.style(title_style));
if let Ok(res) = result {
let val: serde_json::Value = res.into();
let pretty =
serde_json::to_string_pretty(&val).unwrap_or_else(|_| val.to_string());
for line in pretty.lines().take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL) {
eprintln!("{}", line.style(self.dimmed));
}
}
}
EventMsg::WebSearchEnd(WebSearchEndEvent { call_id: _, query }) => {
ts_msg!(self, "🌐 Searched: {query}");
}
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
call_id,
auto_approved,
changes,
}) => {
// Store metadata so we can calculate duration later when we
// receive the corresponding PatchApplyEnd event.
self.call_id_to_patch.insert(
call_id,
PatchApplyBegin {
start_time: Instant::now(),
auto_approved,
},
);
ts_msg!(
self,
"{}",
"file update".style(self.magenta).style(self.italic),
);
// Pretty-print the patch summary with colored diff markers so
// it's easy to scan in the terminal output.
for (path, change) in changes.iter() {
match change {
FileChange::Add { content } => {
let header = format!(
"{} {}",
format_file_change(change),
path.to_string_lossy()
);
eprintln!("{}", header.style(self.magenta));
for line in content.lines() {
eprintln!("{}", line.style(self.green));
}
}
FileChange::Delete { content } => {
let header = format!(
"{} {}",
format_file_change(change),
path.to_string_lossy()
);
eprintln!("{}", header.style(self.magenta));
for line in content.lines() {
eprintln!("{}", line.style(self.red));
}
}
FileChange::Update {
unified_diff,
move_path,
} => {
let header = if let Some(dest) = move_path {
format!(
"{} {} -> {}",
format_file_change(change),
path.to_string_lossy(),
dest.to_string_lossy()
)
} else {
format!("{} {}", format_file_change(change), path.to_string_lossy())
};
eprintln!("{}", header.style(self.magenta));
// Colorize diff lines. We keep file header lines
// (--- / +++) without extra coloring so they are
// still readable.
for diff_line in unified_diff.lines() {
if diff_line.starts_with('+') && !diff_line.starts_with("+++") {
eprintln!("{}", diff_line.style(self.green));
} else if diff_line.starts_with('-')
&& !diff_line.starts_with("---")
{
eprintln!("{}", diff_line.style(self.red));
} else {
eprintln!("{diff_line}");
}
}
}
}
}
}
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
call_id,
stdout,
stderr,
success,
..
}) => {
let patch_begin = self.call_id_to_patch.remove(&call_id);
// Compute duration and summary label similar to exec commands.
let (duration, label) = if let Some(PatchApplyBegin {
start_time,
auto_approved,
}) = patch_begin
{
(
format!(" in {}", format_elapsed(start_time)),
format!("apply_patch(auto_approved={auto_approved})"),
)
} else {
(String::new(), format!("apply_patch('{call_id}')"))
};
let (exit_code, output, title_style) = if success {
(0, stdout, self.green)
} else {
(1, stderr, self.red)
};
let title = format!("{label} exited {exit_code}{duration}:");
ts_msg!(self, "{}", title.style(title_style));
for line in output.lines() {
eprintln!("{}", line.style(self.dimmed));
}
}
EventMsg::TurnDiff(TurnDiffEvent { unified_diff }) => {
ts_msg!(
self,
"{}",
"file update:".style(self.magenta).style(self.italic)
);
eprintln!("{unified_diff}");
}
EventMsg::AgentReasoning(agent_reasoning_event) => {
if self.show_agent_reasoning {
ts_msg!(
self,
"{}\n{}",
"thinking".style(self.italic).style(self.magenta),
agent_reasoning_event.text,
);
}
}
EventMsg::SessionConfigured(session_configured_event) => {
let SessionConfiguredEvent {
session_id: conversation_id,
model,
reasoning_effort: _,
history_log_id: _,
history_entry_count: _,
initial_messages: _,
rollout_path: _,
} = session_configured_event;
ts_msg!(
self,
"{} {}",
"codex session".style(self.magenta).style(self.bold),
conversation_id.to_string().style(self.dimmed)
);
ts_msg!(self, "model: {}", model);
eprintln!();
}
EventMsg::PlanUpdate(plan_update_event) => {
let UpdatePlanArgs { explanation, plan } = plan_update_event;
// Header
ts_msg!(self, "{}", "Plan update".style(self.magenta));
// Optional explanation
if let Some(explanation) = explanation
&& !explanation.trim().is_empty()
{
ts_msg!(self, "{}", explanation.style(self.italic));
}
// Pretty-print the plan items with simple status markers.
for item in plan {
match item.status {
StepStatus::Completed => {
ts_msg!(self, " {} {}", "".style(self.green), item.step);
}
StepStatus::InProgress => {
ts_msg!(self, " {} {}", "".style(self.cyan), item.step);
}
StepStatus::Pending => {
ts_msg!(
self,
" {} {}",
"".style(self.dimmed),
item.step.style(self.dimmed)
);
}
}
}
}
EventMsg::ViewImageToolCall(view) => {
ts_msg!(
self,
"{} {}",
"viewed image".style(self.magenta),
view.path.display()
);
}
EventMsg::TurnAborted(abort_reason) => match abort_reason.reason {
TurnAbortReason::Interrupted => {
ts_msg!(self, "task interrupted");
}
TurnAbortReason::Replaced => {
ts_msg!(self, "task aborted: replaced by a new task");
}
TurnAbortReason::ReviewEnded => {
ts_msg!(self, "task aborted: review ended");
}
},
EventMsg::ShutdownComplete => return CodexStatus::Shutdown,
EventMsg::WebSearchBegin(_)
| EventMsg::ExecApprovalRequest(_)
| EventMsg::ApplyPatchApprovalRequest(_)
| EventMsg::ExecCommandOutputDelta(_)
| EventMsg::GetHistoryEntryResponse(_)
| EventMsg::McpListToolsResponse(_)
| EventMsg::ListCustomPromptsResponse(_)
| EventMsg::RawResponseItem(_)
| EventMsg::UserMessage(_)
| EventMsg::EnteredReviewMode(_)
| EventMsg::ExitedReviewMode(_)
| EventMsg::AgentMessageDelta(_)
| EventMsg::AgentReasoningDelta(_)
| EventMsg::AgentReasoningRawContentDelta(_)
| EventMsg::ItemStarted(_)
| EventMsg::ItemCompleted(_)
| EventMsg::AgentMessageContentDelta(_)
| EventMsg::ReasoningContentDelta(_)
| EventMsg::ReasoningRawContentDelta(_)
| EventMsg::UndoCompleted(_)
| EventMsg::UndoStarted(_) => {}
}
CodexStatus::Running
}
fn print_final_output(&mut self) {
if let Some(usage_info) = &self.last_total_token_usage {
eprintln!(
"{}\n{}",
"tokens used".style(self.magenta).style(self.italic),
format_with_separators(usage_info.total_token_usage.blended_total())
);
}
// If the user has not piped the final message to a file, they will see
// it twice: once written to stderr as part of the normal event
// processing, and once here on stdout. We print the token summary above
// to help break up the output visually in that case.
#[allow(clippy::print_stdout)]
if let Some(message) = &self.final_message {
if message.ends_with('\n') {
print!("{message}");
} else {
println!("{message}");
}
}
}
}
fn escape_command(command: &[String]) -> String {
try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "))
}
fn format_file_change(change: &FileChange) -> &'static str {
match change {
FileChange::Add { .. } => "A",
FileChange::Delete { .. } => "D",
FileChange::Update {
move_path: Some(_), ..
} => "R",
FileChange::Update {
move_path: None, ..
} => "M",
}
}
fn format_mcp_invocation(invocation: &McpInvocation) -> String {
// Build fully-qualified tool name: server.tool
let fq_tool_name = format!("{}.{}", invocation.server, invocation.tool);
// Format arguments as compact JSON so they fit on one line.
let args_str = invocation
.arguments
.as_ref()
.map(|v: &serde_json::Value| serde_json::to_string(v).unwrap_or_else(|_| v.to_string()))
.unwrap_or_default();
if args_str.is_empty() {
format!("{fq_tool_name}()")
} else {
format!("{fq_tool_name}({args_str})")
}
}

View File

@@ -0,0 +1,501 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::atomic::AtomicU64;
use crate::event_processor::CodexStatus;
use crate::event_processor::EventProcessor;
use crate::event_processor::handle_last_message;
use crate::exec_events::AgentMessageItem;
use crate::exec_events::CommandExecutionItem;
use crate::exec_events::CommandExecutionStatus;
use crate::exec_events::ErrorItem;
use crate::exec_events::FileChangeItem;
use crate::exec_events::FileUpdateChange;
use crate::exec_events::ItemCompletedEvent;
use crate::exec_events::ItemStartedEvent;
use crate::exec_events::ItemUpdatedEvent;
use crate::exec_events::McpToolCallItem;
use crate::exec_events::McpToolCallItemError;
use crate::exec_events::McpToolCallItemResult;
use crate::exec_events::McpToolCallStatus;
use crate::exec_events::PatchApplyStatus;
use crate::exec_events::PatchChangeKind;
use crate::exec_events::ReasoningItem;
use crate::exec_events::ThreadErrorEvent;
use crate::exec_events::ThreadEvent;
use crate::exec_events::ThreadItem;
use crate::exec_events::ThreadItemDetails;
use crate::exec_events::ThreadStartedEvent;
use crate::exec_events::TodoItem;
use crate::exec_events::TodoListItem;
use crate::exec_events::TurnCompletedEvent;
use crate::exec_events::TurnFailedEvent;
use crate::exec_events::TurnStartedEvent;
use crate::exec_events::Usage;
use crate::exec_events::WebSearchItem;
use codex_core::config::Config;
use codex_core::protocol::AgentMessageEvent;
use codex_core::protocol::AgentReasoningEvent;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandBeginEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::FileChange;
use codex_core::protocol::McpToolCallBeginEvent;
use codex_core::protocol::McpToolCallEndEvent;
use codex_core::protocol::PatchApplyBeginEvent;
use codex_core::protocol::PatchApplyEndEvent;
use codex_core::protocol::SessionConfiguredEvent;
use codex_core::protocol::TaskCompleteEvent;
use codex_core::protocol::TaskStartedEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use serde_json::Value as JsonValue;
use tracing::error;
use tracing::warn;
pub struct EventProcessorWithJsonOutput {
last_message_path: Option<PathBuf>,
next_event_id: AtomicU64,
// Tracks running commands by call_id, including the associated item id.
running_commands: HashMap<String, RunningCommand>,
running_patch_applies: HashMap<String, PatchApplyBeginEvent>,
// Tracks the todo list for the current turn (at most one per turn).
running_todo_list: Option<RunningTodoList>,
last_total_token_usage: Option<codex_core::protocol::TokenUsage>,
running_mcp_tool_calls: HashMap<String, RunningMcpToolCall>,
last_critical_error: Option<ThreadErrorEvent>,
}
#[derive(Debug, Clone)]
struct RunningCommand {
command: String,
item_id: String,
}
#[derive(Debug, Clone)]
struct RunningTodoList {
item_id: String,
items: Vec<TodoItem>,
}
#[derive(Debug, Clone)]
struct RunningMcpToolCall {
server: String,
tool: String,
item_id: String,
arguments: JsonValue,
}
impl EventProcessorWithJsonOutput {
pub fn new(last_message_path: Option<PathBuf>) -> Self {
Self {
last_message_path,
next_event_id: AtomicU64::new(0),
running_commands: HashMap::new(),
running_patch_applies: HashMap::new(),
running_todo_list: None,
last_total_token_usage: None,
running_mcp_tool_calls: HashMap::new(),
last_critical_error: None,
}
}
pub fn collect_thread_events(&mut self, event: &Event) -> Vec<ThreadEvent> {
match &event.msg {
EventMsg::SessionConfigured(ev) => self.handle_session_configured(ev),
EventMsg::AgentMessage(ev) => self.handle_agent_message(ev),
EventMsg::AgentReasoning(ev) => self.handle_reasoning_event(ev),
EventMsg::ExecCommandBegin(ev) => self.handle_exec_command_begin(ev),
EventMsg::ExecCommandEnd(ev) => self.handle_exec_command_end(ev),
EventMsg::McpToolCallBegin(ev) => self.handle_mcp_tool_call_begin(ev),
EventMsg::McpToolCallEnd(ev) => self.handle_mcp_tool_call_end(ev),
EventMsg::PatchApplyBegin(ev) => self.handle_patch_apply_begin(ev),
EventMsg::PatchApplyEnd(ev) => self.handle_patch_apply_end(ev),
EventMsg::WebSearchBegin(_) => Vec::new(),
EventMsg::WebSearchEnd(ev) => self.handle_web_search_end(ev),
EventMsg::TokenCount(ev) => {
if let Some(info) = &ev.info {
self.last_total_token_usage = Some(info.total_token_usage.clone());
}
Vec::new()
}
EventMsg::TaskStarted(ev) => self.handle_task_started(ev),
EventMsg::TaskComplete(_) => self.handle_task_complete(),
EventMsg::Error(ev) => {
let error = ThreadErrorEvent {
message: ev.message.clone(),
};
self.last_critical_error = Some(error.clone());
vec![ThreadEvent::Error(error)]
}
EventMsg::Warning(ev) => {
let item = ThreadItem {
id: self.get_next_item_id(),
details: ThreadItemDetails::Error(ErrorItem {
message: ev.message.clone(),
}),
};
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
}
EventMsg::StreamError(ev) => vec![ThreadEvent::Error(ThreadErrorEvent {
message: ev.message.clone(),
})],
EventMsg::PlanUpdate(ev) => self.handle_plan_update(ev),
_ => Vec::new(),
}
}
fn get_next_item_id(&self) -> String {
format!(
"item_{}",
self.next_event_id
.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
)
}
fn handle_session_configured(&self, payload: &SessionConfiguredEvent) -> Vec<ThreadEvent> {
vec![ThreadEvent::ThreadStarted(ThreadStartedEvent {
thread_id: payload.session_id.to_string(),
})]
}
fn handle_web_search_end(&self, ev: &WebSearchEndEvent) -> Vec<ThreadEvent> {
let item = ThreadItem {
id: self.get_next_item_id(),
details: ThreadItemDetails::WebSearch(WebSearchItem {
query: ev.query.clone(),
}),
};
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
}
fn handle_agent_message(&self, payload: &AgentMessageEvent) -> Vec<ThreadEvent> {
let item = ThreadItem {
id: self.get_next_item_id(),
details: ThreadItemDetails::AgentMessage(AgentMessageItem {
text: payload.message.clone(),
}),
};
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
}
fn handle_reasoning_event(&self, ev: &AgentReasoningEvent) -> Vec<ThreadEvent> {
let item = ThreadItem {
id: self.get_next_item_id(),
details: ThreadItemDetails::Reasoning(ReasoningItem {
text: ev.text.clone(),
}),
};
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
}
fn handle_exec_command_begin(&mut self, ev: &ExecCommandBeginEvent) -> Vec<ThreadEvent> {
let item_id = self.get_next_item_id();
let command_string = match shlex::try_join(ev.command.iter().map(String::as_str)) {
Ok(command_string) => command_string,
Err(e) => {
warn!(
call_id = ev.call_id,
"Failed to stringify command: {e:?}; skipping item.started"
);
ev.command.join(" ")
}
};
self.running_commands.insert(
ev.call_id.clone(),
RunningCommand {
command: command_string.clone(),
item_id: item_id.clone(),
},
);
let item = ThreadItem {
id: item_id,
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
command: command_string,
aggregated_output: String::new(),
exit_code: None,
status: CommandExecutionStatus::InProgress,
}),
};
vec![ThreadEvent::ItemStarted(ItemStartedEvent { item })]
}
fn handle_mcp_tool_call_begin(&mut self, ev: &McpToolCallBeginEvent) -> Vec<ThreadEvent> {
let item_id = self.get_next_item_id();
let server = ev.invocation.server.clone();
let tool = ev.invocation.tool.clone();
let arguments = ev.invocation.arguments.clone().unwrap_or(JsonValue::Null);
self.running_mcp_tool_calls.insert(
ev.call_id.clone(),
RunningMcpToolCall {
server: server.clone(),
tool: tool.clone(),
item_id: item_id.clone(),
arguments: arguments.clone(),
},
);
let item = ThreadItem {
id: item_id,
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server,
tool,
arguments,
result: None,
error: None,
status: McpToolCallStatus::InProgress,
}),
};
vec![ThreadEvent::ItemStarted(ItemStartedEvent { item })]
}
fn handle_mcp_tool_call_end(&mut self, ev: &McpToolCallEndEvent) -> Vec<ThreadEvent> {
let status = if ev.is_success() {
McpToolCallStatus::Completed
} else {
McpToolCallStatus::Failed
};
let (server, tool, item_id, arguments) =
match self.running_mcp_tool_calls.remove(&ev.call_id) {
Some(running) => (
running.server,
running.tool,
running.item_id,
running.arguments,
),
None => {
warn!(
call_id = ev.call_id,
"Received McpToolCallEnd without begin; synthesizing new item"
);
(
ev.invocation.server.clone(),
ev.invocation.tool.clone(),
self.get_next_item_id(),
ev.invocation.arguments.clone().unwrap_or(JsonValue::Null),
)
}
};
let (result, error) = match &ev.result {
Ok(value) => {
let result = McpToolCallItemResult {
content: value.content.clone(),
structured_content: value.structured_content.clone(),
};
(Some(result), None)
}
Err(message) => (
None,
Some(McpToolCallItemError {
message: message.clone(),
}),
),
};
let item = ThreadItem {
id: item_id,
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server,
tool,
arguments,
result,
error,
status,
}),
};
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
}
fn handle_patch_apply_begin(&mut self, ev: &PatchApplyBeginEvent) -> Vec<ThreadEvent> {
self.running_patch_applies
.insert(ev.call_id.clone(), ev.clone());
Vec::new()
}
fn map_change_kind(&self, kind: &FileChange) -> PatchChangeKind {
match kind {
FileChange::Add { .. } => PatchChangeKind::Add,
FileChange::Delete { .. } => PatchChangeKind::Delete,
FileChange::Update { .. } => PatchChangeKind::Update,
}
}
fn handle_patch_apply_end(&mut self, ev: &PatchApplyEndEvent) -> Vec<ThreadEvent> {
if let Some(running_patch_apply) = self.running_patch_applies.remove(&ev.call_id) {
let status = if ev.success {
PatchApplyStatus::Completed
} else {
PatchApplyStatus::Failed
};
let item = ThreadItem {
id: self.get_next_item_id(),
details: ThreadItemDetails::FileChange(FileChangeItem {
changes: running_patch_apply
.changes
.iter()
.map(|(path, change)| FileUpdateChange {
path: path.to_str().unwrap_or("").to_string(),
kind: self.map_change_kind(change),
})
.collect(),
status,
}),
};
return vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })];
}
Vec::new()
}
fn handle_exec_command_end(&mut self, ev: &ExecCommandEndEvent) -> Vec<ThreadEvent> {
let Some(RunningCommand { command, item_id }) = self.running_commands.remove(&ev.call_id)
else {
warn!(
call_id = ev.call_id,
"ExecCommandEnd without matching ExecCommandBegin; skipping item.completed"
);
return Vec::new();
};
let status = if ev.exit_code == 0 {
CommandExecutionStatus::Completed
} else {
CommandExecutionStatus::Failed
};
let item = ThreadItem {
id: item_id,
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
command,
aggregated_output: ev.aggregated_output.clone(),
exit_code: Some(ev.exit_code),
status,
}),
};
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
}
fn todo_items_from_plan(&self, args: &UpdatePlanArgs) -> Vec<TodoItem> {
args.plan
.iter()
.map(|p| TodoItem {
text: p.step.clone(),
completed: matches!(p.status, StepStatus::Completed),
})
.collect()
}
fn handle_plan_update(&mut self, args: &UpdatePlanArgs) -> Vec<ThreadEvent> {
let items = self.todo_items_from_plan(args);
if let Some(running) = &mut self.running_todo_list {
running.items = items.clone();
let item = ThreadItem {
id: running.item_id.clone(),
details: ThreadItemDetails::TodoList(TodoListItem { items }),
};
return vec![ThreadEvent::ItemUpdated(ItemUpdatedEvent { item })];
}
let item_id = self.get_next_item_id();
self.running_todo_list = Some(RunningTodoList {
item_id: item_id.clone(),
items: items.clone(),
});
let item = ThreadItem {
id: item_id,
details: ThreadItemDetails::TodoList(TodoListItem { items }),
};
vec![ThreadEvent::ItemStarted(ItemStartedEvent { item })]
}
fn handle_task_started(&mut self, _: &TaskStartedEvent) -> Vec<ThreadEvent> {
self.last_critical_error = None;
vec![ThreadEvent::TurnStarted(TurnStartedEvent {})]
}
fn handle_task_complete(&mut self) -> Vec<ThreadEvent> {
let usage = if let Some(u) = &self.last_total_token_usage {
Usage {
input_tokens: u.input_tokens,
cached_input_tokens: u.cached_input_tokens,
output_tokens: u.output_tokens,
}
} else {
Usage::default()
};
let mut items = Vec::new();
if let Some(running) = self.running_todo_list.take() {
let item = ThreadItem {
id: running.item_id,
details: ThreadItemDetails::TodoList(TodoListItem {
items: running.items,
}),
};
items.push(ThreadEvent::ItemCompleted(ItemCompletedEvent { item }));
}
if let Some(error) = self.last_critical_error.take() {
items.push(ThreadEvent::TurnFailed(TurnFailedEvent { error }));
} else {
items.push(ThreadEvent::TurnCompleted(TurnCompletedEvent { usage }));
}
items
}
}
impl EventProcessor for EventProcessorWithJsonOutput {
fn print_config_summary(&mut self, _: &Config, _: &str, ev: &SessionConfiguredEvent) {
self.process_event(Event {
id: "".to_string(),
msg: EventMsg::SessionConfigured(ev.clone()),
});
}
#[allow(clippy::print_stdout)]
fn process_event(&mut self, event: Event) -> CodexStatus {
let aggregated = self.collect_thread_events(&event);
for conv_event in aggregated {
match serde_json::to_string(&conv_event) {
Ok(line) => {
println!("{line}");
}
Err(e) => {
error!("Failed to serialize event: {e:?}");
}
}
}
let Event { msg, .. } = event;
if let EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) = msg {
if let Some(output_file) = self.last_message_path.as_deref() {
handle_last_message(last_agent_message.as_deref(), output_file);
}
CodexStatus::InitiateShutdown
} else {
CodexStatus::Running
}
}
}

View File

@@ -0,0 +1,246 @@
use mcp_types::ContentBlock as McpContentBlock;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value as JsonValue;
use ts_rs::TS;
/// Top-level JSONL events emitted by codex exec
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(tag = "type")]
pub enum ThreadEvent {
/// Emitted when a new thread is started as the first event.
#[serde(rename = "thread.started")]
ThreadStarted(ThreadStartedEvent),
/// Emitted when a turn is started by sending a new prompt to the model.
/// A turn encompasses all events that happen while agent is processing the prompt.
#[serde(rename = "turn.started")]
TurnStarted(TurnStartedEvent),
/// Emitted when a turn is completed. Typically right after the assistant's response.
#[serde(rename = "turn.completed")]
TurnCompleted(TurnCompletedEvent),
/// Indicates that a turn failed with an error.
#[serde(rename = "turn.failed")]
TurnFailed(TurnFailedEvent),
/// Emitted when a new item is added to the thread. Typically the item will be in an "in progress" state.
#[serde(rename = "item.started")]
ItemStarted(ItemStartedEvent),
/// Emitted when an item is updated.
#[serde(rename = "item.updated")]
ItemUpdated(ItemUpdatedEvent),
/// Signals that an item has reached a terminal state—either success or failure.
#[serde(rename = "item.completed")]
ItemCompleted(ItemCompletedEvent),
/// Represents an unrecoverable error emitted directly by the event stream.
#[serde(rename = "error")]
Error(ThreadErrorEvent),
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ThreadStartedEvent {
/// The identified of the new thread. Can be used to resume the thread later.
pub thread_id: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS, Default)]
pub struct TurnStartedEvent {}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TurnCompletedEvent {
pub usage: Usage,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TurnFailedEvent {
pub error: ThreadErrorEvent,
}
/// Describes the usage of tokens during a turn.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS, Default)]
pub struct Usage {
/// The number of input tokens used during the turn.
pub input_tokens: i64,
/// The number of cached input tokens used during the turn.
pub cached_input_tokens: i64,
/// The number of output tokens used during the turn.
pub output_tokens: i64,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ItemStartedEvent {
pub item: ThreadItem,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ItemCompletedEvent {
pub item: ThreadItem,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ItemUpdatedEvent {
pub item: ThreadItem,
}
/// Fatal error emitted by the stream.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ThreadErrorEvent {
pub message: String,
}
/// Canonical representation of a thread item and its domain-specific payload.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ThreadItem {
pub id: String,
#[serde(flatten)]
pub details: ThreadItemDetails,
}
/// Typed payloads for each supported thread item type.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ThreadItemDetails {
/// Response from the agent.
/// Either a natural-language response or a JSON string when structured output is requested.
AgentMessage(AgentMessageItem),
/// Agent's reasoning summary.
Reasoning(ReasoningItem),
/// Tracks a command executed by the agent. The item starts when the command is
/// spawned, and completes when the process exits with an exit code.
CommandExecution(CommandExecutionItem),
/// Represents a set of file changes by the agent. The item is emitted only as a
/// completed event once the patch succeeds or fails.
FileChange(FileChangeItem),
/// Represents a call to an MCP tool. The item starts when the invocation is
/// dispatched and completes when the MCP server reports success or failure.
McpToolCall(McpToolCallItem),
/// Captures a web search request. It starts when the search is kicked off
/// and completes when results are returned to the agent.
WebSearch(WebSearchItem),
/// Tracks the agent's running to-do list. It starts when the plan is first
/// issued, updates as steps change state, and completes when the turn ends.
TodoList(TodoListItem),
/// Describes a non-fatal error surfaced as an item.
Error(ErrorItem),
}
/// Response from the agent.
/// Either a natural-language response or a JSON string when structured output is requested.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct AgentMessageItem {
pub text: String,
}
/// Agent's reasoning summary.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ReasoningItem {
pub text: String,
}
/// The status of a command execution.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default, TS)]
#[serde(rename_all = "snake_case")]
pub enum CommandExecutionStatus {
#[default]
InProgress,
Completed,
Failed,
}
/// A command executed by the agent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct CommandExecutionItem {
pub command: String,
pub aggregated_output: String,
pub exit_code: Option<i32>,
pub status: CommandExecutionStatus,
}
/// A set of file changes by the agent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct FileUpdateChange {
pub path: String,
pub kind: PatchChangeKind,
}
/// The status of a file change.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(rename_all = "snake_case")]
pub enum PatchApplyStatus {
Completed,
Failed,
}
/// A set of file changes by the agent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct FileChangeItem {
pub changes: Vec<FileUpdateChange>,
pub status: PatchApplyStatus,
}
/// Indicates the type of the file change.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(rename_all = "snake_case")]
pub enum PatchChangeKind {
Add,
Delete,
Update,
}
/// The status of an MCP tool call.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default, TS)]
#[serde(rename_all = "snake_case")]
pub enum McpToolCallStatus {
#[default]
InProgress,
Completed,
Failed,
}
/// Result payload produced by an MCP tool invocation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct McpToolCallItemResult {
pub content: Vec<McpContentBlock>,
pub structured_content: Option<JsonValue>,
}
/// Error details reported by a failed MCP tool invocation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct McpToolCallItemError {
pub message: String,
}
/// A call to an MCP tool.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct McpToolCallItem {
pub server: String,
pub tool: String,
#[serde(default)]
pub arguments: JsonValue,
pub result: Option<McpToolCallItemResult>,
pub error: Option<McpToolCallItemError>,
pub status: McpToolCallStatus,
}
/// A web search request.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct WebSearchItem {
pub query: String,
}
/// An error notification.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ErrorItem {
pub message: String,
}
/// An item in agent's to-do list.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TodoItem {
pub text: String,
pub completed: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TodoListItem {
pub items: Vec<TodoItem>,
}

427
llmx-rs/exec/src/lib.rs Normal file
View File

@@ -0,0 +1,427 @@
// - In the default output mode, it is paramount that the only thing written to
// stdout is the final message (if any).
// - In --json mode, stdout must be valid JSONL, one event per line.
// For both modes, any other output must be written to stderr.
#![deny(clippy::print_stdout)]
mod cli;
mod event_processor;
mod event_processor_with_human_output;
pub mod event_processor_with_jsonl_output;
pub mod exec_events;
pub use cli::Cli;
use codex_core::AuthManager;
use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID;
use codex_core::ConversationManager;
use codex_core::NewConversation;
use codex_core::auth::enforce_login_restrictions;
use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use codex_core::git_info::get_git_repo_root;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SessionSource;
use codex_ollama::DEFAULT_OSS_MODEL;
use codex_protocol::config_types::SandboxMode;
use codex_protocol::user_input::UserInput;
use event_processor_with_human_output::EventProcessorWithHumanOutput;
use event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge;
use serde_json::Value;
use std::io::IsTerminal;
use std::io::Read;
use std::path::PathBuf;
use supports_color::Stream;
use tracing::debug;
use tracing::error;
use tracing::info;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::prelude::*;
use crate::cli::Command as ExecCommand;
use crate::event_processor::CodexStatus;
use crate::event_processor::EventProcessor;
use codex_core::default_client::set_default_originator;
use codex_core::find_conversation_path_by_id_str;
pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
if let Err(err) = set_default_originator("codex_exec".to_string()) {
tracing::warn!(?err, "Failed to set codex exec originator override {err:?}");
}
let Cli {
command,
images,
model: model_cli_arg,
oss,
config_profile,
full_auto,
dangerously_bypass_approvals_and_sandbox,
cwd,
skip_git_repo_check,
color,
last_message_file,
json: json_mode,
sandbox_mode: sandbox_mode_cli_arg,
prompt,
output_schema: output_schema_path,
config_overrides,
} = cli;
// Determine the prompt source (parent or subcommand) and read from stdin if needed.
let prompt_arg = match &command {
// Allow prompt before the subcommand by falling back to the parent-level prompt
// when the Resume subcommand did not provide its own prompt.
Some(ExecCommand::Resume(args)) => args.prompt.clone().or(prompt),
None => prompt,
};
let prompt = match prompt_arg {
Some(p) if p != "-" => p,
// Either `-` was passed or no positional arg.
maybe_dash => {
// When no arg (None) **and** stdin is a TTY, bail out early unless the
// user explicitly forced reading via `-`.
let force_stdin = matches!(maybe_dash.as_deref(), Some("-"));
if std::io::stdin().is_terminal() && !force_stdin {
eprintln!(
"No prompt provided. Either specify one as an argument or pipe the prompt into stdin."
);
std::process::exit(1);
}
// Ensure the user knows we are waiting on stdin, as they may
// have gotten into this state by mistake. If so, and they are not
// writing to stdin, Codex will hang indefinitely, so this should
// help them debug in that case.
if !force_stdin {
eprintln!("Reading prompt from stdin...");
}
let mut buffer = String::new();
if let Err(e) = std::io::stdin().read_to_string(&mut buffer) {
eprintln!("Failed to read prompt from stdin: {e}");
std::process::exit(1);
} else if buffer.trim().is_empty() {
eprintln!("No prompt provided via stdin.");
std::process::exit(1);
}
buffer
}
};
let output_schema = load_output_schema(output_schema_path);
let (stdout_with_ansi, stderr_with_ansi) = match color {
cli::Color::Always => (true, true),
cli::Color::Never => (false, false),
cli::Color::Auto => (
supports_color::on_cached(Stream::Stdout).is_some(),
supports_color::on_cached(Stream::Stderr).is_some(),
),
};
// Build fmt layer (existing logging) to compose with OTEL layer.
let default_level = "error";
// Build env_filter separately and attach via with_filter.
let env_filter = EnvFilter::try_from_default_env()
.or_else(|_| EnvFilter::try_new(default_level))
.unwrap_or_else(|_| EnvFilter::new(default_level));
let fmt_layer = tracing_subscriber::fmt::layer()
.with_ansi(stderr_with_ansi)
.with_writer(std::io::stderr)
.with_filter(env_filter);
let sandbox_mode = if full_auto {
Some(SandboxMode::WorkspaceWrite)
} else if dangerously_bypass_approvals_and_sandbox {
Some(SandboxMode::DangerFullAccess)
} else {
sandbox_mode_cli_arg.map(Into::<SandboxMode>::into)
};
// When using `--oss`, let the bootstrapper pick the model (defaulting to
// gpt-oss:20b) and ensure it is present locally. Also, force the builtin
// `oss` model provider.
let model = if let Some(model) = model_cli_arg {
Some(model)
} else if oss {
Some(DEFAULT_OSS_MODEL.to_owned())
} else {
None // No model specified, will use the default.
};
let model_provider = if oss {
Some(BUILT_IN_OSS_MODEL_PROVIDER_ID.to_string())
} else {
None // No specific model provider override.
};
// Load configuration and determine approval policy
let overrides = ConfigOverrides {
model,
review_model: None,
config_profile,
// Default to never ask for approvals in headless mode. Feature flags can override.
approval_policy: Some(AskForApproval::Never),
sandbox_mode,
cwd: cwd.map(|p| p.canonicalize().unwrap_or(p)),
model_provider,
codex_linux_sandbox_exe,
base_instructions: None,
developer_instructions: None,
compact_prompt: None,
include_apply_patch_tool: None,
show_raw_agent_reasoning: oss.then_some(true),
tools_web_search_request: None,
experimental_sandbox_command_assessment: None,
additional_writable_roots: Vec::new(),
};
// Parse `-c` overrides.
let cli_kv_overrides = match config_overrides.parse_overrides() {
Ok(v) => v,
Err(e) => {
eprintln!("Error parsing -c overrides: {e}");
std::process::exit(1);
}
};
let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides).await?;
if let Err(err) = enforce_login_restrictions(&config).await {
eprintln!("{err}");
std::process::exit(1);
}
let otel = codex_core::otel_init::build_provider(&config, env!("CARGO_PKG_VERSION"));
#[allow(clippy::print_stderr)]
let otel = match otel {
Ok(otel) => otel,
Err(e) => {
eprintln!("Could not create otel exporter: {e}");
std::process::exit(1);
}
};
if let Some(provider) = otel.as_ref() {
let otel_layer = OpenTelemetryTracingBridge::new(&provider.logger).with_filter(
tracing_subscriber::filter::filter_fn(codex_core::otel_init::codex_export_filter),
);
let _ = tracing_subscriber::registry()
.with(fmt_layer)
.with(otel_layer)
.try_init();
} else {
let _ = tracing_subscriber::registry().with(fmt_layer).try_init();
}
let mut event_processor: Box<dyn EventProcessor> = match json_mode {
true => Box::new(EventProcessorWithJsonOutput::new(last_message_file.clone())),
_ => Box::new(EventProcessorWithHumanOutput::create_with_ansi(
stdout_with_ansi,
&config,
last_message_file.clone(),
)),
};
if oss {
codex_ollama::ensure_oss_ready(&config)
.await
.map_err(|e| anyhow::anyhow!("OSS setup failed: {e}"))?;
}
let default_cwd = config.cwd.to_path_buf();
let default_approval_policy = config.approval_policy;
let default_sandbox_policy = config.sandbox_policy.clone();
let default_model = config.model.clone();
let default_effort = config.model_reasoning_effort;
let default_summary = config.model_reasoning_summary;
if !skip_git_repo_check && get_git_repo_root(&default_cwd).is_none() {
eprintln!("Not inside a trusted directory and --skip-git-repo-check was not specified.");
std::process::exit(1);
}
let auth_manager = AuthManager::shared(
config.codex_home.clone(),
true,
config.cli_auth_credentials_store_mode,
);
let conversation_manager = ConversationManager::new(auth_manager.clone(), SessionSource::Exec);
// Handle resume subcommand by resolving a rollout path and using explicit resume API.
let NewConversation {
conversation_id: _,
conversation,
session_configured,
} = if let Some(ExecCommand::Resume(args)) = command {
let resume_path = resolve_resume_path(&config, &args).await?;
if let Some(path) = resume_path {
conversation_manager
.resume_conversation_from_rollout(config.clone(), path, auth_manager.clone())
.await?
} else {
conversation_manager
.new_conversation(config.clone())
.await?
}
} else {
conversation_manager
.new_conversation(config.clone())
.await?
};
// Print the effective configuration and prompt so users can see what Codex
// is using.
event_processor.print_config_summary(&config, &prompt, &session_configured);
info!("Codex initialized with event: {session_configured:?}");
let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<Event>();
{
let conversation = conversation.clone();
tokio::spawn(async move {
loop {
tokio::select! {
_ = tokio::signal::ctrl_c() => {
tracing::debug!("Keyboard interrupt");
// Immediately notify Codex to abort any inflight task.
conversation.submit(Op::Interrupt).await.ok();
// Exit the inner loop and return to the main input prompt. The codex
// will emit a `TurnInterrupted` (Error) event which is drained later.
break;
}
res = conversation.next_event() => match res {
Ok(event) => {
debug!("Received event: {event:?}");
let is_shutdown_complete = matches!(event.msg, EventMsg::ShutdownComplete);
if let Err(e) = tx.send(event) {
error!("Error sending event: {e:?}");
break;
}
if is_shutdown_complete {
info!("Received shutdown event, exiting event loop.");
break;
}
},
Err(e) => {
error!("Error receiving event: {e:?}");
break;
}
}
}
}
});
}
// Package images and prompt into a single user input turn.
let mut items: Vec<UserInput> = images
.into_iter()
.map(|path| UserInput::LocalImage { path })
.collect();
items.push(UserInput::Text { text: prompt });
let initial_prompt_task_id = conversation
.submit(Op::UserTurn {
items,
cwd: default_cwd,
approval_policy: default_approval_policy,
sandbox_policy: default_sandbox_policy,
model: default_model,
effort: default_effort,
summary: default_summary,
final_output_json_schema: output_schema,
})
.await?;
info!("Sent prompt with event ID: {initial_prompt_task_id}");
// Run the loop until the task is complete.
// Track whether a fatal error was reported by the server so we can
// exit with a non-zero status for automation-friendly signaling.
let mut error_seen = false;
while let Some(event) = rx.recv().await {
if matches!(event.msg, EventMsg::Error(_)) {
error_seen = true;
}
let shutdown: CodexStatus = event_processor.process_event(event);
match shutdown {
CodexStatus::Running => continue,
CodexStatus::InitiateShutdown => {
conversation.submit(Op::Shutdown).await?;
}
CodexStatus::Shutdown => {
break;
}
}
}
event_processor.print_final_output();
if error_seen {
std::process::exit(1);
}
Ok(())
}
async fn resolve_resume_path(
config: &Config,
args: &crate::cli::ResumeArgs,
) -> anyhow::Result<Option<PathBuf>> {
if args.last {
let default_provider_filter = vec![config.model_provider_id.clone()];
match codex_core::RolloutRecorder::list_conversations(
&config.codex_home,
1,
None,
&[],
Some(default_provider_filter.as_slice()),
&config.model_provider_id,
)
.await
{
Ok(page) => Ok(page.items.first().map(|it| it.path.clone())),
Err(e) => {
error!("Error listing conversations: {e}");
Ok(None)
}
}
} else if let Some(id_str) = args.session_id.as_deref() {
let path = find_conversation_path_by_id_str(&config.codex_home, id_str).await?;
Ok(path)
} else {
Ok(None)
}
}
fn load_output_schema(path: Option<PathBuf>) -> Option<Value> {
let path = path?;
let schema_str = match std::fs::read_to_string(&path) {
Ok(contents) => contents,
Err(err) => {
eprintln!(
"Failed to read output schema file {}: {err}",
path.display()
);
std::process::exit(1);
}
};
match serde_json::from_str::<Value>(&schema_str) {
Ok(value) => Some(value),
Err(err) => {
eprintln!(
"Output schema file {} is not valid JSON: {err}",
path.display()
);
std::process::exit(1);
}
}
}

40
llmx-rs/exec/src/main.rs Normal file
View File

@@ -0,0 +1,40 @@
//! Entry-point for the `codex-exec` binary.
//!
//! When this CLI is invoked normally, it parses the standard `codex-exec` CLI
//! options and launches the non-interactive Codex agent. However, if it is
//! invoked with arg0 as `codex-linux-sandbox`, we instead treat the invocation
//! as a request to run the logic for the standalone `codex-linux-sandbox`
//! executable (i.e., parse any -s args and then run a *sandboxed* command under
//! Landlock + seccomp.
//!
//! This allows us to ship a completely separate set of functionality as part
//! of the `codex-exec` binary.
use clap::Parser;
use codex_arg0::arg0_dispatch_or_else;
use codex_common::CliConfigOverrides;
use codex_exec::Cli;
use codex_exec::run_main;
#[derive(Parser, Debug)]
struct TopCli {
#[clap(flatten)]
config_overrides: CliConfigOverrides,
#[clap(flatten)]
inner: Cli,
}
fn main() -> anyhow::Result<()> {
arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move {
let top_cli = TopCli::parse();
// Merge root-level overrides into inner CLI struct so downstream logic remains unchanged.
let mut inner = top_cli.inner;
inner
.config_overrides
.raw_overrides
.splice(0..0, top_cli.config_overrides.raw_overrides);
run_main(inner, codex_linux_sandbox_exe).await?;
Ok(())
})
}

View File

@@ -0,0 +1,5 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;
mod event_processor_with_json_output;

View File

@@ -0,0 +1,939 @@
use codex_core::protocol::AgentMessageEvent;
use codex_core::protocol::AgentReasoningEvent;
use codex_core::protocol::ErrorEvent;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandBeginEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::FileChange;
use codex_core::protocol::McpInvocation;
use codex_core::protocol::McpToolCallBeginEvent;
use codex_core::protocol::McpToolCallEndEvent;
use codex_core::protocol::PatchApplyBeginEvent;
use codex_core::protocol::PatchApplyEndEvent;
use codex_core::protocol::SessionConfiguredEvent;
use codex_core::protocol::WarningEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_exec::event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
use codex_exec::exec_events::AgentMessageItem;
use codex_exec::exec_events::CommandExecutionItem;
use codex_exec::exec_events::CommandExecutionStatus;
use codex_exec::exec_events::ErrorItem;
use codex_exec::exec_events::ItemCompletedEvent;
use codex_exec::exec_events::ItemStartedEvent;
use codex_exec::exec_events::ItemUpdatedEvent;
use codex_exec::exec_events::McpToolCallItem;
use codex_exec::exec_events::McpToolCallItemError;
use codex_exec::exec_events::McpToolCallItemResult;
use codex_exec::exec_events::McpToolCallStatus;
use codex_exec::exec_events::PatchApplyStatus;
use codex_exec::exec_events::PatchChangeKind;
use codex_exec::exec_events::ReasoningItem;
use codex_exec::exec_events::ThreadErrorEvent;
use codex_exec::exec_events::ThreadEvent;
use codex_exec::exec_events::ThreadItem;
use codex_exec::exec_events::ThreadItemDetails;
use codex_exec::exec_events::ThreadStartedEvent;
use codex_exec::exec_events::TodoItem as ExecTodoItem;
use codex_exec::exec_events::TodoListItem as ExecTodoListItem;
use codex_exec::exec_events::TurnCompletedEvent;
use codex_exec::exec_events::TurnFailedEvent;
use codex_exec::exec_events::TurnStartedEvent;
use codex_exec::exec_events::Usage;
use codex_exec::exec_events::WebSearchItem;
use codex_protocol::plan_tool::PlanItemArg;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use mcp_types::CallToolResult;
use mcp_types::ContentBlock;
use mcp_types::TextContent;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::path::PathBuf;
use std::time::Duration;
fn event(id: &str, msg: EventMsg) -> Event {
Event {
id: id.to_string(),
msg,
}
}
#[test]
fn session_configured_produces_thread_started_event() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let session_id =
codex_protocol::ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")
.unwrap();
let rollout_path = PathBuf::from("/tmp/rollout.json");
let ev = event(
"e1",
EventMsg::SessionConfigured(SessionConfiguredEvent {
session_id,
model: "codex-mini-latest".to_string(),
reasoning_effort: None,
history_log_id: 0,
history_entry_count: 0,
initial_messages: None,
rollout_path,
}),
);
let out = ep.collect_thread_events(&ev);
assert_eq!(
out,
vec![ThreadEvent::ThreadStarted(ThreadStartedEvent {
thread_id: "67e55044-10b1-426f-9247-bb680e5fe0c8".to_string(),
})]
);
}
#[test]
fn task_started_produces_turn_started_event() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let out = ep.collect_thread_events(&event(
"t1",
EventMsg::TaskStarted(codex_core::protocol::TaskStartedEvent {
model_context_window: Some(32_000),
}),
));
assert_eq!(out, vec![ThreadEvent::TurnStarted(TurnStartedEvent {})]);
}
#[test]
fn web_search_end_emits_item_completed() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let query = "rust async await".to_string();
let out = ep.collect_thread_events(&event(
"w1",
EventMsg::WebSearchEnd(WebSearchEndEvent {
call_id: "call-123".to_string(),
query: query.clone(),
}),
));
assert_eq!(
out,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::WebSearch(WebSearchItem { query }),
},
})]
);
}
#[test]
fn plan_update_emits_todo_list_started_updated_and_completed() {
let mut ep = EventProcessorWithJsonOutput::new(None);
// First plan update => item.started (todo_list)
let first = event(
"p1",
EventMsg::PlanUpdate(UpdatePlanArgs {
explanation: None,
plan: vec![
PlanItemArg {
step: "step one".to_string(),
status: StepStatus::Pending,
},
PlanItemArg {
step: "step two".to_string(),
status: StepStatus::InProgress,
},
],
}),
);
let out_first = ep.collect_thread_events(&first);
assert_eq!(
out_first,
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::TodoList(ExecTodoListItem {
items: vec![
ExecTodoItem {
text: "step one".to_string(),
completed: false
},
ExecTodoItem {
text: "step two".to_string(),
completed: false
},
],
}),
},
})]
);
// Second plan update in same turn => item.updated (same id)
let second = event(
"p2",
EventMsg::PlanUpdate(UpdatePlanArgs {
explanation: None,
plan: vec![
PlanItemArg {
step: "step one".to_string(),
status: StepStatus::Completed,
},
PlanItemArg {
step: "step two".to_string(),
status: StepStatus::InProgress,
},
],
}),
);
let out_second = ep.collect_thread_events(&second);
assert_eq!(
out_second,
vec![ThreadEvent::ItemUpdated(ItemUpdatedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::TodoList(ExecTodoListItem {
items: vec![
ExecTodoItem {
text: "step one".to_string(),
completed: true
},
ExecTodoItem {
text: "step two".to_string(),
completed: false
},
],
}),
},
})]
);
// Task completes => item.completed (same id, latest state)
let complete = event(
"p3",
EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
last_agent_message: None,
}),
);
let out_complete = ep.collect_thread_events(&complete);
assert_eq!(
out_complete,
vec![
ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::TodoList(ExecTodoListItem {
items: vec![
ExecTodoItem {
text: "step one".to_string(),
completed: true
},
ExecTodoItem {
text: "step two".to_string(),
completed: false
},
],
}),
},
}),
ThreadEvent::TurnCompleted(TurnCompletedEvent {
usage: Usage::default(),
}),
]
);
}
#[test]
fn mcp_tool_call_begin_and_end_emit_item_events() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let invocation = McpInvocation {
server: "server_a".to_string(),
tool: "tool_x".to_string(),
arguments: Some(json!({ "key": "value" })),
};
let begin = event(
"m1",
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
call_id: "call-1".to_string(),
invocation: invocation.clone(),
}),
);
let begin_events = ep.collect_thread_events(&begin);
assert_eq!(
begin_events,
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_a".to_string(),
tool: "tool_x".to_string(),
arguments: json!({ "key": "value" }),
result: None,
error: None,
status: McpToolCallStatus::InProgress,
}),
},
})]
);
let end = event(
"m2",
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
call_id: "call-1".to_string(),
invocation,
duration: Duration::from_secs(1),
result: Ok(CallToolResult {
content: Vec::new(),
is_error: None,
structured_content: None,
}),
}),
);
let end_events = ep.collect_thread_events(&end);
assert_eq!(
end_events,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_a".to_string(),
tool: "tool_x".to_string(),
arguments: json!({ "key": "value" }),
result: Some(McpToolCallItemResult {
content: Vec::new(),
structured_content: None,
}),
error: None,
status: McpToolCallStatus::Completed,
}),
},
})]
);
}
#[test]
fn mcp_tool_call_failure_sets_failed_status() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let invocation = McpInvocation {
server: "server_b".to_string(),
tool: "tool_y".to_string(),
arguments: Some(json!({ "param": 42 })),
};
let begin = event(
"m3",
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
call_id: "call-2".to_string(),
invocation: invocation.clone(),
}),
);
ep.collect_thread_events(&begin);
let end = event(
"m4",
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
call_id: "call-2".to_string(),
invocation,
duration: Duration::from_millis(5),
result: Err("tool exploded".to_string()),
}),
);
let events = ep.collect_thread_events(&end);
assert_eq!(
events,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_b".to_string(),
tool: "tool_y".to_string(),
arguments: json!({ "param": 42 }),
result: None,
error: Some(McpToolCallItemError {
message: "tool exploded".to_string(),
}),
status: McpToolCallStatus::Failed,
}),
},
})]
);
}
#[test]
fn mcp_tool_call_defaults_arguments_and_preserves_structured_content() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let invocation = McpInvocation {
server: "server_c".to_string(),
tool: "tool_z".to_string(),
arguments: None,
};
let begin = event(
"m5",
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
call_id: "call-3".to_string(),
invocation: invocation.clone(),
}),
);
let begin_events = ep.collect_thread_events(&begin);
assert_eq!(
begin_events,
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_c".to_string(),
tool: "tool_z".to_string(),
arguments: serde_json::Value::Null,
result: None,
error: None,
status: McpToolCallStatus::InProgress,
}),
},
})]
);
let end = event(
"m6",
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
call_id: "call-3".to_string(),
invocation,
duration: Duration::from_millis(10),
result: Ok(CallToolResult {
content: vec![ContentBlock::TextContent(TextContent {
annotations: None,
text: "done".to_string(),
r#type: "text".to_string(),
})],
is_error: None,
structured_content: Some(json!({ "status": "ok" })),
}),
}),
);
let events = ep.collect_thread_events(&end);
assert_eq!(
events,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_c".to_string(),
tool: "tool_z".to_string(),
arguments: serde_json::Value::Null,
result: Some(McpToolCallItemResult {
content: vec![ContentBlock::TextContent(TextContent {
annotations: None,
text: "done".to_string(),
r#type: "text".to_string(),
})],
structured_content: Some(json!({ "status": "ok" })),
}),
error: None,
status: McpToolCallStatus::Completed,
}),
},
})]
);
}
#[test]
fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
let mut ep = EventProcessorWithJsonOutput::new(None);
// First turn: start + complete
let start = event(
"t1",
EventMsg::PlanUpdate(UpdatePlanArgs {
explanation: None,
plan: vec![PlanItemArg {
step: "only".to_string(),
status: StepStatus::Pending,
}],
}),
);
let _ = ep.collect_thread_events(&start);
let complete = event(
"t2",
EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
last_agent_message: None,
}),
);
let _ = ep.collect_thread_events(&complete);
// Second turn: a new todo list should have a new id
let start_again = event(
"t3",
EventMsg::PlanUpdate(UpdatePlanArgs {
explanation: None,
plan: vec![PlanItemArg {
step: "again".to_string(),
status: StepStatus::Pending,
}],
}),
);
let out = ep.collect_thread_events(&start_again);
match &out[0] {
ThreadEvent::ItemStarted(ItemStartedEvent { item }) => {
assert_eq!(&item.id, "item_1");
}
other => panic!("unexpected event: {other:?}"),
}
}
#[test]
fn agent_reasoning_produces_item_completed_reasoning() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let ev = event(
"e1",
EventMsg::AgentReasoning(AgentReasoningEvent {
text: "thinking...".to_string(),
}),
);
let out = ep.collect_thread_events(&ev);
assert_eq!(
out,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::Reasoning(ReasoningItem {
text: "thinking...".to_string(),
}),
},
})]
);
}
#[test]
fn agent_message_produces_item_completed_agent_message() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let ev = event(
"e1",
EventMsg::AgentMessage(AgentMessageEvent {
message: "hello".to_string(),
}),
);
let out = ep.collect_thread_events(&ev);
assert_eq!(
out,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::AgentMessage(AgentMessageItem {
text: "hello".to_string(),
}),
},
})]
);
}
#[test]
fn error_event_produces_error() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let out = ep.collect_thread_events(&event(
"e1",
EventMsg::Error(codex_core::protocol::ErrorEvent {
message: "boom".to_string(),
}),
));
assert_eq!(
out,
vec![ThreadEvent::Error(ThreadErrorEvent {
message: "boom".to_string(),
})]
);
}
#[test]
fn warning_event_produces_error_item() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let out = ep.collect_thread_events(&event(
"e1",
EventMsg::Warning(WarningEvent {
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(),
}),
));
assert_eq!(
out,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::Error(ErrorItem {
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(),
}),
},
})]
);
}
#[test]
fn stream_error_event_produces_error() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let out = ep.collect_thread_events(&event(
"e1",
EventMsg::StreamError(codex_core::protocol::StreamErrorEvent {
message: "retrying".to_string(),
}),
));
assert_eq!(
out,
vec![ThreadEvent::Error(ThreadErrorEvent {
message: "retrying".to_string(),
})]
);
}
#[test]
fn error_followed_by_task_complete_produces_turn_failed() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let error_event = event(
"e1",
EventMsg::Error(ErrorEvent {
message: "boom".to_string(),
}),
);
assert_eq!(
ep.collect_thread_events(&error_event),
vec![ThreadEvent::Error(ThreadErrorEvent {
message: "boom".to_string(),
})]
);
let complete_event = event(
"e2",
EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
last_agent_message: None,
}),
);
assert_eq!(
ep.collect_thread_events(&complete_event),
vec![ThreadEvent::TurnFailed(TurnFailedEvent {
error: ThreadErrorEvent {
message: "boom".to_string(),
},
})]
);
}
#[test]
fn exec_command_end_success_produces_completed_command_item() {
let mut ep = EventProcessorWithJsonOutput::new(None);
// Begin -> no output
let begin = event(
"c1",
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id: "1".to_string(),
command: vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()],
cwd: std::env::current_dir().unwrap(),
parsed_cmd: Vec::new(),
is_user_shell_command: false,
}),
);
let out_begin = ep.collect_thread_events(&begin);
assert_eq!(
out_begin,
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
command: "bash -lc 'echo hi'".to_string(),
aggregated_output: String::new(),
exit_code: None,
status: CommandExecutionStatus::InProgress,
}),
},
})]
);
// End (success) -> item.completed (item_0)
let end_ok = event(
"c2",
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: "1".to_string(),
stdout: String::new(),
stderr: String::new(),
aggregated_output: "hi\n".to_string(),
exit_code: 0,
duration: Duration::from_millis(5),
formatted_output: String::new(),
}),
);
let out_ok = ep.collect_thread_events(&end_ok);
assert_eq!(
out_ok,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
command: "bash -lc 'echo hi'".to_string(),
aggregated_output: "hi\n".to_string(),
exit_code: Some(0),
status: CommandExecutionStatus::Completed,
}),
},
})]
);
}
#[test]
fn exec_command_end_failure_produces_failed_command_item() {
let mut ep = EventProcessorWithJsonOutput::new(None);
// Begin -> no output
let begin = event(
"c1",
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id: "2".to_string(),
command: vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()],
cwd: std::env::current_dir().unwrap(),
parsed_cmd: Vec::new(),
is_user_shell_command: false,
}),
);
assert_eq!(
ep.collect_thread_events(&begin),
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
command: "sh -c 'exit 1'".to_string(),
aggregated_output: String::new(),
exit_code: None,
status: CommandExecutionStatus::InProgress,
}),
},
})]
);
// End (failure) -> item.completed (item_0)
let end_fail = event(
"c2",
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: "2".to_string(),
stdout: String::new(),
stderr: String::new(),
aggregated_output: String::new(),
exit_code: 1,
duration: Duration::from_millis(2),
formatted_output: String::new(),
}),
);
let out_fail = ep.collect_thread_events(&end_fail);
assert_eq!(
out_fail,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
command: "sh -c 'exit 1'".to_string(),
aggregated_output: String::new(),
exit_code: Some(1),
status: CommandExecutionStatus::Failed,
}),
},
})]
);
}
#[test]
fn exec_command_end_without_begin_is_ignored() {
let mut ep = EventProcessorWithJsonOutput::new(None);
// End event arrives without a prior Begin; should produce no thread events.
let end_only = event(
"c1",
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: "no-begin".to_string(),
stdout: String::new(),
stderr: String::new(),
aggregated_output: String::new(),
exit_code: 0,
duration: Duration::from_millis(1),
formatted_output: String::new(),
}),
);
let out = ep.collect_thread_events(&end_only);
assert!(out.is_empty());
}
#[test]
fn patch_apply_success_produces_item_completed_patchapply() {
let mut ep = EventProcessorWithJsonOutput::new(None);
// Prepare a patch with multiple kinds of changes
let mut changes = std::collections::HashMap::new();
changes.insert(
PathBuf::from("a/added.txt"),
FileChange::Add {
content: "+hello".to_string(),
},
);
changes.insert(
PathBuf::from("b/deleted.txt"),
FileChange::Delete {
content: "-goodbye".to_string(),
},
);
changes.insert(
PathBuf::from("c/modified.txt"),
FileChange::Update {
unified_diff: "--- c/modified.txt\n+++ c/modified.txt\n@@\n-old\n+new\n".to_string(),
move_path: Some(PathBuf::from("c/renamed.txt")),
},
);
// Begin -> no output
let begin = event(
"p1",
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
call_id: "call-1".to_string(),
auto_approved: true,
changes: changes.clone(),
}),
);
let out_begin = ep.collect_thread_events(&begin);
assert!(out_begin.is_empty());
// End (success) -> item.completed (item_0)
let end = event(
"p2",
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
call_id: "call-1".to_string(),
stdout: "applied 3 changes".to_string(),
stderr: String::new(),
success: true,
}),
);
let out_end = ep.collect_thread_events(&end);
assert_eq!(out_end.len(), 1);
// Validate structure without relying on HashMap iteration order
match &out_end[0] {
ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => {
assert_eq!(&item.id, "item_0");
match &item.details {
ThreadItemDetails::FileChange(file_update) => {
assert_eq!(file_update.status, PatchApplyStatus::Completed);
let mut actual: Vec<(String, PatchChangeKind)> = file_update
.changes
.iter()
.map(|c| (c.path.clone(), c.kind.clone()))
.collect();
actual.sort_by(|a, b| a.0.cmp(&b.0));
let mut expected = vec![
("a/added.txt".to_string(), PatchChangeKind::Add),
("b/deleted.txt".to_string(), PatchChangeKind::Delete),
("c/modified.txt".to_string(), PatchChangeKind::Update),
];
expected.sort_by(|a, b| a.0.cmp(&b.0));
assert_eq!(actual, expected);
}
other => panic!("unexpected details: {other:?}"),
}
}
other => panic!("unexpected event: {other:?}"),
}
}
#[test]
fn patch_apply_failure_produces_item_completed_patchapply_failed() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let mut changes = std::collections::HashMap::new();
changes.insert(
PathBuf::from("file.txt"),
FileChange::Update {
unified_diff: "--- file.txt\n+++ file.txt\n@@\n-old\n+new\n".to_string(),
move_path: None,
},
);
// Begin -> no output
let begin = event(
"p1",
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
call_id: "call-2".to_string(),
auto_approved: false,
changes: changes.clone(),
}),
);
assert!(ep.collect_thread_events(&begin).is_empty());
// End (failure) -> item.completed (item_0) with Failed status
let end = event(
"p2",
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
call_id: "call-2".to_string(),
stdout: String::new(),
stderr: "failed to apply".to_string(),
success: false,
}),
);
let out_end = ep.collect_thread_events(&end);
assert_eq!(out_end.len(), 1);
match &out_end[0] {
ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => {
assert_eq!(&item.id, "item_0");
match &item.details {
ThreadItemDetails::FileChange(file_update) => {
assert_eq!(file_update.status, PatchApplyStatus::Failed);
assert_eq!(file_update.changes.len(), 1);
assert_eq!(file_update.changes[0].path, "file.txt".to_string());
assert_eq!(file_update.changes[0].kind, PatchChangeKind::Update);
}
other => panic!("unexpected details: {other:?}"),
}
}
other => panic!("unexpected event: {other:?}"),
}
}
#[test]
fn task_complete_produces_turn_completed_with_usage() {
let mut ep = EventProcessorWithJsonOutput::new(None);
// First, feed a TokenCount event with known totals.
let usage = codex_core::protocol::TokenUsage {
input_tokens: 1200,
cached_input_tokens: 200,
output_tokens: 345,
reasoning_output_tokens: 0,
total_tokens: 0,
};
let info = codex_core::protocol::TokenUsageInfo {
total_token_usage: usage.clone(),
last_token_usage: usage,
model_context_window: None,
};
let token_count_event = event(
"e1",
EventMsg::TokenCount(codex_core::protocol::TokenCountEvent {
info: Some(info),
rate_limits: None,
}),
);
assert!(ep.collect_thread_events(&token_count_event).is_empty());
// Then TaskComplete should produce turn.completed with the captured usage.
let complete_event = event(
"e2",
EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
last_agent_message: Some("done".to_string()),
}),
);
let out = ep.collect_thread_events(&complete_event);
assert_eq!(
out,
vec![ThreadEvent::TurnCompleted(TurnCompletedEvent {
usage: Usage {
input_tokens: 1200,
cached_input_tokens: 200,
output_tokens: 345,
},
})]
);
}

View File

@@ -0,0 +1,4 @@
class BaseClass:
def method():
return True

View File

@@ -0,0 +1,10 @@
event: response.created
data: {"type":"response.created","response":{"id":"resp1"}}
event: response.output_item.done
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"fixture hello"}]}}
event: response.completed
data: {"type":"response.completed","response":{"id":"resp1","output":[]}}

View File

@@ -0,0 +1,151 @@
#![allow(clippy::expect_used, clippy::unwrap_used, unused_imports)]
use anyhow::Context;
use assert_cmd::prelude::*;
use codex_core::CODEX_APPLY_PATCH_ARG1;
use core_test_support::responses::ev_apply_patch_custom_tool_call;
use core_test_support::responses::ev_apply_patch_function_call;
use core_test_support::responses::ev_completed;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use std::fs;
use std::process::Command;
use tempfile::tempdir;
/// While we may add an `apply-patch` subcommand to the `codex` CLI multitool
/// at some point, we must ensure that the smaller `codex-exec` CLI can still
/// emulate the `apply_patch` CLI.
#[test]
fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
let tmp = tempdir()?;
let relative_path = "source.txt";
let absolute_path = tmp.path().join(relative_path);
fs::write(&absolute_path, "original content\n")?;
Command::cargo_bin("codex-exec")
.context("should find binary for codex-exec")?
.arg(CODEX_APPLY_PATCH_ARG1)
.arg(
r#"*** Begin Patch
*** Update File: source.txt
@@
-original content
+modified by apply_patch
*** End Patch"#,
)
.current_dir(tmp.path())
.assert()
.success()
.stdout("Success. Updated the following files:\nM source.txt\n")
.stderr(predicates::str::is_empty());
assert_eq!(
fs::read_to_string(absolute_path)?,
"modified by apply_patch\n"
);
Ok(())
}
#[cfg(not(target_os = "windows"))]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_apply_patch_tool() -> anyhow::Result<()> {
use core_test_support::skip_if_no_network;
use core_test_support::test_codex_exec::test_codex_exec;
skip_if_no_network!(Ok(()));
let test = test_codex_exec();
let tmp_path = test.cwd_path().to_path_buf();
let add_patch = r#"*** Begin Patch
*** Add File: test.md
+Hello world
*** End Patch"#;
let update_patch = r#"*** Begin Patch
*** Update File: test.md
@@
-Hello world
+Final text
*** End Patch"#;
let response_streams = vec![
sse(vec![
ev_apply_patch_custom_tool_call("request_0", add_patch),
ev_completed("request_0"),
]),
sse(vec![
ev_apply_patch_function_call("request_1", update_patch),
ev_completed("request_1"),
]),
sse(vec![ev_completed("request_2")]),
];
let server = start_mock_server().await;
mount_sse_sequence(&server, response_streams).await;
test.cmd_with_server(&server)
.arg("--skip-git-repo-check")
.arg("-s")
.arg("danger-full-access")
.arg("foo")
.assert()
.success();
let final_path = tmp_path.join("test.md");
let contents = std::fs::read_to_string(&final_path)
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
assert_eq!(contents, "Final text\n");
Ok(())
}
#[cfg(not(target_os = "windows"))]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
use core_test_support::skip_if_no_network;
use core_test_support::test_codex_exec::test_codex_exec;
skip_if_no_network!(Ok(()));
let test = test_codex_exec();
let freeform_add_patch = r#"*** Begin Patch
*** Add File: app.py
+class BaseClass:
+ def method():
+ return False
*** End Patch"#;
let freeform_update_patch = r#"*** Begin Patch
*** Update File: app.py
@@ def method():
- return False
+
+ return True
*** End Patch"#;
let response_streams = vec![
sse(vec![
ev_apply_patch_custom_tool_call("request_0", freeform_add_patch),
ev_completed("request_0"),
]),
sse(vec![
ev_apply_patch_custom_tool_call("request_1", freeform_update_patch),
ev_completed("request_1"),
]),
sse(vec![ev_completed("request_2")]),
];
let server = start_mock_server().await;
mount_sse_sequence(&server, response_streams).await;
test.cmd_with_server(&server)
.arg("--skip-git-repo-check")
.arg("-s")
.arg("danger-full-access")
.arg("foo")
.assert()
.success();
// Verify final file contents
let final_path = test.cwd_path().join("app.py");
let contents = std::fs::read_to_string(&final_path)
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
assert_eq!(
contents,
include_str!("../fixtures/apply_patch_freeform_final.txt")
);
Ok(())
}

View File

@@ -0,0 +1,30 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use core_test_support::responses::ev_completed;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex_exec::test_codex_exec;
use wiremock::matchers::header;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_uses_codex_api_key_env_var() -> anyhow::Result<()> {
let test = test_codex_exec();
let server = start_mock_server().await;
mount_sse_once_match(
&server,
header("Authorization", "Bearer dummy"),
sse(vec![ev_completed("request_0")]),
)
.await;
test.cmd_with_server(&server)
.arg("--skip-git-repo-check")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg("echo testing codex api key")
.assert()
.success();
Ok(())
}

View File

@@ -0,0 +1,8 @@
// Aggregates all former standalone integration tests as modules.
mod apply_patch;
mod auth_env;
mod originator;
mod output_schema;
mod resume;
mod sandbox;
mod server_error_exit;

View File

@@ -0,0 +1,52 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use core_test_support::responses;
use core_test_support::test_codex_exec::test_codex_exec;
use wiremock::matchers::header;
/// Verify that when the server reports an error, `codex-exec` exits with a
/// non-zero status code so automation can detect failures.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn send_codex_exec_originator() -> anyhow::Result<()> {
let test = test_codex_exec();
let server = responses::start_mock_server().await;
let body = responses::sse(vec![
responses::ev_response_created("response_1"),
responses::ev_assistant_message("response_1", "Hello, world!"),
responses::ev_completed("response_1"),
]);
responses::mount_sse_once_match(&server, header("Originator", "codex_exec"), body).await;
test.cmd_with_server(&server)
.arg("--skip-git-repo-check")
.arg("tell me something")
.assert()
.code(0);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn supports_originator_override() -> anyhow::Result<()> {
let test = test_codex_exec();
let server = responses::start_mock_server().await;
let body = responses::sse(vec![
responses::ev_response_created("response_1"),
responses::ev_assistant_message("response_1", "Hello, world!"),
responses::ev_completed("response_1"),
]);
responses::mount_sse_once_match(&server, header("Originator", "codex_exec_override"), body)
.await;
test.cmd_with_server(&server)
.env("CODEX_INTERNAL_ORIGINATOR_OVERRIDE", "codex_exec_override")
.arg("--skip-git-repo-check")
.arg("tell me something")
.assert()
.code(0);
Ok(())
}

View File

@@ -0,0 +1,63 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use core_test_support::responses;
use core_test_support::test_codex_exec::test_codex_exec;
use serde_json::Value;
use wiremock::matchers::any;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
let test = test_codex_exec();
let schema_contents = serde_json::json!({
"type": "object",
"properties": {
"answer": { "type": "string" }
},
"required": ["answer"],
"additionalProperties": false
});
let schema_path = test.cwd_path().join("schema.json");
std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?;
let expected_schema: Value = schema_contents;
let server = responses::start_mock_server().await;
let body = responses::sse(vec![
responses::ev_response_created("resp1"),
responses::ev_assistant_message("m1", "fixture hello"),
responses::ev_completed("resp1"),
]);
let response_mock = responses::mount_sse_once_match(&server, any(), body).await;
test.cmd_with_server(&server)
.arg("--skip-git-repo-check")
// keep using -C in the test to exercise the flag as well
.arg("-C")
.arg(test.cwd_path())
.arg("--output-schema")
.arg(&schema_path)
.arg("-m")
.arg("gpt-5")
.arg("tell me a joke")
.assert()
.success();
let request = response_mock.single_request();
let payload: Value = request.body_json();
let text = payload.get("text").expect("request missing text field");
let format = text
.get("format")
.expect("request missing text.format field");
assert_eq!(
format,
&serde_json::json!({
"name": "codex_output_schema",
"type": "json_schema",
"strict": true,
"schema": expected_schema,
})
);
Ok(())
}

View File

@@ -0,0 +1,257 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Context;
use core_test_support::test_codex_exec::test_codex_exec;
use serde_json::Value;
use std::path::Path;
use std::string::ToString;
use uuid::Uuid;
use walkdir::WalkDir;
/// Utility: scan the sessions dir for a rollout file that contains `marker`
/// in any response_item.message.content entry. Returns the absolute path.
fn find_session_file_containing_marker(
sessions_dir: &std::path::Path,
marker: &str,
) -> Option<std::path::PathBuf> {
for entry in WalkDir::new(sessions_dir) {
let entry = match entry {
Ok(e) => e,
Err(_) => continue,
};
if !entry.file_type().is_file() {
continue;
}
if !entry.file_name().to_string_lossy().ends_with(".jsonl") {
continue;
}
let path = entry.path();
let Ok(content) = std::fs::read_to_string(path) else {
continue;
};
// Skip the first meta line and scan remaining JSONL entries.
let mut lines = content.lines();
if lines.next().is_none() {
continue;
}
for line in lines {
if line.trim().is_empty() {
continue;
}
let Ok(item): Result<Value, _> = serde_json::from_str(line) else {
continue;
};
if item.get("type").and_then(|t| t.as_str()) == Some("response_item")
&& let Some(payload) = item.get("payload")
&& payload.get("type").and_then(|t| t.as_str()) == Some("message")
&& payload
.get("content")
.map(ToString::to_string)
.unwrap_or_default()
.contains(marker)
{
return Some(path.to_path_buf());
}
}
}
None
}
/// Extract the conversation UUID from the first SessionMeta line in the rollout file.
fn extract_conversation_id(path: &std::path::Path) -> String {
let content = std::fs::read_to_string(path).unwrap();
let mut lines = content.lines();
let meta_line = lines.next().expect("missing meta line");
let meta: Value = serde_json::from_str(meta_line).expect("invalid meta json");
meta.get("payload")
.and_then(|p| p.get("id"))
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string()
}
#[test]
fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> {
let test = test_codex_exec();
let fixture =
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
// 1) First run: create a session with a unique marker in the content.
let marker = format!("resume-last-{}", Uuid::new_v4());
let prompt = format!("echo {marker}");
test.cmd()
.env("CODEX_RS_SSE_FIXTURE", &fixture)
.env("OPENAI_BASE_URL", "http://unused.local")
.arg("--skip-git-repo-check")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt)
.assert()
.success();
// Find the created session file containing the marker.
let sessions_dir = test.home_path().join("sessions");
let path = find_session_file_containing_marker(&sessions_dir, &marker)
.expect("no session file found after first run");
// 2) Second run: resume the most recent file with a new marker.
let marker2 = format!("resume-last-2-{}", Uuid::new_v4());
let prompt2 = format!("echo {marker2}");
test.cmd()
.env("CODEX_RS_SSE_FIXTURE", &fixture)
.env("OPENAI_BASE_URL", "http://unused.local")
.arg("--skip-git-repo-check")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt2)
.arg("resume")
.arg("--last")
.assert()
.success();
// Ensure the same file was updated and contains both markers.
let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2)
.expect("no resumed session file containing marker2");
assert_eq!(
resumed_path, path,
"resume --last should append to existing file"
);
let content = std::fs::read_to_string(&resumed_path)?;
assert!(content.contains(&marker));
assert!(content.contains(&marker2));
Ok(())
}
#[test]
fn exec_resume_by_id_appends_to_existing_file() -> anyhow::Result<()> {
let test = test_codex_exec();
let fixture =
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
// 1) First run: create a session
let marker = format!("resume-by-id-{}", Uuid::new_v4());
let prompt = format!("echo {marker}");
test.cmd()
.env("CODEX_RS_SSE_FIXTURE", &fixture)
.env("OPENAI_BASE_URL", "http://unused.local")
.arg("--skip-git-repo-check")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt)
.assert()
.success();
let sessions_dir = test.home_path().join("sessions");
let path = find_session_file_containing_marker(&sessions_dir, &marker)
.expect("no session file found after first run");
let session_id = extract_conversation_id(&path);
assert!(
!session_id.is_empty(),
"missing conversation id in meta line"
);
// 2) Resume by id
let marker2 = format!("resume-by-id-2-{}", Uuid::new_v4());
let prompt2 = format!("echo {marker2}");
test.cmd()
.env("CODEX_RS_SSE_FIXTURE", &fixture)
.env("OPENAI_BASE_URL", "http://unused.local")
.arg("--skip-git-repo-check")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt2)
.arg("resume")
.arg(&session_id)
.assert()
.success();
let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2)
.expect("no resumed session file containing marker2");
assert_eq!(
resumed_path, path,
"resume by id should append to existing file"
);
let content = std::fs::read_to_string(&resumed_path)?;
assert!(content.contains(&marker));
assert!(content.contains(&marker2));
Ok(())
}
#[test]
fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
let test = test_codex_exec();
let fixture =
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
let marker = format!("resume-config-{}", Uuid::new_v4());
let prompt = format!("echo {marker}");
test.cmd()
.env("CODEX_RS_SSE_FIXTURE", &fixture)
.env("OPENAI_BASE_URL", "http://unused.local")
.arg("--skip-git-repo-check")
.arg("--sandbox")
.arg("workspace-write")
.arg("--model")
.arg("gpt-5")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt)
.assert()
.success();
let sessions_dir = test.home_path().join("sessions");
let path = find_session_file_containing_marker(&sessions_dir, &marker)
.expect("no session file found after first run");
let marker2 = format!("resume-config-2-{}", Uuid::new_v4());
let prompt2 = format!("echo {marker2}");
let output = test
.cmd()
.env("CODEX_RS_SSE_FIXTURE", &fixture)
.env("OPENAI_BASE_URL", "http://unused.local")
.arg("--skip-git-repo-check")
.arg("--sandbox")
.arg("workspace-write")
.arg("--model")
.arg("gpt-5-high")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt2)
.arg("resume")
.arg("--last")
.output()
.context("resume run should succeed")?;
assert!(output.status.success(), "resume run failed: {output:?}");
let stderr = String::from_utf8(output.stderr)?;
assert!(
stderr.contains("model: gpt-5-high"),
"stderr missing model override: {stderr}"
);
if cfg!(target_os = "windows") {
assert!(
stderr.contains("sandbox: read-only"),
"stderr missing downgraded sandbox note: {stderr}"
);
} else {
assert!(
stderr.contains("sandbox: workspace-write"),
"stderr missing sandbox override: {stderr}"
);
}
let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2)
.expect("no resumed session file containing marker2");
assert_eq!(resumed_path, path, "resume should append to same file");
let content = std::fs::read_to_string(&resumed_path)?;
assert!(content.contains(&marker));
assert!(content.contains(&marker2));
Ok(())
}

View File

@@ -0,0 +1,322 @@
#![cfg(unix)]
use codex_core::protocol::SandboxPolicy;
use codex_core::spawn::StdioPolicy;
use std::collections::HashMap;
use std::future::Future;
use std::io;
use std::path::Path;
use std::path::PathBuf;
use std::process::ExitStatus;
use tokio::fs::create_dir_all;
use tokio::process::Child;
#[cfg(target_os = "macos")]
async fn spawn_command_under_sandbox(
command: Vec<String>,
command_cwd: PathBuf,
sandbox_policy: &SandboxPolicy,
sandbox_cwd: &Path,
stdio_policy: StdioPolicy,
env: HashMap<String, String>,
) -> std::io::Result<Child> {
use codex_core::seatbelt::spawn_command_under_seatbelt;
spawn_command_under_seatbelt(
command,
command_cwd,
sandbox_policy,
sandbox_cwd,
stdio_policy,
env,
)
.await
}
#[cfg(target_os = "linux")]
async fn spawn_command_under_sandbox(
command: Vec<String>,
command_cwd: PathBuf,
sandbox_policy: &SandboxPolicy,
sandbox_cwd: &Path,
stdio_policy: StdioPolicy,
env: HashMap<String, String>,
) -> std::io::Result<Child> {
use codex_core::landlock::spawn_command_under_linux_sandbox;
let codex_linux_sandbox_exe = assert_cmd::cargo::cargo_bin("codex-exec");
spawn_command_under_linux_sandbox(
codex_linux_sandbox_exe,
command,
command_cwd,
sandbox_policy,
sandbox_cwd,
stdio_policy,
env,
)
.await
}
#[tokio::test]
async fn python_multiprocessing_lock_works_under_sandbox() {
core_test_support::skip_if_sandbox!();
#[cfg(target_os = "macos")]
let writable_roots = Vec::<PathBuf>::new();
// From https://man7.org/linux/man-pages/man7/sem_overview.7.html
//
// > On Linux, named semaphores are created in a virtual filesystem,
// > normally mounted under /dev/shm.
#[cfg(target_os = "linux")]
let writable_roots = vec![PathBuf::from("/dev/shm")];
let policy = SandboxPolicy::WorkspaceWrite {
writable_roots,
network_access: false,
exclude_tmpdir_env_var: false,
exclude_slash_tmp: false,
};
let python_code = r#"import multiprocessing
from multiprocessing import Lock, Process
def f(lock):
with lock:
print("Lock acquired in child process")
if __name__ == '__main__':
lock = Lock()
p = Process(target=f, args=(lock,))
p.start()
p.join()
"#;
let command_cwd = std::env::current_dir().expect("should be able to get current dir");
let sandbox_cwd = command_cwd.clone();
let mut child = spawn_command_under_sandbox(
vec![
"python3".to_string(),
"-c".to_string(),
python_code.to_string(),
],
command_cwd,
&policy,
sandbox_cwd.as_path(),
StdioPolicy::Inherit,
HashMap::new(),
)
.await
.expect("should be able to spawn python under sandbox");
let status = child.wait().await.expect("should wait for child process");
assert!(status.success(), "python exited with {status:?}");
}
#[tokio::test]
async fn sandbox_distinguishes_command_and_policy_cwds() {
core_test_support::skip_if_sandbox!();
let temp = tempfile::tempdir().expect("should be able to create temp dir");
let sandbox_root = temp.path().join("sandbox");
let command_root = temp.path().join("command");
create_dir_all(&sandbox_root).await.expect("mkdir");
create_dir_all(&command_root).await.expect("mkdir");
let canonical_sandbox_root = tokio::fs::canonicalize(&sandbox_root)
.await
.expect("canonicalize sandbox root");
let canonical_allowed_path = canonical_sandbox_root.join("allowed.txt");
let disallowed_path = command_root.join("forbidden.txt");
// Note writable_roots is empty: verify that `canonical_allowed_path` is
// writable only because it is under the sandbox policy cwd, not because it
// is under a writable root.
let policy = SandboxPolicy::WorkspaceWrite {
writable_roots: vec![],
network_access: false,
exclude_tmpdir_env_var: true,
exclude_slash_tmp: true,
};
// Attempt to write inside the command cwd, which is outside of the sandbox policy cwd.
let mut child = spawn_command_under_sandbox(
vec![
"bash".to_string(),
"-lc".to_string(),
"echo forbidden > forbidden.txt".to_string(),
],
command_root.clone(),
&policy,
canonical_sandbox_root.as_path(),
StdioPolicy::Inherit,
HashMap::new(),
)
.await
.expect("should spawn command writing to forbidden path");
let status = child
.wait()
.await
.expect("should wait for forbidden command");
assert!(
!status.success(),
"sandbox unexpectedly allowed writing to command cwd: {status:?}"
);
let forbidden_exists = tokio::fs::try_exists(&disallowed_path)
.await
.expect("try_exists failed");
assert!(
!forbidden_exists,
"forbidden path should not have been created"
);
// Writing to the sandbox policy cwd after changing directories into it should succeed.
let mut child = spawn_command_under_sandbox(
vec![
"/usr/bin/touch".to_string(),
canonical_allowed_path.to_string_lossy().into_owned(),
],
command_root,
&policy,
canonical_sandbox_root.as_path(),
StdioPolicy::Inherit,
HashMap::new(),
)
.await
.expect("should spawn command writing to sandbox root");
let status = child.wait().await.expect("should wait for allowed command");
assert!(
status.success(),
"sandbox blocked allowed write: {status:?}"
);
let allowed_exists = tokio::fs::try_exists(&canonical_allowed_path)
.await
.expect("try_exists allowed failed");
assert!(allowed_exists, "allowed path should exist");
}
fn unix_sock_body() {
unsafe {
let mut fds = [0i32; 2];
let r = libc::socketpair(libc::AF_UNIX, libc::SOCK_DGRAM, 0, fds.as_mut_ptr());
assert_eq!(
r,
0,
"socketpair(AF_UNIX, SOCK_DGRAM) failed: {}",
io::Error::last_os_error()
);
let msg = b"hello_unix";
// write() from one end (generic write is allowed)
let sent = libc::write(fds[0], msg.as_ptr() as *const libc::c_void, msg.len());
assert!(sent >= 0, "write() failed: {}", io::Error::last_os_error());
// recvfrom() on the other end. We dont need the address for socketpair,
// so we pass null pointers for src address.
let mut buf = [0u8; 64];
let recvd = libc::recvfrom(
fds[1],
buf.as_mut_ptr() as *mut libc::c_void,
buf.len(),
0,
std::ptr::null_mut(),
std::ptr::null_mut(),
);
assert!(
recvd >= 0,
"recvfrom() failed: {}",
io::Error::last_os_error()
);
let recvd_slice = &buf[..(recvd as usize)];
assert_eq!(
recvd_slice,
&msg[..],
"payload mismatch: sent {} bytes, got {} bytes",
msg.len(),
recvd
);
// Also exercise AF_UNIX stream socketpair quickly to ensure AF_UNIX in general works.
let mut sfds = [0i32; 2];
let sr = libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, sfds.as_mut_ptr());
assert_eq!(
sr,
0,
"socketpair(AF_UNIX, SOCK_STREAM) failed: {}",
io::Error::last_os_error()
);
let snt2 = libc::write(sfds[0], msg.as_ptr() as *const libc::c_void, msg.len());
assert!(
snt2 >= 0,
"write(stream) failed: {}",
io::Error::last_os_error()
);
let mut b2 = [0u8; 64];
let rcv2 = libc::recv(sfds[1], b2.as_mut_ptr() as *mut libc::c_void, b2.len(), 0);
assert!(
rcv2 >= 0,
"recv(stream) failed: {}",
io::Error::last_os_error()
);
// Clean up
let _ = libc::close(sfds[0]);
let _ = libc::close(sfds[1]);
let _ = libc::close(fds[0]);
let _ = libc::close(fds[1]);
}
}
#[tokio::test]
async fn allow_unix_socketpair_recvfrom() {
run_code_under_sandbox(
"allow_unix_socketpair_recvfrom",
&SandboxPolicy::ReadOnly,
|| async { unix_sock_body() },
)
.await
.expect("should be able to reexec");
}
const IN_SANDBOX_ENV_VAR: &str = "IN_SANDBOX";
#[expect(clippy::expect_used)]
pub async fn run_code_under_sandbox<F, Fut>(
test_selector: &str,
policy: &SandboxPolicy,
child_body: F,
) -> io::Result<Option<ExitStatus>>
where
F: FnOnce() -> Fut + Send + 'static,
Fut: Future<Output = ()> + Send + 'static,
{
if std::env::var(IN_SANDBOX_ENV_VAR).is_err() {
let exe = std::env::current_exe()?;
let mut cmds = vec![exe.to_string_lossy().into_owned(), "--exact".into()];
let mut stdio_policy = StdioPolicy::RedirectForShellTool;
// Allow for us to pass forward --nocapture / use the right stdio policy.
if std::env::args().any(|a| a == "--nocapture") {
cmds.push("--nocapture".into());
stdio_policy = StdioPolicy::Inherit;
}
cmds.push(test_selector.into());
// Your existing launcher:
let command_cwd = std::env::current_dir().expect("should be able to get current dir");
let sandbox_cwd = command_cwd.clone();
let mut child = spawn_command_under_sandbox(
cmds,
command_cwd,
policy,
sandbox_cwd.as_path(),
stdio_policy,
HashMap::from([("IN_SANDBOX".into(), "1".into())]),
)
.await?;
let status = child.wait().await?;
Ok(Some(status))
} else {
// Child branch: run the provided body.
child_body().await;
Ok(None)
}
}

View File

@@ -0,0 +1,34 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use core_test_support::responses;
use core_test_support::test_codex_exec::test_codex_exec;
use wiremock::matchers::any;
/// Verify that when the server reports an error, `codex-exec` exits with a
/// non-zero status code so automation can detect failures.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exits_non_zero_when_server_reports_error() -> anyhow::Result<()> {
let test = test_codex_exec();
// Mock a simple Responses API SSE stream that immediately reports a
// `response.failed` event with an error message.
let server = responses::start_mock_server().await;
let body = responses::sse(vec![serde_json::json!({
"type": "response.failed",
"response": {
"id": "resp_err_1",
"error": {"code": "rate_limit_exceeded", "message": "synthetic server error"}
}
})]);
responses::mount_sse_once_match(&server, any(), body).await;
test.cmd_with_server(&server)
.arg("--skip-git-repo-check")
.arg("tell me something")
.arg("--experimental-json")
.assert()
.code(1);
Ok(())
}