Phase 1: Repository & Infrastructure Setup
- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
  - Root: llmx-monorepo
  - CLI: @llmx/llmx
  - SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx
🤖 Generated with Claude Code
This commit is contained in:
62
llmx-rs/exec/Cargo.toml
Normal file
62
llmx-rs/exec/Cargo.toml
Normal file
@@ -0,0 +1,62 @@
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "codex-exec"
|
||||
version = { workspace = true }
|
||||
|
||||
[[bin]]
|
||||
name = "codex-exec"
|
||||
path = "src/main.rs"
|
||||
|
||||
[lib]
|
||||
name = "codex_exec"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
clap = { workspace = true, features = ["derive"] }
|
||||
codex-arg0 = { workspace = true }
|
||||
codex-common = { workspace = true, features = [
|
||||
"cli",
|
||||
"elapsed",
|
||||
"sandbox_summary",
|
||||
] }
|
||||
codex-core = { workspace = true }
|
||||
codex-ollama = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
opentelemetry-appender-tracing = { workspace = true }
|
||||
owo-colors = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
shlex = { workspace = true }
|
||||
supports-color = { workspace = true }
|
||||
tokio = { workspace = true, features = [
|
||||
"io-std",
|
||||
"macros",
|
||||
"process",
|
||||
"rt-multi-thread",
|
||||
"signal",
|
||||
] }
|
||||
tracing = { workspace = true, features = ["log"] }
|
||||
tracing-subscriber = { workspace = true, features = ["env-filter"] }
|
||||
ts-rs = { workspace = true, features = [
|
||||
"uuid-impl",
|
||||
"serde-json-impl",
|
||||
"no-serde-warnings",
|
||||
] }
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = { workspace = true }
|
||||
core_test_support = { workspace = true }
|
||||
libc = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
predicates = { workspace = true }
|
||||
pretty_assertions = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
walkdir = { workspace = true }
|
||||
wiremock = { workspace = true }
|
||||
109
llmx-rs/exec/src/cli.rs
Normal file
109
llmx-rs/exec/src/cli.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
use clap::Parser;
|
||||
use clap::ValueEnum;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version)]
|
||||
pub struct Cli {
|
||||
/// Action to perform. If omitted, runs a new non-interactive session.
|
||||
#[command(subcommand)]
|
||||
pub command: Option<Command>,
|
||||
|
||||
/// Optional image(s) to attach to the initial prompt.
|
||||
#[arg(long = "image", short = 'i', value_name = "FILE", value_delimiter = ',', num_args = 1..)]
|
||||
pub images: Vec<PathBuf>,
|
||||
|
||||
/// Model the agent should use.
|
||||
#[arg(long, short = 'm')]
|
||||
pub model: Option<String>,
|
||||
|
||||
#[arg(long = "oss", default_value_t = false)]
|
||||
pub oss: bool,
|
||||
|
||||
/// Select the sandbox policy to use when executing model-generated shell
|
||||
/// commands.
|
||||
#[arg(long = "sandbox", short = 's', value_enum)]
|
||||
pub sandbox_mode: Option<codex_common::SandboxModeCliArg>,
|
||||
|
||||
/// Configuration profile from config.toml to specify default options.
|
||||
#[arg(long = "profile", short = 'p')]
|
||||
pub config_profile: Option<String>,
|
||||
|
||||
/// Convenience alias for low-friction sandboxed automatic execution (-a on-failure, --sandbox workspace-write).
|
||||
#[arg(long = "full-auto", default_value_t = false)]
|
||||
pub full_auto: bool,
|
||||
|
||||
/// Skip all confirmation prompts and execute commands without sandboxing.
|
||||
/// EXTREMELY DANGEROUS. Intended solely for running in environments that are externally sandboxed.
|
||||
#[arg(
|
||||
long = "dangerously-bypass-approvals-and-sandbox",
|
||||
alias = "yolo",
|
||||
default_value_t = false,
|
||||
conflicts_with = "full_auto"
|
||||
)]
|
||||
pub dangerously_bypass_approvals_and_sandbox: bool,
|
||||
|
||||
/// Tell the agent to use the specified directory as its working root.
|
||||
#[clap(long = "cd", short = 'C', value_name = "DIR")]
|
||||
pub cwd: Option<PathBuf>,
|
||||
|
||||
/// Allow running Codex outside a Git repository.
|
||||
#[arg(long = "skip-git-repo-check", default_value_t = false)]
|
||||
pub skip_git_repo_check: bool,
|
||||
|
||||
/// Path to a JSON Schema file describing the model's final response shape.
|
||||
#[arg(long = "output-schema", value_name = "FILE")]
|
||||
pub output_schema: Option<PathBuf>,
|
||||
|
||||
#[clap(skip)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
|
||||
/// Specifies color settings for use in the output.
|
||||
#[arg(long = "color", value_enum, default_value_t = Color::Auto)]
|
||||
pub color: Color,
|
||||
|
||||
/// Print events to stdout as JSONL.
|
||||
#[arg(long = "json", alias = "experimental-json", default_value_t = false)]
|
||||
pub json: bool,
|
||||
|
||||
/// Specifies file where the last message from the agent should be written.
|
||||
#[arg(long = "output-last-message", short = 'o', value_name = "FILE")]
|
||||
pub last_message_file: Option<PathBuf>,
|
||||
|
||||
/// Initial instructions for the agent. If not provided as an argument (or
|
||||
/// if `-` is used), instructions are read from stdin.
|
||||
#[arg(value_name = "PROMPT", value_hint = clap::ValueHint::Other)]
|
||||
pub prompt: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, clap::Subcommand)]
|
||||
pub enum Command {
|
||||
/// Resume a previous session by id or pick the most recent with --last.
|
||||
Resume(ResumeArgs),
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
pub struct ResumeArgs {
|
||||
/// Conversation/session id (UUID). When provided, resumes this session.
|
||||
/// If omitted, use --last to pick the most recent recorded session.
|
||||
#[arg(value_name = "SESSION_ID")]
|
||||
pub session_id: Option<String>,
|
||||
|
||||
/// Resume the most recent recorded session (newest) without specifying an id.
|
||||
#[arg(long = "last", default_value_t = false, conflicts_with = "session_id")]
|
||||
pub last: bool,
|
||||
|
||||
/// Prompt to send after resuming the session. If `-` is used, read from stdin.
|
||||
#[arg(value_name = "PROMPT", value_hint = clap::ValueHint::Other)]
|
||||
pub prompt: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, ValueEnum)]
|
||||
#[value(rename_all = "kebab-case")]
|
||||
pub enum Color {
|
||||
Always,
|
||||
Never,
|
||||
#[default]
|
||||
Auto,
|
||||
}
|
||||
45
llmx-rs/exec/src/event_processor.rs
Normal file
45
llmx-rs/exec/src/event_processor.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
use std::path::Path;
|
||||
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
|
||||
/// Result of handling a single event, returned by
/// [`EventProcessor::process_event`].
pub(crate) enum CodexStatus {
    /// Nothing special happened; keep consuming events.
    Running,
    /// The processor asks for shutdown to begin (the human-output processor
    /// returns this once a task has completed).
    InitiateShutdown,
    /// Shutdown has finished (returned for the shutdown-complete event).
    Shutdown,
}
|
||||
|
||||
pub(crate) trait EventProcessor {
|
||||
/// Print summary of effective configuration and user prompt.
|
||||
fn print_config_summary(
|
||||
&mut self,
|
||||
config: &Config,
|
||||
prompt: &str,
|
||||
session_configured: &SessionConfiguredEvent,
|
||||
);
|
||||
|
||||
/// Handle a single event emitted by the agent.
|
||||
fn process_event(&mut self, event: Event) -> CodexStatus;
|
||||
|
||||
fn print_final_output(&mut self) {}
|
||||
}
|
||||
|
||||
pub(crate) fn handle_last_message(last_agent_message: Option<&str>, output_file: &Path) {
|
||||
let message = last_agent_message.unwrap_or_default();
|
||||
write_last_message_file(message, Some(output_file));
|
||||
if last_agent_message.is_none() {
|
||||
eprintln!(
|
||||
"Warning: no last agent message; wrote empty content to {}",
|
||||
output_file.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes `contents` to `last_message_path` when a path is given.
///
/// A missing path is a no-op. Write failures are reported on stderr and are
/// otherwise ignored (best effort).
fn write_last_message_file(contents: &str, last_message_path: Option<&Path>) {
    let Some(path) = last_message_path else {
        return;
    };

    if let Err(e) = std::fs::write(path, contents) {
        eprintln!("Failed to write last message file {path:?}: {e}");
    }
}
|
||||
599
llmx-rs/exec/src/event_processor_with_human_output.rs
Normal file
599
llmx-rs/exec/src/event_processor_with_human_output.rs
Normal file
@@ -0,0 +1,599 @@
|
||||
use codex_common::elapsed::format_duration;
|
||||
use codex_common::elapsed::format_elapsed;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::AgentMessageEvent;
|
||||
use codex_core::protocol::AgentReasoningRawContentEvent;
|
||||
use codex_core::protocol::BackgroundEventEvent;
|
||||
use codex_core::protocol::DeprecationNoticeEvent;
|
||||
use codex_core::protocol::ErrorEvent;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExecCommandBeginEvent;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::McpInvocation;
|
||||
use codex_core::protocol::McpToolCallBeginEvent;
|
||||
use codex_core::protocol::McpToolCallEndEvent;
|
||||
use codex_core::protocol::PatchApplyBeginEvent;
|
||||
use codex_core::protocol::PatchApplyEndEvent;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
use codex_core::protocol::StreamErrorEvent;
|
||||
use codex_core::protocol::TaskCompleteEvent;
|
||||
use codex_core::protocol::TurnAbortReason;
|
||||
use codex_core::protocol::TurnDiffEvent;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_core::protocol::WebSearchEndEvent;
|
||||
use codex_protocol::num_format::format_with_separators;
|
||||
use owo_colors::OwoColorize;
|
||||
use owo_colors::Style;
|
||||
use shlex::try_join;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use crate::event_processor::CodexStatus;
|
||||
use crate::event_processor::EventProcessor;
|
||||
use crate::event_processor::handle_last_message;
|
||||
use codex_common::create_config_summary_entries;
|
||||
use codex_protocol::plan_tool::StepStatus;
|
||||
use codex_protocol::plan_tool::UpdatePlanArgs;
|
||||
|
||||
/// Maximum number of output lines echoed for a finished exec command or MCP
/// tool call result.
///
/// TODO: make this configurable. In CI, users may prefer no limit so that the
/// full transcript is visible.
const MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL: usize = 20;
|
||||
pub(crate) struct EventProcessorWithHumanOutput {
|
||||
call_id_to_patch: HashMap<String, PatchApplyBegin>,
|
||||
|
||||
// To ensure that --color=never is respected, ANSI escapes _must_ be added
|
||||
// using .style() with one of these fields. If you need a new style, add a
|
||||
// new field here.
|
||||
bold: Style,
|
||||
italic: Style,
|
||||
dimmed: Style,
|
||||
|
||||
magenta: Style,
|
||||
red: Style,
|
||||
green: Style,
|
||||
cyan: Style,
|
||||
yellow: Style,
|
||||
|
||||
/// Whether to include `AgentReasoning` events in the output.
|
||||
show_agent_reasoning: bool,
|
||||
show_raw_agent_reasoning: bool,
|
||||
last_message_path: Option<PathBuf>,
|
||||
last_total_token_usage: Option<codex_core::protocol::TokenUsageInfo>,
|
||||
final_message: Option<String>,
|
||||
}
|
||||
|
||||
impl EventProcessorWithHumanOutput {
|
||||
pub(crate) fn create_with_ansi(
|
||||
with_ansi: bool,
|
||||
config: &Config,
|
||||
last_message_path: Option<PathBuf>,
|
||||
) -> Self {
|
||||
let call_id_to_patch = HashMap::new();
|
||||
|
||||
if with_ansi {
|
||||
Self {
|
||||
call_id_to_patch,
|
||||
bold: Style::new().bold(),
|
||||
italic: Style::new().italic(),
|
||||
dimmed: Style::new().dimmed(),
|
||||
magenta: Style::new().magenta(),
|
||||
red: Style::new().red(),
|
||||
green: Style::new().green(),
|
||||
cyan: Style::new().cyan(),
|
||||
yellow: Style::new().yellow(),
|
||||
show_agent_reasoning: !config.hide_agent_reasoning,
|
||||
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
|
||||
last_message_path,
|
||||
last_total_token_usage: None,
|
||||
final_message: None,
|
||||
}
|
||||
} else {
|
||||
Self {
|
||||
call_id_to_patch,
|
||||
bold: Style::new(),
|
||||
italic: Style::new(),
|
||||
dimmed: Style::new(),
|
||||
magenta: Style::new(),
|
||||
red: Style::new(),
|
||||
green: Style::new(),
|
||||
cyan: Style::new(),
|
||||
yellow: Style::new(),
|
||||
show_agent_reasoning: !config.hide_agent_reasoning,
|
||||
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
|
||||
last_message_path,
|
||||
last_total_token_usage: None,
|
||||
final_message: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Bookkeeping captured when a patch application starts, kept until the
/// corresponding end event is processed.
struct PatchApplyBegin {
    /// Moment the begin event was handled; used to report elapsed time.
    start_time: Instant,
    /// Auto-approval flag carried by the begin event.
    auto_approved: bool,
}
|
||||
|
||||
/// Prints one formatted line to stderr.
///
/// The first argument (conventionally `self`) is accepted so call sites keep
/// a uniform shape, but it is not used: despite the macro's name, no
/// timestamp is prepended. The remaining tokens are forwarded unchanged to
/// `eprintln!`.
macro_rules! ts_msg {
    ($self:ident, $($arg:tt)*) => {{
        eprintln!($($arg)*);
    }};
}
|
||||
|
||||
impl EventProcessor for EventProcessorWithHumanOutput {
|
||||
/// Print a concise summary of the effective configuration that will be used
|
||||
/// for the session. This mirrors the information shown in the TUI welcome
|
||||
/// screen.
|
||||
fn print_config_summary(
|
||||
&mut self,
|
||||
config: &Config,
|
||||
prompt: &str,
|
||||
session_configured_event: &SessionConfiguredEvent,
|
||||
) {
|
||||
const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
ts_msg!(
|
||||
self,
|
||||
"OpenAI Codex v{} (research preview)\n--------",
|
||||
VERSION
|
||||
);
|
||||
|
||||
let mut entries = create_config_summary_entries(config);
|
||||
entries.push((
|
||||
"session id",
|
||||
session_configured_event.session_id.to_string(),
|
||||
));
|
||||
|
||||
for (key, value) in entries {
|
||||
eprintln!("{} {}", format!("{key}:").style(self.bold), value);
|
||||
}
|
||||
|
||||
eprintln!("--------");
|
||||
|
||||
// Echo the prompt that will be sent to the agent so it is visible in the
|
||||
// transcript/logs before any events come in. Note the prompt may have been
|
||||
// read from stdin, so it may not be visible in the terminal otherwise.
|
||||
ts_msg!(self, "{}\n{}", "user".style(self.cyan), prompt);
|
||||
}
|
||||
|
||||
fn process_event(&mut self, event: Event) -> CodexStatus {
|
||||
let Event { id: _, msg } = event;
|
||||
match msg {
|
||||
EventMsg::Error(ErrorEvent { message }) => {
|
||||
let prefix = "ERROR:".style(self.red);
|
||||
ts_msg!(self, "{prefix} {message}");
|
||||
}
|
||||
EventMsg::Warning(WarningEvent { message }) => {
|
||||
ts_msg!(
|
||||
self,
|
||||
"{} {message}",
|
||||
"warning:".style(self.yellow).style(self.bold)
|
||||
);
|
||||
}
|
||||
EventMsg::DeprecationNotice(DeprecationNoticeEvent { summary, details }) => {
|
||||
ts_msg!(
|
||||
self,
|
||||
"{} {summary}",
|
||||
"deprecated:".style(self.magenta).style(self.bold)
|
||||
);
|
||||
if let Some(details) = details {
|
||||
ts_msg!(self, " {}", details.style(self.dimmed));
|
||||
}
|
||||
}
|
||||
EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
|
||||
ts_msg!(self, "{}", message.style(self.dimmed));
|
||||
}
|
||||
EventMsg::StreamError(StreamErrorEvent { message }) => {
|
||||
ts_msg!(self, "{}", message.style(self.dimmed));
|
||||
}
|
||||
EventMsg::TaskStarted(_) => {
|
||||
// Ignore.
|
||||
}
|
||||
EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
|
||||
let last_message = last_agent_message.as_deref();
|
||||
if let Some(output_file) = self.last_message_path.as_deref() {
|
||||
handle_last_message(last_message, output_file);
|
||||
}
|
||||
|
||||
self.final_message = last_agent_message;
|
||||
|
||||
return CodexStatus::InitiateShutdown;
|
||||
}
|
||||
EventMsg::TokenCount(ev) => {
|
||||
self.last_total_token_usage = ev.info;
|
||||
}
|
||||
|
||||
EventMsg::AgentReasoningSectionBreak(_) => {
|
||||
if !self.show_agent_reasoning {
|
||||
return CodexStatus::Running;
|
||||
}
|
||||
eprintln!();
|
||||
}
|
||||
EventMsg::AgentReasoningRawContent(AgentReasoningRawContentEvent { text }) => {
|
||||
if self.show_raw_agent_reasoning {
|
||||
ts_msg!(
|
||||
self,
|
||||
"{}\n{}",
|
||||
"thinking".style(self.italic).style(self.magenta),
|
||||
text,
|
||||
);
|
||||
}
|
||||
}
|
||||
EventMsg::AgentMessage(AgentMessageEvent { message }) => {
|
||||
ts_msg!(
|
||||
self,
|
||||
"{}\n{}",
|
||||
"codex".style(self.italic).style(self.magenta),
|
||||
message,
|
||||
);
|
||||
}
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent { command, cwd, .. }) => {
|
||||
eprint!(
|
||||
"{}\n{} in {}",
|
||||
"exec".style(self.italic).style(self.magenta),
|
||||
escape_command(&command).style(self.bold),
|
||||
cwd.to_string_lossy(),
|
||||
);
|
||||
}
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
aggregated_output,
|
||||
duration,
|
||||
exit_code,
|
||||
..
|
||||
}) => {
|
||||
let duration = format!(" in {}", format_duration(duration));
|
||||
|
||||
let truncated_output = aggregated_output
|
||||
.lines()
|
||||
.take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL)
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
match exit_code {
|
||||
0 => {
|
||||
let title = format!(" succeeded{duration}:");
|
||||
ts_msg!(self, "{}", title.style(self.green));
|
||||
}
|
||||
_ => {
|
||||
let title = format!(" exited {exit_code}{duration}:");
|
||||
ts_msg!(self, "{}", title.style(self.red));
|
||||
}
|
||||
}
|
||||
eprintln!("{}", truncated_output.style(self.dimmed));
|
||||
}
|
||||
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
|
||||
call_id: _,
|
||||
invocation,
|
||||
}) => {
|
||||
ts_msg!(
|
||||
self,
|
||||
"{} {}",
|
||||
"tool".style(self.magenta),
|
||||
format_mcp_invocation(&invocation).style(self.bold),
|
||||
);
|
||||
}
|
||||
EventMsg::McpToolCallEnd(tool_call_end_event) => {
|
||||
let is_success = tool_call_end_event.is_success();
|
||||
let McpToolCallEndEvent {
|
||||
call_id: _,
|
||||
result,
|
||||
invocation,
|
||||
duration,
|
||||
} = tool_call_end_event;
|
||||
|
||||
let duration = format!(" in {}", format_duration(duration));
|
||||
|
||||
let status_str = if is_success { "success" } else { "failed" };
|
||||
let title_style = if is_success { self.green } else { self.red };
|
||||
let title = format!(
|
||||
"{} {status_str}{duration}:",
|
||||
format_mcp_invocation(&invocation)
|
||||
);
|
||||
|
||||
ts_msg!(self, "{}", title.style(title_style));
|
||||
|
||||
if let Ok(res) = result {
|
||||
let val: serde_json::Value = res.into();
|
||||
let pretty =
|
||||
serde_json::to_string_pretty(&val).unwrap_or_else(|_| val.to_string());
|
||||
|
||||
for line in pretty.lines().take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL) {
|
||||
eprintln!("{}", line.style(self.dimmed));
|
||||
}
|
||||
}
|
||||
}
|
||||
EventMsg::WebSearchEnd(WebSearchEndEvent { call_id: _, query }) => {
|
||||
ts_msg!(self, "🌐 Searched: {query}");
|
||||
}
|
||||
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
||||
call_id,
|
||||
auto_approved,
|
||||
changes,
|
||||
}) => {
|
||||
// Store metadata so we can calculate duration later when we
|
||||
// receive the corresponding PatchApplyEnd event.
|
||||
self.call_id_to_patch.insert(
|
||||
call_id,
|
||||
PatchApplyBegin {
|
||||
start_time: Instant::now(),
|
||||
auto_approved,
|
||||
},
|
||||
);
|
||||
|
||||
ts_msg!(
|
||||
self,
|
||||
"{}",
|
||||
"file update".style(self.magenta).style(self.italic),
|
||||
);
|
||||
|
||||
// Pretty-print the patch summary with colored diff markers so
|
||||
// it's easy to scan in the terminal output.
|
||||
for (path, change) in changes.iter() {
|
||||
match change {
|
||||
FileChange::Add { content } => {
|
||||
let header = format!(
|
||||
"{} {}",
|
||||
format_file_change(change),
|
||||
path.to_string_lossy()
|
||||
);
|
||||
eprintln!("{}", header.style(self.magenta));
|
||||
for line in content.lines() {
|
||||
eprintln!("{}", line.style(self.green));
|
||||
}
|
||||
}
|
||||
FileChange::Delete { content } => {
|
||||
let header = format!(
|
||||
"{} {}",
|
||||
format_file_change(change),
|
||||
path.to_string_lossy()
|
||||
);
|
||||
eprintln!("{}", header.style(self.magenta));
|
||||
for line in content.lines() {
|
||||
eprintln!("{}", line.style(self.red));
|
||||
}
|
||||
}
|
||||
FileChange::Update {
|
||||
unified_diff,
|
||||
move_path,
|
||||
} => {
|
||||
let header = if let Some(dest) = move_path {
|
||||
format!(
|
||||
"{} {} -> {}",
|
||||
format_file_change(change),
|
||||
path.to_string_lossy(),
|
||||
dest.to_string_lossy()
|
||||
)
|
||||
} else {
|
||||
format!("{} {}", format_file_change(change), path.to_string_lossy())
|
||||
};
|
||||
eprintln!("{}", header.style(self.magenta));
|
||||
|
||||
// Colorize diff lines. We keep file header lines
|
||||
// (--- / +++) without extra coloring so they are
|
||||
// still readable.
|
||||
for diff_line in unified_diff.lines() {
|
||||
if diff_line.starts_with('+') && !diff_line.starts_with("+++") {
|
||||
eprintln!("{}", diff_line.style(self.green));
|
||||
} else if diff_line.starts_with('-')
|
||||
&& !diff_line.starts_with("---")
|
||||
{
|
||||
eprintln!("{}", diff_line.style(self.red));
|
||||
} else {
|
||||
eprintln!("{diff_line}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||
call_id,
|
||||
stdout,
|
||||
stderr,
|
||||
success,
|
||||
..
|
||||
}) => {
|
||||
let patch_begin = self.call_id_to_patch.remove(&call_id);
|
||||
|
||||
// Compute duration and summary label similar to exec commands.
|
||||
let (duration, label) = if let Some(PatchApplyBegin {
|
||||
start_time,
|
||||
auto_approved,
|
||||
}) = patch_begin
|
||||
{
|
||||
(
|
||||
format!(" in {}", format_elapsed(start_time)),
|
||||
format!("apply_patch(auto_approved={auto_approved})"),
|
||||
)
|
||||
} else {
|
||||
(String::new(), format!("apply_patch('{call_id}')"))
|
||||
};
|
||||
|
||||
let (exit_code, output, title_style) = if success {
|
||||
(0, stdout, self.green)
|
||||
} else {
|
||||
(1, stderr, self.red)
|
||||
};
|
||||
|
||||
let title = format!("{label} exited {exit_code}{duration}:");
|
||||
ts_msg!(self, "{}", title.style(title_style));
|
||||
for line in output.lines() {
|
||||
eprintln!("{}", line.style(self.dimmed));
|
||||
}
|
||||
}
|
||||
EventMsg::TurnDiff(TurnDiffEvent { unified_diff }) => {
|
||||
ts_msg!(
|
||||
self,
|
||||
"{}",
|
||||
"file update:".style(self.magenta).style(self.italic)
|
||||
);
|
||||
eprintln!("{unified_diff}");
|
||||
}
|
||||
EventMsg::AgentReasoning(agent_reasoning_event) => {
|
||||
if self.show_agent_reasoning {
|
||||
ts_msg!(
|
||||
self,
|
||||
"{}\n{}",
|
||||
"thinking".style(self.italic).style(self.magenta),
|
||||
agent_reasoning_event.text,
|
||||
);
|
||||
}
|
||||
}
|
||||
EventMsg::SessionConfigured(session_configured_event) => {
|
||||
let SessionConfiguredEvent {
|
||||
session_id: conversation_id,
|
||||
model,
|
||||
reasoning_effort: _,
|
||||
history_log_id: _,
|
||||
history_entry_count: _,
|
||||
initial_messages: _,
|
||||
rollout_path: _,
|
||||
} = session_configured_event;
|
||||
|
||||
ts_msg!(
|
||||
self,
|
||||
"{} {}",
|
||||
"codex session".style(self.magenta).style(self.bold),
|
||||
conversation_id.to_string().style(self.dimmed)
|
||||
);
|
||||
|
||||
ts_msg!(self, "model: {}", model);
|
||||
eprintln!();
|
||||
}
|
||||
EventMsg::PlanUpdate(plan_update_event) => {
|
||||
let UpdatePlanArgs { explanation, plan } = plan_update_event;
|
||||
|
||||
// Header
|
||||
ts_msg!(self, "{}", "Plan update".style(self.magenta));
|
||||
|
||||
// Optional explanation
|
||||
if let Some(explanation) = explanation
|
||||
&& !explanation.trim().is_empty()
|
||||
{
|
||||
ts_msg!(self, "{}", explanation.style(self.italic));
|
||||
}
|
||||
|
||||
// Pretty-print the plan items with simple status markers.
|
||||
for item in plan {
|
||||
match item.status {
|
||||
StepStatus::Completed => {
|
||||
ts_msg!(self, " {} {}", "✓".style(self.green), item.step);
|
||||
}
|
||||
StepStatus::InProgress => {
|
||||
ts_msg!(self, " {} {}", "→".style(self.cyan), item.step);
|
||||
}
|
||||
StepStatus::Pending => {
|
||||
ts_msg!(
|
||||
self,
|
||||
" {} {}",
|
||||
"•".style(self.dimmed),
|
||||
item.step.style(self.dimmed)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
EventMsg::ViewImageToolCall(view) => {
|
||||
ts_msg!(
|
||||
self,
|
||||
"{} {}",
|
||||
"viewed image".style(self.magenta),
|
||||
view.path.display()
|
||||
);
|
||||
}
|
||||
EventMsg::TurnAborted(abort_reason) => match abort_reason.reason {
|
||||
TurnAbortReason::Interrupted => {
|
||||
ts_msg!(self, "task interrupted");
|
||||
}
|
||||
TurnAbortReason::Replaced => {
|
||||
ts_msg!(self, "task aborted: replaced by a new task");
|
||||
}
|
||||
TurnAbortReason::ReviewEnded => {
|
||||
ts_msg!(self, "task aborted: review ended");
|
||||
}
|
||||
},
|
||||
EventMsg::ShutdownComplete => return CodexStatus::Shutdown,
|
||||
EventMsg::WebSearchBegin(_)
|
||||
| EventMsg::ExecApprovalRequest(_)
|
||||
| EventMsg::ApplyPatchApprovalRequest(_)
|
||||
| EventMsg::ExecCommandOutputDelta(_)
|
||||
| EventMsg::GetHistoryEntryResponse(_)
|
||||
| EventMsg::McpListToolsResponse(_)
|
||||
| EventMsg::ListCustomPromptsResponse(_)
|
||||
| EventMsg::RawResponseItem(_)
|
||||
| EventMsg::UserMessage(_)
|
||||
| EventMsg::EnteredReviewMode(_)
|
||||
| EventMsg::ExitedReviewMode(_)
|
||||
| EventMsg::AgentMessageDelta(_)
|
||||
| EventMsg::AgentReasoningDelta(_)
|
||||
| EventMsg::AgentReasoningRawContentDelta(_)
|
||||
| EventMsg::ItemStarted(_)
|
||||
| EventMsg::ItemCompleted(_)
|
||||
| EventMsg::AgentMessageContentDelta(_)
|
||||
| EventMsg::ReasoningContentDelta(_)
|
||||
| EventMsg::ReasoningRawContentDelta(_)
|
||||
| EventMsg::UndoCompleted(_)
|
||||
| EventMsg::UndoStarted(_) => {}
|
||||
}
|
||||
CodexStatus::Running
|
||||
}
|
||||
|
||||
fn print_final_output(&mut self) {
|
||||
if let Some(usage_info) = &self.last_total_token_usage {
|
||||
eprintln!(
|
||||
"{}\n{}",
|
||||
"tokens used".style(self.magenta).style(self.italic),
|
||||
format_with_separators(usage_info.total_token_usage.blended_total())
|
||||
);
|
||||
}
|
||||
|
||||
// If the user has not piped the final message to a file, they will see
|
||||
// it twice: once written to stderr as part of the normal event
|
||||
// processing, and once here on stdout. We print the token summary above
|
||||
// to help break up the output visually in that case.
|
||||
#[allow(clippy::print_stdout)]
|
||||
if let Some(message) = &self.final_message {
|
||||
if message.ends_with('\n') {
|
||||
print!("{message}");
|
||||
} else {
|
||||
println!("{message}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn escape_command(command: &[String]) -> String {
|
||||
try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "))
|
||||
}
|
||||
|
||||
fn format_file_change(change: &FileChange) -> &'static str {
|
||||
match change {
|
||||
FileChange::Add { .. } => "A",
|
||||
FileChange::Delete { .. } => "D",
|
||||
FileChange::Update {
|
||||
move_path: Some(_), ..
|
||||
} => "R",
|
||||
FileChange::Update {
|
||||
move_path: None, ..
|
||||
} => "M",
|
||||
}
|
||||
}
|
||||
|
||||
fn format_mcp_invocation(invocation: &McpInvocation) -> String {
|
||||
// Build fully-qualified tool name: server.tool
|
||||
let fq_tool_name = format!("{}.{}", invocation.server, invocation.tool);
|
||||
|
||||
// Format arguments as compact JSON so they fit on one line.
|
||||
let args_str = invocation
|
||||
.arguments
|
||||
.as_ref()
|
||||
.map(|v: &serde_json::Value| serde_json::to_string(v).unwrap_or_else(|_| v.to_string()))
|
||||
.unwrap_or_default();
|
||||
|
||||
if args_str.is_empty() {
|
||||
format!("{fq_tool_name}()")
|
||||
} else {
|
||||
format!("{fq_tool_name}({args_str})")
|
||||
}
|
||||
}
|
||||
501
llmx-rs/exec/src/event_processor_with_jsonl_output.rs
Normal file
501
llmx-rs/exec/src/event_processor_with_jsonl_output.rs
Normal file
@@ -0,0 +1,501 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
|
||||
use crate::event_processor::CodexStatus;
|
||||
use crate::event_processor::EventProcessor;
|
||||
use crate::event_processor::handle_last_message;
|
||||
use crate::exec_events::AgentMessageItem;
|
||||
use crate::exec_events::CommandExecutionItem;
|
||||
use crate::exec_events::CommandExecutionStatus;
|
||||
use crate::exec_events::ErrorItem;
|
||||
use crate::exec_events::FileChangeItem;
|
||||
use crate::exec_events::FileUpdateChange;
|
||||
use crate::exec_events::ItemCompletedEvent;
|
||||
use crate::exec_events::ItemStartedEvent;
|
||||
use crate::exec_events::ItemUpdatedEvent;
|
||||
use crate::exec_events::McpToolCallItem;
|
||||
use crate::exec_events::McpToolCallItemError;
|
||||
use crate::exec_events::McpToolCallItemResult;
|
||||
use crate::exec_events::McpToolCallStatus;
|
||||
use crate::exec_events::PatchApplyStatus;
|
||||
use crate::exec_events::PatchChangeKind;
|
||||
use crate::exec_events::ReasoningItem;
|
||||
use crate::exec_events::ThreadErrorEvent;
|
||||
use crate::exec_events::ThreadEvent;
|
||||
use crate::exec_events::ThreadItem;
|
||||
use crate::exec_events::ThreadItemDetails;
|
||||
use crate::exec_events::ThreadStartedEvent;
|
||||
use crate::exec_events::TodoItem;
|
||||
use crate::exec_events::TodoListItem;
|
||||
use crate::exec_events::TurnCompletedEvent;
|
||||
use crate::exec_events::TurnFailedEvent;
|
||||
use crate::exec_events::TurnStartedEvent;
|
||||
use crate::exec_events::Usage;
|
||||
use crate::exec_events::WebSearchItem;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::AgentMessageEvent;
|
||||
use codex_core::protocol::AgentReasoningEvent;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExecCommandBeginEvent;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::McpToolCallBeginEvent;
|
||||
use codex_core::protocol::McpToolCallEndEvent;
|
||||
use codex_core::protocol::PatchApplyBeginEvent;
|
||||
use codex_core::protocol::PatchApplyEndEvent;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
use codex_core::protocol::TaskCompleteEvent;
|
||||
use codex_core::protocol::TaskStartedEvent;
|
||||
use codex_core::protocol::WebSearchEndEvent;
|
||||
use codex_protocol::plan_tool::StepStatus;
|
||||
use codex_protocol::plan_tool::UpdatePlanArgs;
|
||||
use serde_json::Value as JsonValue;
|
||||
use tracing::error;
|
||||
use tracing::warn;
|
||||
|
||||
pub struct EventProcessorWithJsonOutput {
|
||||
last_message_path: Option<PathBuf>,
|
||||
next_event_id: AtomicU64,
|
||||
// Tracks running commands by call_id, including the associated item id.
|
||||
running_commands: HashMap<String, RunningCommand>,
|
||||
running_patch_applies: HashMap<String, PatchApplyBeginEvent>,
|
||||
// Tracks the todo list for the current turn (at most one per turn).
|
||||
running_todo_list: Option<RunningTodoList>,
|
||||
last_total_token_usage: Option<codex_core::protocol::TokenUsage>,
|
||||
running_mcp_tool_calls: HashMap<String, RunningMcpToolCall>,
|
||||
last_critical_error: Option<ThreadErrorEvent>,
|
||||
}
|
||||
|
||||
/// State kept for a command that is tracked as running.
#[derive(Clone, Debug)]
struct RunningCommand {
    /// The command, stored as a single string.
    command: String,
    /// Id of the item associated with this command.
    item_id: String,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct RunningTodoList {
|
||||
item_id: String,
|
||||
items: Vec<TodoItem>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct RunningMcpToolCall {
|
||||
server: String,
|
||||
tool: String,
|
||||
item_id: String,
|
||||
arguments: JsonValue,
|
||||
}
|
||||
|
||||
impl EventProcessorWithJsonOutput {
|
||||
pub fn new(last_message_path: Option<PathBuf>) -> Self {
|
||||
Self {
|
||||
last_message_path,
|
||||
next_event_id: AtomicU64::new(0),
|
||||
running_commands: HashMap::new(),
|
||||
running_patch_applies: HashMap::new(),
|
||||
running_todo_list: None,
|
||||
last_total_token_usage: None,
|
||||
running_mcp_tool_calls: HashMap::new(),
|
||||
last_critical_error: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn collect_thread_events(&mut self, event: &Event) -> Vec<ThreadEvent> {
|
||||
match &event.msg {
|
||||
EventMsg::SessionConfigured(ev) => self.handle_session_configured(ev),
|
||||
EventMsg::AgentMessage(ev) => self.handle_agent_message(ev),
|
||||
EventMsg::AgentReasoning(ev) => self.handle_reasoning_event(ev),
|
||||
EventMsg::ExecCommandBegin(ev) => self.handle_exec_command_begin(ev),
|
||||
EventMsg::ExecCommandEnd(ev) => self.handle_exec_command_end(ev),
|
||||
EventMsg::McpToolCallBegin(ev) => self.handle_mcp_tool_call_begin(ev),
|
||||
EventMsg::McpToolCallEnd(ev) => self.handle_mcp_tool_call_end(ev),
|
||||
EventMsg::PatchApplyBegin(ev) => self.handle_patch_apply_begin(ev),
|
||||
EventMsg::PatchApplyEnd(ev) => self.handle_patch_apply_end(ev),
|
||||
EventMsg::WebSearchBegin(_) => Vec::new(),
|
||||
EventMsg::WebSearchEnd(ev) => self.handle_web_search_end(ev),
|
||||
EventMsg::TokenCount(ev) => {
|
||||
if let Some(info) = &ev.info {
|
||||
self.last_total_token_usage = Some(info.total_token_usage.clone());
|
||||
}
|
||||
Vec::new()
|
||||
}
|
||||
EventMsg::TaskStarted(ev) => self.handle_task_started(ev),
|
||||
EventMsg::TaskComplete(_) => self.handle_task_complete(),
|
||||
EventMsg::Error(ev) => {
|
||||
let error = ThreadErrorEvent {
|
||||
message: ev.message.clone(),
|
||||
};
|
||||
self.last_critical_error = Some(error.clone());
|
||||
vec![ThreadEvent::Error(error)]
|
||||
}
|
||||
EventMsg::Warning(ev) => {
|
||||
let item = ThreadItem {
|
||||
id: self.get_next_item_id(),
|
||||
details: ThreadItemDetails::Error(ErrorItem {
|
||||
message: ev.message.clone(),
|
||||
}),
|
||||
};
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
|
||||
}
|
||||
EventMsg::StreamError(ev) => vec![ThreadEvent::Error(ThreadErrorEvent {
|
||||
message: ev.message.clone(),
|
||||
})],
|
||||
EventMsg::PlanUpdate(ev) => self.handle_plan_update(ev),
|
||||
_ => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_next_item_id(&self) -> String {
|
||||
format!(
|
||||
"item_{}",
|
||||
self.next_event_id
|
||||
.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
|
||||
)
|
||||
}
|
||||
|
||||
fn handle_session_configured(&self, payload: &SessionConfiguredEvent) -> Vec<ThreadEvent> {
|
||||
vec![ThreadEvent::ThreadStarted(ThreadStartedEvent {
|
||||
thread_id: payload.session_id.to_string(),
|
||||
})]
|
||||
}
|
||||
|
||||
fn handle_web_search_end(&self, ev: &WebSearchEndEvent) -> Vec<ThreadEvent> {
|
||||
let item = ThreadItem {
|
||||
id: self.get_next_item_id(),
|
||||
details: ThreadItemDetails::WebSearch(WebSearchItem {
|
||||
query: ev.query.clone(),
|
||||
}),
|
||||
};
|
||||
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
|
||||
}
|
||||
|
||||
fn handle_agent_message(&self, payload: &AgentMessageEvent) -> Vec<ThreadEvent> {
|
||||
let item = ThreadItem {
|
||||
id: self.get_next_item_id(),
|
||||
|
||||
details: ThreadItemDetails::AgentMessage(AgentMessageItem {
|
||||
text: payload.message.clone(),
|
||||
}),
|
||||
};
|
||||
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
|
||||
}
|
||||
|
||||
fn handle_reasoning_event(&self, ev: &AgentReasoningEvent) -> Vec<ThreadEvent> {
|
||||
let item = ThreadItem {
|
||||
id: self.get_next_item_id(),
|
||||
|
||||
details: ThreadItemDetails::Reasoning(ReasoningItem {
|
||||
text: ev.text.clone(),
|
||||
}),
|
||||
};
|
||||
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
|
||||
}
|
||||
fn handle_exec_command_begin(&mut self, ev: &ExecCommandBeginEvent) -> Vec<ThreadEvent> {
|
||||
let item_id = self.get_next_item_id();
|
||||
|
||||
let command_string = match shlex::try_join(ev.command.iter().map(String::as_str)) {
|
||||
Ok(command_string) => command_string,
|
||||
Err(e) => {
|
||||
warn!(
|
||||
call_id = ev.call_id,
|
||||
"Failed to stringify command: {e:?}; skipping item.started"
|
||||
);
|
||||
ev.command.join(" ")
|
||||
}
|
||||
};
|
||||
|
||||
self.running_commands.insert(
|
||||
ev.call_id.clone(),
|
||||
RunningCommand {
|
||||
command: command_string.clone(),
|
||||
item_id: item_id.clone(),
|
||||
},
|
||||
);
|
||||
|
||||
let item = ThreadItem {
|
||||
id: item_id,
|
||||
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
|
||||
command: command_string,
|
||||
aggregated_output: String::new(),
|
||||
exit_code: None,
|
||||
status: CommandExecutionStatus::InProgress,
|
||||
}),
|
||||
};
|
||||
|
||||
vec![ThreadEvent::ItemStarted(ItemStartedEvent { item })]
|
||||
}
|
||||
|
||||
fn handle_mcp_tool_call_begin(&mut self, ev: &McpToolCallBeginEvent) -> Vec<ThreadEvent> {
|
||||
let item_id = self.get_next_item_id();
|
||||
let server = ev.invocation.server.clone();
|
||||
let tool = ev.invocation.tool.clone();
|
||||
let arguments = ev.invocation.arguments.clone().unwrap_or(JsonValue::Null);
|
||||
|
||||
self.running_mcp_tool_calls.insert(
|
||||
ev.call_id.clone(),
|
||||
RunningMcpToolCall {
|
||||
server: server.clone(),
|
||||
tool: tool.clone(),
|
||||
item_id: item_id.clone(),
|
||||
arguments: arguments.clone(),
|
||||
},
|
||||
);
|
||||
|
||||
let item = ThreadItem {
|
||||
id: item_id,
|
||||
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
|
||||
server,
|
||||
tool,
|
||||
arguments,
|
||||
result: None,
|
||||
error: None,
|
||||
status: McpToolCallStatus::InProgress,
|
||||
}),
|
||||
};
|
||||
|
||||
vec![ThreadEvent::ItemStarted(ItemStartedEvent { item })]
|
||||
}
|
||||
|
||||
fn handle_mcp_tool_call_end(&mut self, ev: &McpToolCallEndEvent) -> Vec<ThreadEvent> {
|
||||
let status = if ev.is_success() {
|
||||
McpToolCallStatus::Completed
|
||||
} else {
|
||||
McpToolCallStatus::Failed
|
||||
};
|
||||
|
||||
let (server, tool, item_id, arguments) =
|
||||
match self.running_mcp_tool_calls.remove(&ev.call_id) {
|
||||
Some(running) => (
|
||||
running.server,
|
||||
running.tool,
|
||||
running.item_id,
|
||||
running.arguments,
|
||||
),
|
||||
None => {
|
||||
warn!(
|
||||
call_id = ev.call_id,
|
||||
"Received McpToolCallEnd without begin; synthesizing new item"
|
||||
);
|
||||
(
|
||||
ev.invocation.server.clone(),
|
||||
ev.invocation.tool.clone(),
|
||||
self.get_next_item_id(),
|
||||
ev.invocation.arguments.clone().unwrap_or(JsonValue::Null),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let (result, error) = match &ev.result {
|
||||
Ok(value) => {
|
||||
let result = McpToolCallItemResult {
|
||||
content: value.content.clone(),
|
||||
structured_content: value.structured_content.clone(),
|
||||
};
|
||||
(Some(result), None)
|
||||
}
|
||||
Err(message) => (
|
||||
None,
|
||||
Some(McpToolCallItemError {
|
||||
message: message.clone(),
|
||||
}),
|
||||
),
|
||||
};
|
||||
|
||||
let item = ThreadItem {
|
||||
id: item_id,
|
||||
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
|
||||
server,
|
||||
tool,
|
||||
arguments,
|
||||
result,
|
||||
error,
|
||||
status,
|
||||
}),
|
||||
};
|
||||
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
|
||||
}
|
||||
|
||||
/// Remembers a started patch application under its call id.
///
/// Nothing is emitted here; the file-change item is only reported when the
/// matching end event arrives.
fn handle_patch_apply_begin(&mut self, ev: &PatchApplyBeginEvent) -> Vec<ThreadEvent> {
    let call_id = ev.call_id.clone();
    let pending = ev.clone();
    self.running_patch_applies.insert(call_id, pending);
    vec![]
}
|
||||
|
||||
fn map_change_kind(&self, kind: &FileChange) -> PatchChangeKind {
|
||||
match kind {
|
||||
FileChange::Add { .. } => PatchChangeKind::Add,
|
||||
FileChange::Delete { .. } => PatchChangeKind::Delete,
|
||||
FileChange::Update { .. } => PatchChangeKind::Update,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_patch_apply_end(&mut self, ev: &PatchApplyEndEvent) -> Vec<ThreadEvent> {
|
||||
if let Some(running_patch_apply) = self.running_patch_applies.remove(&ev.call_id) {
|
||||
let status = if ev.success {
|
||||
PatchApplyStatus::Completed
|
||||
} else {
|
||||
PatchApplyStatus::Failed
|
||||
};
|
||||
let item = ThreadItem {
|
||||
id: self.get_next_item_id(),
|
||||
|
||||
details: ThreadItemDetails::FileChange(FileChangeItem {
|
||||
changes: running_patch_apply
|
||||
.changes
|
||||
.iter()
|
||||
.map(|(path, change)| FileUpdateChange {
|
||||
path: path.to_str().unwrap_or("").to_string(),
|
||||
kind: self.map_change_kind(change),
|
||||
})
|
||||
.collect(),
|
||||
status,
|
||||
}),
|
||||
};
|
||||
|
||||
return vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })];
|
||||
}
|
||||
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
fn handle_exec_command_end(&mut self, ev: &ExecCommandEndEvent) -> Vec<ThreadEvent> {
|
||||
let Some(RunningCommand { command, item_id }) = self.running_commands.remove(&ev.call_id)
|
||||
else {
|
||||
warn!(
|
||||
call_id = ev.call_id,
|
||||
"ExecCommandEnd without matching ExecCommandBegin; skipping item.completed"
|
||||
);
|
||||
return Vec::new();
|
||||
};
|
||||
let status = if ev.exit_code == 0 {
|
||||
CommandExecutionStatus::Completed
|
||||
} else {
|
||||
CommandExecutionStatus::Failed
|
||||
};
|
||||
let item = ThreadItem {
|
||||
id: item_id,
|
||||
|
||||
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
|
||||
command,
|
||||
aggregated_output: ev.aggregated_output.clone(),
|
||||
exit_code: Some(ev.exit_code),
|
||||
status,
|
||||
}),
|
||||
};
|
||||
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
|
||||
}
|
||||
|
||||
fn todo_items_from_plan(&self, args: &UpdatePlanArgs) -> Vec<TodoItem> {
|
||||
args.plan
|
||||
.iter()
|
||||
.map(|p| TodoItem {
|
||||
text: p.step.clone(),
|
||||
completed: matches!(p.status, StepStatus::Completed),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn handle_plan_update(&mut self, args: &UpdatePlanArgs) -> Vec<ThreadEvent> {
|
||||
let items = self.todo_items_from_plan(args);
|
||||
|
||||
if let Some(running) = &mut self.running_todo_list {
|
||||
running.items = items.clone();
|
||||
let item = ThreadItem {
|
||||
id: running.item_id.clone(),
|
||||
details: ThreadItemDetails::TodoList(TodoListItem { items }),
|
||||
};
|
||||
return vec![ThreadEvent::ItemUpdated(ItemUpdatedEvent { item })];
|
||||
}
|
||||
|
||||
let item_id = self.get_next_item_id();
|
||||
self.running_todo_list = Some(RunningTodoList {
|
||||
item_id: item_id.clone(),
|
||||
items: items.clone(),
|
||||
});
|
||||
let item = ThreadItem {
|
||||
id: item_id,
|
||||
details: ThreadItemDetails::TodoList(TodoListItem { items }),
|
||||
};
|
||||
vec![ThreadEvent::ItemStarted(ItemStartedEvent { item })]
|
||||
}
|
||||
|
||||
fn handle_task_started(&mut self, _: &TaskStartedEvent) -> Vec<ThreadEvent> {
|
||||
self.last_critical_error = None;
|
||||
vec![ThreadEvent::TurnStarted(TurnStartedEvent {})]
|
||||
}
|
||||
|
||||
fn handle_task_complete(&mut self) -> Vec<ThreadEvent> {
|
||||
let usage = if let Some(u) = &self.last_total_token_usage {
|
||||
Usage {
|
||||
input_tokens: u.input_tokens,
|
||||
cached_input_tokens: u.cached_input_tokens,
|
||||
output_tokens: u.output_tokens,
|
||||
}
|
||||
} else {
|
||||
Usage::default()
|
||||
};
|
||||
|
||||
let mut items = Vec::new();
|
||||
|
||||
if let Some(running) = self.running_todo_list.take() {
|
||||
let item = ThreadItem {
|
||||
id: running.item_id,
|
||||
details: ThreadItemDetails::TodoList(TodoListItem {
|
||||
items: running.items,
|
||||
}),
|
||||
};
|
||||
items.push(ThreadEvent::ItemCompleted(ItemCompletedEvent { item }));
|
||||
}
|
||||
|
||||
if let Some(error) = self.last_critical_error.take() {
|
||||
items.push(ThreadEvent::TurnFailed(TurnFailedEvent { error }));
|
||||
} else {
|
||||
items.push(ThreadEvent::TurnCompleted(TurnCompletedEvent { usage }));
|
||||
}
|
||||
|
||||
items
|
||||
}
|
||||
}
|
||||
|
||||
impl EventProcessor for EventProcessorWithJsonOutput {
|
||||
fn print_config_summary(&mut self, _: &Config, _: &str, ev: &SessionConfiguredEvent) {
|
||||
self.process_event(Event {
|
||||
id: "".to_string(),
|
||||
msg: EventMsg::SessionConfigured(ev.clone()),
|
||||
});
|
||||
}
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
fn process_event(&mut self, event: Event) -> CodexStatus {
|
||||
let aggregated = self.collect_thread_events(&event);
|
||||
for conv_event in aggregated {
|
||||
match serde_json::to_string(&conv_event) {
|
||||
Ok(line) => {
|
||||
println!("{line}");
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to serialize event: {e:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let Event { msg, .. } = event;
|
||||
|
||||
if let EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) = msg {
|
||||
if let Some(output_file) = self.last_message_path.as_deref() {
|
||||
handle_last_message(last_agent_message.as_deref(), output_file);
|
||||
}
|
||||
CodexStatus::InitiateShutdown
|
||||
} else {
|
||||
CodexStatus::Running
|
||||
}
|
||||
}
|
||||
}
|
||||
246
llmx-rs/exec/src/exec_events.rs
Normal file
246
llmx-rs/exec/src/exec_events.rs
Normal file
@@ -0,0 +1,246 @@
|
||||
use mcp_types::ContentBlock as McpContentBlock;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value as JsonValue;
|
||||
use ts_rs::TS;
|
||||
|
||||
/// Top-level JSONL events emitted by codex exec
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum ThreadEvent {
|
||||
/// Emitted when a new thread is started as the first event.
|
||||
#[serde(rename = "thread.started")]
|
||||
ThreadStarted(ThreadStartedEvent),
|
||||
/// Emitted when a turn is started by sending a new prompt to the model.
|
||||
/// A turn encompasses all events that happen while agent is processing the prompt.
|
||||
#[serde(rename = "turn.started")]
|
||||
TurnStarted(TurnStartedEvent),
|
||||
/// Emitted when a turn is completed. Typically right after the assistant's response.
|
||||
#[serde(rename = "turn.completed")]
|
||||
TurnCompleted(TurnCompletedEvent),
|
||||
/// Indicates that a turn failed with an error.
|
||||
#[serde(rename = "turn.failed")]
|
||||
TurnFailed(TurnFailedEvent),
|
||||
/// Emitted when a new item is added to the thread. Typically the item will be in an "in progress" state.
|
||||
#[serde(rename = "item.started")]
|
||||
ItemStarted(ItemStartedEvent),
|
||||
/// Emitted when an item is updated.
|
||||
#[serde(rename = "item.updated")]
|
||||
ItemUpdated(ItemUpdatedEvent),
|
||||
/// Signals that an item has reached a terminal state—either success or failure.
|
||||
#[serde(rename = "item.completed")]
|
||||
ItemCompleted(ItemCompletedEvent),
|
||||
/// Represents an unrecoverable error emitted directly by the event stream.
|
||||
#[serde(rename = "error")]
|
||||
Error(ThreadErrorEvent),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct ThreadStartedEvent {
|
||||
/// The identified of the new thread. Can be used to resume the thread later.
|
||||
pub thread_id: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS, Default)]
|
||||
|
||||
pub struct TurnStartedEvent {}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct TurnCompletedEvent {
|
||||
pub usage: Usage,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct TurnFailedEvent {
|
||||
pub error: ThreadErrorEvent,
|
||||
}
|
||||
|
||||
/// Describes the usage of tokens during a turn.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS, Default)]
|
||||
pub struct Usage {
|
||||
/// The number of input tokens used during the turn.
|
||||
pub input_tokens: i64,
|
||||
/// The number of cached input tokens used during the turn.
|
||||
pub cached_input_tokens: i64,
|
||||
/// The number of output tokens used during the turn.
|
||||
pub output_tokens: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct ItemStartedEvent {
|
||||
pub item: ThreadItem,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct ItemCompletedEvent {
|
||||
pub item: ThreadItem,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct ItemUpdatedEvent {
|
||||
pub item: ThreadItem,
|
||||
}
|
||||
|
||||
/// Fatal error emitted by the stream.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct ThreadErrorEvent {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
/// Canonical representation of a thread item and its domain-specific payload.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct ThreadItem {
|
||||
pub id: String,
|
||||
#[serde(flatten)]
|
||||
pub details: ThreadItemDetails,
|
||||
}
|
||||
|
||||
/// Typed payloads for each supported thread item type.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ThreadItemDetails {
|
||||
/// Response from the agent.
|
||||
/// Either a natural-language response or a JSON string when structured output is requested.
|
||||
AgentMessage(AgentMessageItem),
|
||||
/// Agent's reasoning summary.
|
||||
Reasoning(ReasoningItem),
|
||||
/// Tracks a command executed by the agent. The item starts when the command is
|
||||
/// spawned, and completes when the process exits with an exit code.
|
||||
CommandExecution(CommandExecutionItem),
|
||||
/// Represents a set of file changes by the agent. The item is emitted only as a
|
||||
/// completed event once the patch succeeds or fails.
|
||||
FileChange(FileChangeItem),
|
||||
/// Represents a call to an MCP tool. The item starts when the invocation is
|
||||
/// dispatched and completes when the MCP server reports success or failure.
|
||||
McpToolCall(McpToolCallItem),
|
||||
/// Captures a web search request. It starts when the search is kicked off
|
||||
/// and completes when results are returned to the agent.
|
||||
WebSearch(WebSearchItem),
|
||||
/// Tracks the agent's running to-do list. It starts when the plan is first
|
||||
/// issued, updates as steps change state, and completes when the turn ends.
|
||||
TodoList(TodoListItem),
|
||||
/// Describes a non-fatal error surfaced as an item.
|
||||
Error(ErrorItem),
|
||||
}
|
||||
|
||||
/// Response from the agent.
|
||||
/// Either a natural-language response or a JSON string when structured output is requested.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct AgentMessageItem {
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
/// Agent's reasoning summary.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct ReasoningItem {
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
/// The status of a command execution.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum CommandExecutionStatus {
|
||||
#[default]
|
||||
InProgress,
|
||||
Completed,
|
||||
Failed,
|
||||
}
|
||||
|
||||
/// A command executed by the agent.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct CommandExecutionItem {
|
||||
pub command: String,
|
||||
pub aggregated_output: String,
|
||||
pub exit_code: Option<i32>,
|
||||
pub status: CommandExecutionStatus,
|
||||
}
|
||||
|
||||
/// A set of file changes by the agent.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct FileUpdateChange {
|
||||
pub path: String,
|
||||
pub kind: PatchChangeKind,
|
||||
}
|
||||
|
||||
/// The status of a file change.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum PatchApplyStatus {
|
||||
Completed,
|
||||
Failed,
|
||||
}
|
||||
|
||||
/// A set of file changes by the agent.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct FileChangeItem {
|
||||
pub changes: Vec<FileUpdateChange>,
|
||||
pub status: PatchApplyStatus,
|
||||
}
|
||||
|
||||
/// Indicates the type of the file change.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum PatchChangeKind {
|
||||
Add,
|
||||
Delete,
|
||||
Update,
|
||||
}
|
||||
|
||||
/// The status of an MCP tool call.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum McpToolCallStatus {
|
||||
#[default]
|
||||
InProgress,
|
||||
Completed,
|
||||
Failed,
|
||||
}
|
||||
|
||||
/// Result payload produced by an MCP tool invocation.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct McpToolCallItemResult {
|
||||
pub content: Vec<McpContentBlock>,
|
||||
pub structured_content: Option<JsonValue>,
|
||||
}
|
||||
|
||||
/// Error details reported by a failed MCP tool invocation.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct McpToolCallItemError {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
/// A call to an MCP tool.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct McpToolCallItem {
|
||||
pub server: String,
|
||||
pub tool: String,
|
||||
#[serde(default)]
|
||||
pub arguments: JsonValue,
|
||||
pub result: Option<McpToolCallItemResult>,
|
||||
pub error: Option<McpToolCallItemError>,
|
||||
pub status: McpToolCallStatus,
|
||||
}
|
||||
|
||||
/// A web search request.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct WebSearchItem {
|
||||
pub query: String,
|
||||
}
|
||||
|
||||
/// An error notification.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct ErrorItem {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
/// An item in agent's to-do list.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct TodoItem {
|
||||
pub text: String,
|
||||
pub completed: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
|
||||
pub struct TodoListItem {
|
||||
pub items: Vec<TodoItem>,
|
||||
}
|
||||
427
llmx-rs/exec/src/lib.rs
Normal file
427
llmx-rs/exec/src/lib.rs
Normal file
@@ -0,0 +1,427 @@
|
||||
// - In the default output mode, it is paramount that the only thing written to
|
||||
// stdout is the final message (if any).
|
||||
// - In --json mode, stdout must be valid JSONL, one event per line.
|
||||
// For both modes, any other output must be written to stderr.
|
||||
#![deny(clippy::print_stdout)]
|
||||
|
||||
mod cli;
|
||||
mod event_processor;
|
||||
mod event_processor_with_human_output;
|
||||
pub mod event_processor_with_jsonl_output;
|
||||
pub mod exec_events;
|
||||
|
||||
pub use cli::Cli;
|
||||
use codex_core::AuthManager;
|
||||
use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::auth::enforce_login_restrictions;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::git_info::get_git_repo_root;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SessionSource;
|
||||
use codex_ollama::DEFAULT_OSS_MODEL;
|
||||
use codex_protocol::config_types::SandboxMode;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use event_processor_with_human_output::EventProcessorWithHumanOutput;
|
||||
use event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
|
||||
use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge;
|
||||
use serde_json::Value;
|
||||
use std::io::IsTerminal;
|
||||
use std::io::Read;
|
||||
use std::path::PathBuf;
|
||||
use supports_color::Stream;
|
||||
use tracing::debug;
|
||||
use tracing::error;
|
||||
use tracing::info;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
use tracing_subscriber::prelude::*;
|
||||
|
||||
use crate::cli::Command as ExecCommand;
|
||||
use crate::event_processor::CodexStatus;
|
||||
use crate::event_processor::EventProcessor;
|
||||
use codex_core::default_client::set_default_originator;
|
||||
use codex_core::find_conversation_path_by_id_str;
|
||||
|
||||
pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
|
||||
if let Err(err) = set_default_originator("codex_exec".to_string()) {
|
||||
tracing::warn!(?err, "Failed to set codex exec originator override {err:?}");
|
||||
}
|
||||
|
||||
let Cli {
|
||||
command,
|
||||
images,
|
||||
model: model_cli_arg,
|
||||
oss,
|
||||
config_profile,
|
||||
full_auto,
|
||||
dangerously_bypass_approvals_and_sandbox,
|
||||
cwd,
|
||||
skip_git_repo_check,
|
||||
color,
|
||||
last_message_file,
|
||||
json: json_mode,
|
||||
sandbox_mode: sandbox_mode_cli_arg,
|
||||
prompt,
|
||||
output_schema: output_schema_path,
|
||||
config_overrides,
|
||||
} = cli;
|
||||
|
||||
// Determine the prompt source (parent or subcommand) and read from stdin if needed.
|
||||
let prompt_arg = match &command {
|
||||
// Allow prompt before the subcommand by falling back to the parent-level prompt
|
||||
// when the Resume subcommand did not provide its own prompt.
|
||||
Some(ExecCommand::Resume(args)) => args.prompt.clone().or(prompt),
|
||||
None => prompt,
|
||||
};
|
||||
|
||||
let prompt = match prompt_arg {
|
||||
Some(p) if p != "-" => p,
|
||||
// Either `-` was passed or no positional arg.
|
||||
maybe_dash => {
|
||||
// When no arg (None) **and** stdin is a TTY, bail out early – unless the
|
||||
// user explicitly forced reading via `-`.
|
||||
let force_stdin = matches!(maybe_dash.as_deref(), Some("-"));
|
||||
|
||||
if std::io::stdin().is_terminal() && !force_stdin {
|
||||
eprintln!(
|
||||
"No prompt provided. Either specify one as an argument or pipe the prompt into stdin."
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
// Ensure the user knows we are waiting on stdin, as they may
|
||||
// have gotten into this state by mistake. If so, and they are not
|
||||
// writing to stdin, Codex will hang indefinitely, so this should
|
||||
// help them debug in that case.
|
||||
if !force_stdin {
|
||||
eprintln!("Reading prompt from stdin...");
|
||||
}
|
||||
let mut buffer = String::new();
|
||||
if let Err(e) = std::io::stdin().read_to_string(&mut buffer) {
|
||||
eprintln!("Failed to read prompt from stdin: {e}");
|
||||
std::process::exit(1);
|
||||
} else if buffer.trim().is_empty() {
|
||||
eprintln!("No prompt provided via stdin.");
|
||||
std::process::exit(1);
|
||||
}
|
||||
buffer
|
||||
}
|
||||
};
|
||||
|
||||
let output_schema = load_output_schema(output_schema_path);
|
||||
|
||||
let (stdout_with_ansi, stderr_with_ansi) = match color {
|
||||
cli::Color::Always => (true, true),
|
||||
cli::Color::Never => (false, false),
|
||||
cli::Color::Auto => (
|
||||
supports_color::on_cached(Stream::Stdout).is_some(),
|
||||
supports_color::on_cached(Stream::Stderr).is_some(),
|
||||
),
|
||||
};
|
||||
|
||||
// Build fmt layer (existing logging) to compose with OTEL layer.
|
||||
let default_level = "error";
|
||||
|
||||
// Build env_filter separately and attach via with_filter.
|
||||
let env_filter = EnvFilter::try_from_default_env()
|
||||
.or_else(|_| EnvFilter::try_new(default_level))
|
||||
.unwrap_or_else(|_| EnvFilter::new(default_level));
|
||||
|
||||
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||
.with_ansi(stderr_with_ansi)
|
||||
.with_writer(std::io::stderr)
|
||||
.with_filter(env_filter);
|
||||
|
||||
let sandbox_mode = if full_auto {
|
||||
Some(SandboxMode::WorkspaceWrite)
|
||||
} else if dangerously_bypass_approvals_and_sandbox {
|
||||
Some(SandboxMode::DangerFullAccess)
|
||||
} else {
|
||||
sandbox_mode_cli_arg.map(Into::<SandboxMode>::into)
|
||||
};
|
||||
|
||||
// When using `--oss`, let the bootstrapper pick the model (defaulting to
|
||||
// gpt-oss:20b) and ensure it is present locally. Also, force the built‑in
|
||||
// `oss` model provider.
|
||||
let model = if let Some(model) = model_cli_arg {
|
||||
Some(model)
|
||||
} else if oss {
|
||||
Some(DEFAULT_OSS_MODEL.to_owned())
|
||||
} else {
|
||||
None // No model specified, will use the default.
|
||||
};
|
||||
|
||||
let model_provider = if oss {
|
||||
Some(BUILT_IN_OSS_MODEL_PROVIDER_ID.to_string())
|
||||
} else {
|
||||
None // No specific model provider override.
|
||||
};
|
||||
|
||||
// Load configuration and determine approval policy
|
||||
let overrides = ConfigOverrides {
|
||||
model,
|
||||
review_model: None,
|
||||
config_profile,
|
||||
// Default to never ask for approvals in headless mode. Feature flags can override.
|
||||
approval_policy: Some(AskForApproval::Never),
|
||||
sandbox_mode,
|
||||
cwd: cwd.map(|p| p.canonicalize().unwrap_or(p)),
|
||||
model_provider,
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions: None,
|
||||
developer_instructions: None,
|
||||
compact_prompt: None,
|
||||
include_apply_patch_tool: None,
|
||||
show_raw_agent_reasoning: oss.then_some(true),
|
||||
tools_web_search_request: None,
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: Vec::new(),
|
||||
};
|
||||
// Parse `-c` overrides.
|
||||
let cli_kv_overrides = match config_overrides.parse_overrides() {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!("Error parsing -c overrides: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides).await?;
|
||||
|
||||
if let Err(err) = enforce_login_restrictions(&config).await {
|
||||
eprintln!("{err}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
let otel = codex_core::otel_init::build_provider(&config, env!("CARGO_PKG_VERSION"));
|
||||
|
||||
#[allow(clippy::print_stderr)]
|
||||
let otel = match otel {
|
||||
Ok(otel) => otel,
|
||||
Err(e) => {
|
||||
eprintln!("Could not create otel exporter: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(provider) = otel.as_ref() {
|
||||
let otel_layer = OpenTelemetryTracingBridge::new(&provider.logger).with_filter(
|
||||
tracing_subscriber::filter::filter_fn(codex_core::otel_init::codex_export_filter),
|
||||
);
|
||||
|
||||
let _ = tracing_subscriber::registry()
|
||||
.with(fmt_layer)
|
||||
.with(otel_layer)
|
||||
.try_init();
|
||||
} else {
|
||||
let _ = tracing_subscriber::registry().with(fmt_layer).try_init();
|
||||
}
|
||||
|
||||
let mut event_processor: Box<dyn EventProcessor> = match json_mode {
|
||||
true => Box::new(EventProcessorWithJsonOutput::new(last_message_file.clone())),
|
||||
_ => Box::new(EventProcessorWithHumanOutput::create_with_ansi(
|
||||
stdout_with_ansi,
|
||||
&config,
|
||||
last_message_file.clone(),
|
||||
)),
|
||||
};
|
||||
|
||||
if oss {
|
||||
codex_ollama::ensure_oss_ready(&config)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("OSS setup failed: {e}"))?;
|
||||
}
|
||||
|
||||
let default_cwd = config.cwd.to_path_buf();
|
||||
let default_approval_policy = config.approval_policy;
|
||||
let default_sandbox_policy = config.sandbox_policy.clone();
|
||||
let default_model = config.model.clone();
|
||||
let default_effort = config.model_reasoning_effort;
|
||||
let default_summary = config.model_reasoning_summary;
|
||||
|
||||
if !skip_git_repo_check && get_git_repo_root(&default_cwd).is_none() {
|
||||
eprintln!("Not inside a trusted directory and --skip-git-repo-check was not specified.");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
let auth_manager = AuthManager::shared(
|
||||
config.codex_home.clone(),
|
||||
true,
|
||||
config.cli_auth_credentials_store_mode,
|
||||
);
|
||||
let conversation_manager = ConversationManager::new(auth_manager.clone(), SessionSource::Exec);
|
||||
|
||||
// Handle resume subcommand by resolving a rollout path and using explicit resume API.
|
||||
let NewConversation {
|
||||
conversation_id: _,
|
||||
conversation,
|
||||
session_configured,
|
||||
} = if let Some(ExecCommand::Resume(args)) = command {
|
||||
let resume_path = resolve_resume_path(&config, &args).await?;
|
||||
|
||||
if let Some(path) = resume_path {
|
||||
conversation_manager
|
||||
.resume_conversation_from_rollout(config.clone(), path, auth_manager.clone())
|
||||
.await?
|
||||
} else {
|
||||
conversation_manager
|
||||
.new_conversation(config.clone())
|
||||
.await?
|
||||
}
|
||||
} else {
|
||||
conversation_manager
|
||||
.new_conversation(config.clone())
|
||||
.await?
|
||||
};
|
||||
// Print the effective configuration and prompt so users can see what Codex
|
||||
// is using.
|
||||
event_processor.print_config_summary(&config, &prompt, &session_configured);
|
||||
|
||||
info!("Codex initialized with event: {session_configured:?}");
|
||||
|
||||
let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<Event>();
|
||||
{
|
||||
let conversation = conversation.clone();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = tokio::signal::ctrl_c() => {
|
||||
tracing::debug!("Keyboard interrupt");
|
||||
// Immediately notify Codex to abort any in‑flight task.
|
||||
conversation.submit(Op::Interrupt).await.ok();
|
||||
|
||||
// Exit the inner loop and return to the main input prompt. The codex
|
||||
// will emit a `TurnInterrupted` (Error) event which is drained later.
|
||||
break;
|
||||
}
|
||||
res = conversation.next_event() => match res {
|
||||
Ok(event) => {
|
||||
debug!("Received event: {event:?}");
|
||||
|
||||
let is_shutdown_complete = matches!(event.msg, EventMsg::ShutdownComplete);
|
||||
if let Err(e) = tx.send(event) {
|
||||
error!("Error sending event: {e:?}");
|
||||
break;
|
||||
}
|
||||
if is_shutdown_complete {
|
||||
info!("Received shutdown event, exiting event loop.");
|
||||
break;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
error!("Error receiving event: {e:?}");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Package images and prompt into a single user input turn.
|
||||
let mut items: Vec<UserInput> = images
|
||||
.into_iter()
|
||||
.map(|path| UserInput::LocalImage { path })
|
||||
.collect();
|
||||
items.push(UserInput::Text { text: prompt });
|
||||
let initial_prompt_task_id = conversation
|
||||
.submit(Op::UserTurn {
|
||||
items,
|
||||
cwd: default_cwd,
|
||||
approval_policy: default_approval_policy,
|
||||
sandbox_policy: default_sandbox_policy,
|
||||
model: default_model,
|
||||
effort: default_effort,
|
||||
summary: default_summary,
|
||||
final_output_json_schema: output_schema,
|
||||
})
|
||||
.await?;
|
||||
info!("Sent prompt with event ID: {initial_prompt_task_id}");
|
||||
|
||||
// Run the loop until the task is complete.
|
||||
// Track whether a fatal error was reported by the server so we can
|
||||
// exit with a non-zero status for automation-friendly signaling.
|
||||
let mut error_seen = false;
|
||||
while let Some(event) = rx.recv().await {
|
||||
if matches!(event.msg, EventMsg::Error(_)) {
|
||||
error_seen = true;
|
||||
}
|
||||
let shutdown: CodexStatus = event_processor.process_event(event);
|
||||
match shutdown {
|
||||
CodexStatus::Running => continue,
|
||||
CodexStatus::InitiateShutdown => {
|
||||
conversation.submit(Op::Shutdown).await?;
|
||||
}
|
||||
CodexStatus::Shutdown => {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
event_processor.print_final_output();
|
||||
if error_seen {
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn resolve_resume_path(
|
||||
config: &Config,
|
||||
args: &crate::cli::ResumeArgs,
|
||||
) -> anyhow::Result<Option<PathBuf>> {
|
||||
if args.last {
|
||||
let default_provider_filter = vec![config.model_provider_id.clone()];
|
||||
match codex_core::RolloutRecorder::list_conversations(
|
||||
&config.codex_home,
|
||||
1,
|
||||
None,
|
||||
&[],
|
||||
Some(default_provider_filter.as_slice()),
|
||||
&config.model_provider_id,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(page) => Ok(page.items.first().map(|it| it.path.clone())),
|
||||
Err(e) => {
|
||||
error!("Error listing conversations: {e}");
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
} else if let Some(id_str) = args.session_id.as_deref() {
|
||||
let path = find_conversation_path_by_id_str(&config.codex_home, id_str).await?;
|
||||
Ok(path)
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn load_output_schema(path: Option<PathBuf>) -> Option<Value> {
|
||||
let path = path?;
|
||||
|
||||
let schema_str = match std::fs::read_to_string(&path) {
|
||||
Ok(contents) => contents,
|
||||
Err(err) => {
|
||||
eprintln!(
|
||||
"Failed to read output schema file {}: {err}",
|
||||
path.display()
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
match serde_json::from_str::<Value>(&schema_str) {
|
||||
Ok(value) => Some(value),
|
||||
Err(err) => {
|
||||
eprintln!(
|
||||
"Output schema file {} is not valid JSON: {err}",
|
||||
path.display()
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
40
llmx-rs/exec/src/main.rs
Normal file
40
llmx-rs/exec/src/main.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
//! Entry-point for the `codex-exec` binary.
|
||||
//!
|
||||
//! When this CLI is invoked normally, it parses the standard `codex-exec` CLI
|
||||
//! options and launches the non-interactive Codex agent. However, if it is
|
||||
//! invoked with arg0 as `codex-linux-sandbox`, we instead treat the invocation
|
||||
//! as a request to run the logic for the standalone `codex-linux-sandbox`
|
||||
//! executable (i.e., parse any -s args and then run a *sandboxed* command under
|
||||
//! Landlock + seccomp).
|
||||
//!
|
||||
//! This allows us to ship a completely separate set of functionality as part
|
||||
//! of the `codex-exec` binary.
|
||||
use clap::Parser;
|
||||
use codex_arg0::arg0_dispatch_or_else;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_exec::Cli;
|
||||
use codex_exec::run_main;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
struct TopCli {
|
||||
#[clap(flatten)]
|
||||
config_overrides: CliConfigOverrides,
|
||||
|
||||
#[clap(flatten)]
|
||||
inner: Cli,
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move {
|
||||
let top_cli = TopCli::parse();
|
||||
// Merge root-level overrides into inner CLI struct so downstream logic remains unchanged.
|
||||
let mut inner = top_cli.inner;
|
||||
inner
|
||||
.config_overrides
|
||||
.raw_overrides
|
||||
.splice(0..0, top_cli.config_overrides.raw_overrides);
|
||||
|
||||
run_main(inner, codex_linux_sandbox_exe).await?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
5
llmx-rs/exec/tests/all.rs
Normal file
5
llmx-rs/exec/tests/all.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
// Single integration test binary that aggregates all test modules.
|
||||
// Most submodules live in `tests/suite/`; `event_processor_with_json_output` sits directly in `tests/`.
|
||||
mod suite;
|
||||
|
||||
mod event_processor_with_json_output;
|
||||
939
llmx-rs/exec/tests/event_processor_with_json_output.rs
Normal file
939
llmx-rs/exec/tests/event_processor_with_json_output.rs
Normal file
@@ -0,0 +1,939 @@
|
||||
use codex_core::protocol::AgentMessageEvent;
|
||||
use codex_core::protocol::AgentReasoningEvent;
|
||||
use codex_core::protocol::ErrorEvent;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExecCommandBeginEvent;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::McpInvocation;
|
||||
use codex_core::protocol::McpToolCallBeginEvent;
|
||||
use codex_core::protocol::McpToolCallEndEvent;
|
||||
use codex_core::protocol::PatchApplyBeginEvent;
|
||||
use codex_core::protocol::PatchApplyEndEvent;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_core::protocol::WebSearchEndEvent;
|
||||
use codex_exec::event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
|
||||
use codex_exec::exec_events::AgentMessageItem;
|
||||
use codex_exec::exec_events::CommandExecutionItem;
|
||||
use codex_exec::exec_events::CommandExecutionStatus;
|
||||
use codex_exec::exec_events::ErrorItem;
|
||||
use codex_exec::exec_events::ItemCompletedEvent;
|
||||
use codex_exec::exec_events::ItemStartedEvent;
|
||||
use codex_exec::exec_events::ItemUpdatedEvent;
|
||||
use codex_exec::exec_events::McpToolCallItem;
|
||||
use codex_exec::exec_events::McpToolCallItemError;
|
||||
use codex_exec::exec_events::McpToolCallItemResult;
|
||||
use codex_exec::exec_events::McpToolCallStatus;
|
||||
use codex_exec::exec_events::PatchApplyStatus;
|
||||
use codex_exec::exec_events::PatchChangeKind;
|
||||
use codex_exec::exec_events::ReasoningItem;
|
||||
use codex_exec::exec_events::ThreadErrorEvent;
|
||||
use codex_exec::exec_events::ThreadEvent;
|
||||
use codex_exec::exec_events::ThreadItem;
|
||||
use codex_exec::exec_events::ThreadItemDetails;
|
||||
use codex_exec::exec_events::ThreadStartedEvent;
|
||||
use codex_exec::exec_events::TodoItem as ExecTodoItem;
|
||||
use codex_exec::exec_events::TodoListItem as ExecTodoListItem;
|
||||
use codex_exec::exec_events::TurnCompletedEvent;
|
||||
use codex_exec::exec_events::TurnFailedEvent;
|
||||
use codex_exec::exec_events::TurnStartedEvent;
|
||||
use codex_exec::exec_events::Usage;
|
||||
use codex_exec::exec_events::WebSearchItem;
|
||||
use codex_protocol::plan_tool::PlanItemArg;
|
||||
use codex_protocol::plan_tool::StepStatus;
|
||||
use codex_protocol::plan_tool::UpdatePlanArgs;
|
||||
use mcp_types::CallToolResult;
|
||||
use mcp_types::ContentBlock;
|
||||
use mcp_types::TextContent;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
fn event(id: &str, msg: EventMsg) -> Event {
|
||||
Event {
|
||||
id: id.to_string(),
|
||||
msg,
|
||||
}
|
||||
}
|
||||
|
||||
/// A `SessionConfigured` event yields one `ThreadStarted` event whose
/// `thread_id` is the session id rendered as a string.
#[test]
fn session_configured_produces_thread_started_event() {
    const SESSION_UUID: &str = "67e55044-10b1-426f-9247-bb680e5fe0c8";

    let mut processor = EventProcessorWithJsonOutput::new(None);
    let configured = SessionConfiguredEvent {
        session_id: codex_protocol::ConversationId::from_string(SESSION_UUID).unwrap(),
        model: "codex-mini-latest".to_string(),
        reasoning_effort: None,
        history_log_id: 0,
        history_entry_count: 0,
        initial_messages: None,
        rollout_path: PathBuf::from("/tmp/rollout.json"),
    };

    let emitted = processor.collect_thread_events(&event(
        "e1",
        EventMsg::SessionConfigured(configured),
    ));

    let expected = vec![ThreadEvent::ThreadStarted(ThreadStartedEvent {
        thread_id: SESSION_UUID.to_string(),
    })];
    assert_eq!(emitted, expected);
}
|
||||
|
||||
/// A `TaskStarted` event is surfaced as a single `TurnStarted` thread event.
#[test]
fn task_started_produces_turn_started_event() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let task_started = EventMsg::TaskStarted(codex_core::protocol::TaskStartedEvent {
        model_context_window: Some(32_000),
    });

    let emitted = processor.collect_thread_events(&event("t1", task_started));

    let expected = vec![ThreadEvent::TurnStarted(TurnStartedEvent {})];
    assert_eq!(emitted, expected);
}
|
||||
|
||||
/// A `WebSearchEnd` event produces one completed `WebSearch` item that
/// carries the original query.
#[test]
fn web_search_end_emits_item_completed() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let query = String::from("rust async await");
    let search_end = event(
        "w1",
        EventMsg::WebSearchEnd(WebSearchEndEvent {
            call_id: "call-123".to_string(),
            query: query.clone(),
        }),
    );

    let emitted = processor.collect_thread_events(&search_end);

    let expected_item = ThreadItem {
        id: "item_0".to_string(),
        details: ThreadItemDetails::WebSearch(WebSearchItem { query }),
    };
    assert_eq!(
        emitted,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: expected_item,
        })]
    );
}
|
||||
|
||||
/// Walks a todo list through its lifecycle within one turn: the first plan
/// update starts the item, the second updates it under the same id, and task
/// completion closes it with the latest state followed by `TurnCompleted`.
#[test]
fn plan_update_emits_todo_list_started_updated_and_completed() {
    let mut processor = EventProcessorWithJsonOutput::new(None);

    // Both plan updates list the same two steps; only step one's status varies.
    let plan_update = |event_id: &str, step_one_status: StepStatus| {
        event(
            event_id,
            EventMsg::PlanUpdate(UpdatePlanArgs {
                explanation: None,
                plan: vec![
                    PlanItemArg {
                        step: "step one".to_string(),
                        status: step_one_status,
                    },
                    PlanItemArg {
                        step: "step two".to_string(),
                        status: StepStatus::InProgress,
                    },
                ],
            }),
        )
    };
    // Expected todo-list item; step two is never marked completed in this test.
    let todo_list = |step_one_completed: bool| ThreadItem {
        id: "item_0".to_string(),
        details: ThreadItemDetails::TodoList(ExecTodoListItem {
            items: vec![
                ExecTodoItem {
                    text: "step one".to_string(),
                    completed: step_one_completed,
                },
                ExecTodoItem {
                    text: "step two".to_string(),
                    completed: false,
                },
            ],
        }),
    };

    // First plan update => item.started (todo_list)
    let started = processor.collect_thread_events(&plan_update("p1", StepStatus::Pending));
    assert_eq!(
        started,
        vec![ThreadEvent::ItemStarted(ItemStartedEvent {
            item: todo_list(false),
        })]
    );

    // Second plan update in the same turn => item.updated (same id)
    let updated = processor.collect_thread_events(&plan_update("p2", StepStatus::Completed));
    assert_eq!(
        updated,
        vec![ThreadEvent::ItemUpdated(ItemUpdatedEvent {
            item: todo_list(true),
        })]
    );

    // Task completes => item.completed (same id, latest state), then turn.completed
    let task_complete = event(
        "p3",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: None,
        }),
    );
    let completed = processor.collect_thread_events(&task_complete);
    assert_eq!(
        completed,
        vec![
            ThreadEvent::ItemCompleted(ItemCompletedEvent {
                item: todo_list(true),
            }),
            ThreadEvent::TurnCompleted(TurnCompletedEvent {
                usage: Usage::default(),
            }),
        ]
    );
}
|
||||
|
||||
/// An MCP tool call emits `ItemStarted` on begin and `ItemCompleted` on a
/// successful end, both under the same item id.
#[test]
fn mcp_tool_call_begin_and_end_emit_item_events() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let arguments = json!({ "key": "value" });
    let invocation = McpInvocation {
        server: "server_a".to_string(),
        tool: "tool_x".to_string(),
        arguments: Some(arguments.clone()),
    };

    // Begin => item.started: in progress, no result or error yet.
    let started = processor.collect_thread_events(&event(
        "m1",
        EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
            call_id: "call-1".to_string(),
            invocation: invocation.clone(),
        }),
    ));
    let expected_started = vec![ThreadEvent::ItemStarted(ItemStartedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                server: "server_a".to_string(),
                tool: "tool_x".to_string(),
                arguments: arguments.clone(),
                result: None,
                error: None,
                status: McpToolCallStatus::InProgress,
            }),
        },
    })];
    assert_eq!(started, expected_started);

    // End with an Ok result => item.completed carrying the (empty) result.
    let finished = processor.collect_thread_events(&event(
        "m2",
        EventMsg::McpToolCallEnd(McpToolCallEndEvent {
            call_id: "call-1".to_string(),
            invocation,
            duration: Duration::from_secs(1),
            result: Ok(CallToolResult {
                content: Vec::new(),
                is_error: None,
                structured_content: None,
            }),
        }),
    ));
    let expected_finished = vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                server: "server_a".to_string(),
                tool: "tool_x".to_string(),
                arguments,
                result: Some(McpToolCallItemResult {
                    content: Vec::new(),
                    structured_content: None,
                }),
                error: None,
                status: McpToolCallStatus::Completed,
            }),
        },
    })];
    assert_eq!(finished, expected_finished);
}
|
||||
|
||||
/// An MCP tool call that ends with `Err` completes its item with status
/// `Failed`, no result, and the error message preserved.
#[test]
fn mcp_tool_call_failure_sets_failed_status() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let arguments = json!({ "param": 42 });
    let invocation = McpInvocation {
        server: "server_b".to_string(),
        tool: "tool_y".to_string(),
        arguments: Some(arguments.clone()),
    };

    // Register the call; the begin output is not what this test checks.
    let begin = event(
        "m3",
        EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
            call_id: "call-2".to_string(),
            invocation: invocation.clone(),
        }),
    );
    processor.collect_thread_events(&begin);

    let failed_end = event(
        "m4",
        EventMsg::McpToolCallEnd(McpToolCallEndEvent {
            call_id: "call-2".to_string(),
            invocation,
            duration: Duration::from_millis(5),
            result: Err("tool exploded".to_string()),
        }),
    );
    let emitted = processor.collect_thread_events(&failed_end);

    let expected_item = ThreadItem {
        id: "item_0".to_string(),
        details: ThreadItemDetails::McpToolCall(McpToolCallItem {
            server: "server_b".to_string(),
            tool: "tool_y".to_string(),
            arguments,
            result: None,
            error: Some(McpToolCallItemError {
                message: "tool exploded".to_string(),
            }),
            status: McpToolCallStatus::Failed,
        }),
    };
    assert_eq!(
        emitted,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: expected_item,
        })]
    );
}
|
||||
|
||||
/// With no invocation arguments the item reports JSON `null`, and a
/// successful result keeps both its content blocks and structured content.
#[test]
fn mcp_tool_call_defaults_arguments_and_preserves_structured_content() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let invocation = McpInvocation {
        server: "server_c".to_string(),
        tool: "tool_z".to_string(),
        arguments: None,
    };
    // The same text block appears in the tool result and in the expected item.
    let done_block = || {
        ContentBlock::TextContent(TextContent {
            annotations: None,
            text: "done".to_string(),
            r#type: "text".to_string(),
        })
    };

    // Begin => item.started with arguments defaulted to null.
    let started = processor.collect_thread_events(&event(
        "m5",
        EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
            call_id: "call-3".to_string(),
            invocation: invocation.clone(),
        }),
    ));
    let expected_started = vec![ThreadEvent::ItemStarted(ItemStartedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                server: "server_c".to_string(),
                tool: "tool_z".to_string(),
                arguments: serde_json::Value::Null,
                result: None,
                error: None,
                status: McpToolCallStatus::InProgress,
            }),
        },
    })];
    assert_eq!(started, expected_started);

    // End => item.completed with content and structured content intact.
    let finished = processor.collect_thread_events(&event(
        "m6",
        EventMsg::McpToolCallEnd(McpToolCallEndEvent {
            call_id: "call-3".to_string(),
            invocation,
            duration: Duration::from_millis(10),
            result: Ok(CallToolResult {
                content: vec![done_block()],
                is_error: None,
                structured_content: Some(json!({ "status": "ok" })),
            }),
        }),
    ));
    let expected_finished = vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::McpToolCall(McpToolCallItem {
                server: "server_c".to_string(),
                tool: "tool_z".to_string(),
                arguments: serde_json::Value::Null,
                result: Some(McpToolCallItemResult {
                    content: vec![done_block()],
                    structured_content: Some(json!({ "status": "ok" })),
                }),
                error: None,
                status: McpToolCallStatus::Completed,
            }),
        },
    })];
    assert_eq!(finished, expected_finished);
}
|
||||
|
||||
/// After a turn completes, the next plan update starts a fresh todo list
/// under a new item id instead of reusing the previous one.
#[test]
fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
    let mut processor = EventProcessorWithJsonOutput::new(None);

    // Builds a plan update holding a single pending step.
    let single_step_plan = |event_id: &str, step: &str| {
        event(
            event_id,
            EventMsg::PlanUpdate(UpdatePlanArgs {
                explanation: None,
                plan: vec![PlanItemArg {
                    step: step.to_string(),
                    status: StepStatus::Pending,
                }],
            }),
        )
    };

    // First turn: start + complete
    let _ = processor.collect_thread_events(&single_step_plan("t1", "only"));
    let task_complete = event(
        "t2",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: None,
        }),
    );
    let _ = processor.collect_thread_events(&task_complete);

    // Second turn: a new todo list should have a new id
    let second_turn = processor.collect_thread_events(&single_step_plan("t3", "again"));

    match &second_turn[0] {
        ThreadEvent::ItemStarted(ItemStartedEvent { item }) => {
            assert_eq!(&item.id, "item_1");
        }
        other => panic!("unexpected event: {other:?}"),
    }
}
|
||||
|
||||
/// An `AgentReasoning` event becomes one completed `Reasoning` item with the
/// same text.
#[test]
fn agent_reasoning_produces_item_completed_reasoning() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let reasoning = EventMsg::AgentReasoning(AgentReasoningEvent {
        text: "thinking...".to_string(),
    });

    let emitted = processor.collect_thread_events(&event("e1", reasoning));

    let expected_item = ThreadItem {
        id: "item_0".to_string(),
        details: ThreadItemDetails::Reasoning(ReasoningItem {
            text: "thinking...".to_string(),
        }),
    };
    assert_eq!(
        emitted,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: expected_item,
        })]
    );
}
|
||||
|
||||
/// An `AgentMessage` event becomes one completed `AgentMessage` item with the
/// same text.
#[test]
fn agent_message_produces_item_completed_agent_message() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let agent_message = EventMsg::AgentMessage(AgentMessageEvent {
        message: "hello".to_string(),
    });

    let emitted = processor.collect_thread_events(&event("e1", agent_message));

    let expected_item = ThreadItem {
        id: "item_0".to_string(),
        details: ThreadItemDetails::AgentMessage(AgentMessageItem {
            text: "hello".to_string(),
        }),
    };
    assert_eq!(
        emitted,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: expected_item,
        })]
    );
}
|
||||
|
||||
/// An `Error` event is forwarded as a thread-level `Error` event with the
/// same message.
#[test]
fn error_event_produces_error() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let failure = event(
        "e1",
        EventMsg::Error(ErrorEvent {
            message: "boom".to_string(),
        }),
    );

    let emitted = processor.collect_thread_events(&failure);

    let expected = vec![ThreadEvent::Error(ThreadErrorEvent {
        message: "boom".to_string(),
    })];
    assert_eq!(emitted, expected);
}
|
||||
|
||||
/// A `Warning` event is reported as a completed `Error` item (not a
/// thread-level error) carrying the warning text unchanged.
#[test]
fn warning_event_produces_error_item() {
    let warning_text = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.";

    let mut processor = EventProcessorWithJsonOutput::new(None);
    let warning = event(
        "e1",
        EventMsg::Warning(WarningEvent {
            message: warning_text.to_string(),
        }),
    );

    let emitted = processor.collect_thread_events(&warning);

    let expected_item = ThreadItem {
        id: "item_0".to_string(),
        details: ThreadItemDetails::Error(ErrorItem {
            message: warning_text.to_string(),
        }),
    };
    assert_eq!(
        emitted,
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: expected_item,
        })]
    );
}
|
||||
|
||||
/// A `StreamError` event is forwarded as a thread-level `Error` event with
/// the same message.
#[test]
fn stream_error_event_produces_error() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    let stream_error = EventMsg::StreamError(codex_core::protocol::StreamErrorEvent {
        message: "retrying".to_string(),
    });

    let emitted = processor.collect_thread_events(&event("e1", stream_error));

    let expected = vec![ThreadEvent::Error(ThreadErrorEvent {
        message: "retrying".to_string(),
    })];
    assert_eq!(emitted, expected);
}
|
||||
|
||||
/// When an error has been seen during a turn, task completion is reported as
/// `TurnFailed` carrying that error rather than as `TurnCompleted`.
#[test]
fn error_followed_by_task_complete_produces_turn_failed() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    // The same error payload is expected from both steps below.
    let boom = || ThreadErrorEvent {
        message: "boom".to_string(),
    };

    // Step 1: the error itself is emitted immediately.
    let failure = event(
        "e1",
        EventMsg::Error(ErrorEvent {
            message: "boom".to_string(),
        }),
    );
    assert_eq!(
        processor.collect_thread_events(&failure),
        vec![ThreadEvent::Error(boom())]
    );

    // Step 2: completing the task afterwards marks the turn as failed.
    let task_complete = event(
        "e2",
        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
            last_agent_message: None,
        }),
    );
    assert_eq!(
        processor.collect_thread_events(&task_complete),
        vec![ThreadEvent::TurnFailed(TurnFailedEvent { error: boom() })]
    );
}
|
||||
|
||||
/// A command that begins and then exits with code 0 yields `ItemStarted`
/// followed by `ItemCompleted` (status `Completed`) under the same item id.
#[test]
fn exec_command_end_success_produces_completed_command_item() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    // Command line as it is expected to appear on the emitted items.
    let rendered_command = "bash -lc 'echo hi'";

    // Begin -> item.started (item_0), in progress with no output or exit code.
    let begin = event(
        "c1",
        EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
            call_id: "1".to_string(),
            command: ["bash", "-lc", "echo hi"]
                .into_iter()
                .map(String::from)
                .collect(),
            cwd: std::env::current_dir().unwrap(),
            parsed_cmd: Vec::new(),
            is_user_shell_command: false,
        }),
    );
    let started = processor.collect_thread_events(&begin);
    let expected_started = vec![ThreadEvent::ItemStarted(ItemStartedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                command: rendered_command.to_string(),
                aggregated_output: String::new(),
                exit_code: None,
                status: CommandExecutionStatus::InProgress,
            }),
        },
    })];
    assert_eq!(started, expected_started);

    // End (success) -> item.completed (item_0)
    let end_ok = event(
        "c2",
        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
            call_id: "1".to_string(),
            stdout: String::new(),
            stderr: String::new(),
            aggregated_output: "hi\n".to_string(),
            exit_code: 0,
            duration: Duration::from_millis(5),
            formatted_output: String::new(),
        }),
    );
    let finished = processor.collect_thread_events(&end_ok);
    let expected_finished = vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                command: rendered_command.to_string(),
                aggregated_output: "hi\n".to_string(),
                exit_code: Some(0),
                status: CommandExecutionStatus::Completed,
            }),
        },
    })];
    assert_eq!(finished, expected_finished);
}
|
||||
|
||||
/// A command that begins and then exits with a non-zero code yields
/// `ItemStarted` followed by `ItemCompleted` with status `Failed`.
#[test]
fn exec_command_end_failure_produces_failed_command_item() {
    let mut processor = EventProcessorWithJsonOutput::new(None);
    // Command line as it is expected to appear on the emitted items.
    let rendered_command = "sh -c 'exit 1'";

    // Begin -> item.started (item_0), in progress with no output or exit code.
    let begin = event(
        "c1",
        EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
            call_id: "2".to_string(),
            command: ["sh", "-c", "exit 1"]
                .into_iter()
                .map(String::from)
                .collect(),
            cwd: std::env::current_dir().unwrap(),
            parsed_cmd: Vec::new(),
            is_user_shell_command: false,
        }),
    );
    let expected_started = vec![ThreadEvent::ItemStarted(ItemStartedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                command: rendered_command.to_string(),
                aggregated_output: String::new(),
                exit_code: None,
                status: CommandExecutionStatus::InProgress,
            }),
        },
    })];
    assert_eq!(processor.collect_thread_events(&begin), expected_started);

    // End (failure) -> item.completed (item_0)
    let end_fail = event(
        "c2",
        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
            call_id: "2".to_string(),
            stdout: String::new(),
            stderr: String::new(),
            aggregated_output: String::new(),
            exit_code: 1,
            duration: Duration::from_millis(2),
            formatted_output: String::new(),
        }),
    );
    let finished = processor.collect_thread_events(&end_fail);
    let expected_finished = vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
        item: ThreadItem {
            id: "item_0".to_string(),
            details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
                command: rendered_command.to_string(),
                aggregated_output: String::new(),
                exit_code: Some(1),
                status: CommandExecutionStatus::Failed,
            }),
        },
    })];
    assert_eq!(finished, expected_finished);
}
|
||||
|
||||
/// An `ExecCommandEnd` whose call id was never announced by a matching begin
/// event produces no thread events.
#[test]
fn exec_command_end_without_begin_is_ignored() {
    let mut processor = EventProcessorWithJsonOutput::new(None);

    // No ExecCommandBegin was sent for this call id.
    let orphan_end = EventMsg::ExecCommandEnd(ExecCommandEndEvent {
        call_id: "no-begin".to_string(),
        stdout: String::new(),
        stderr: String::new(),
        aggregated_output: String::new(),
        exit_code: 0,
        duration: Duration::from_millis(1),
        formatted_output: String::new(),
    });

    let emitted = processor.collect_thread_events(&event("c1", orphan_end));
    assert!(emitted.is_empty());
}
|
||||
|
||||
/// A successful patch application emits nothing on begin and, on end, one
/// completed `FileChange` item listing every changed path with its kind.
#[test]
fn patch_apply_success_produces_item_completed_patchapply() {
    let mut processor = EventProcessorWithJsonOutput::new(None);

    // A patch with one change of each kind: add, delete, and update (with a move path).
    let changes = std::collections::HashMap::from([
        (
            PathBuf::from("a/added.txt"),
            FileChange::Add {
                content: "+hello".to_string(),
            },
        ),
        (
            PathBuf::from("b/deleted.txt"),
            FileChange::Delete {
                content: "-goodbye".to_string(),
            },
        ),
        (
            PathBuf::from("c/modified.txt"),
            FileChange::Update {
                unified_diff: "--- c/modified.txt\n+++ c/modified.txt\n@@\n-old\n+new\n".to_string(),
                move_path: Some(PathBuf::from("c/renamed.txt")),
            },
        ),
    ]);

    // Begin -> no output
    let begin = event(
        "p1",
        EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
            call_id: "call-1".to_string(),
            auto_approved: true,
            changes,
        }),
    );
    assert!(processor.collect_thread_events(&begin).is_empty());

    // End (success) -> item.completed (item_0)
    let end = event(
        "p2",
        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
            call_id: "call-1".to_string(),
            stdout: "applied 3 changes".to_string(),
            stderr: String::new(),
            success: true,
        }),
    );
    let out_end = processor.collect_thread_events(&end);
    assert_eq!(out_end.len(), 1);

    // Peel the event apart step by step so a mismatch names the offending layer.
    let item = match &out_end[0] {
        ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => item,
        other => panic!("unexpected event: {other:?}"),
    };
    assert_eq!(&item.id, "item_0");

    let file_update = match &item.details {
        ThreadItemDetails::FileChange(file_update) => file_update,
        other => panic!("unexpected details: {other:?}"),
    };
    assert_eq!(file_update.status, PatchApplyStatus::Completed);

    // Compare sorted by path: the changes came from a HashMap, so their order is arbitrary.
    let mut actual: Vec<(String, PatchChangeKind)> = file_update
        .changes
        .iter()
        .map(|change| (change.path.clone(), change.kind.clone()))
        .collect();
    actual.sort_by(|(left, _), (right, _)| left.cmp(right));

    let mut expected = vec![
        ("a/added.txt".to_string(), PatchChangeKind::Add),
        ("b/deleted.txt".to_string(), PatchChangeKind::Delete),
        ("c/modified.txt".to_string(), PatchChangeKind::Update),
    ];
    expected.sort_by(|(left, _), (right, _)| left.cmp(right));

    assert_eq!(actual, expected);
}
|
||||
|
||||
/// A failed patch application still emits a single `ItemCompleted` event, but
/// its `FileChange` details carry a `Failed` status alongside the change that
/// was announced in the begin event.
#[test]
fn patch_apply_failure_produces_item_completed_patchapply_failed() {
    let mut processor = EventProcessorWithJsonOutput::new(None);

    let patch_changes = std::collections::HashMap::from([(
        PathBuf::from("file.txt"),
        FileChange::Update {
            unified_diff: "--- file.txt\n+++ file.txt\n@@\n-old\n+new\n".to_string(),
            move_path: None,
        },
    )]);

    // The begin event on its own produces no output.
    let begin_event = event(
        "p1",
        EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
            call_id: "call-2".to_string(),
            auto_approved: false,
            changes: patch_changes,
        }),
    );
    assert!(processor.collect_thread_events(&begin_event).is_empty());

    // The unsuccessful end event completes item_0 with a Failed status.
    let end_event = event(
        "p2",
        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
            call_id: "call-2".to_string(),
            stdout: String::new(),
            stderr: "failed to apply".to_string(),
            success: false,
        }),
    );
    let emitted = processor.collect_thread_events(&end_event);
    assert_eq!(emitted.len(), 1);

    let ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) = &emitted[0] else {
        panic!("unexpected event: {:?}", emitted[0]);
    };
    assert_eq!(&item.id, "item_0");

    let ThreadItemDetails::FileChange(file_update) = &item.details else {
        panic!("unexpected details: {:?}", item.details);
    };
    assert_eq!(file_update.status, PatchApplyStatus::Failed);
    assert_eq!(file_update.changes.len(), 1);

    let only_change = &file_update.changes[0];
    assert_eq!(only_change.path, "file.txt".to_string());
    assert_eq!(only_change.kind, PatchChangeKind::Update);
}
|
||||
|
||||
/// Token totals reported by a `TokenCount` event are replayed in the
/// `TurnCompleted` event that the following `TaskComplete` produces.
#[test]
fn task_complete_produces_turn_completed_with_usage() {
    let mut processor = EventProcessorWithJsonOutput::new(None);

    // Step 1: report known token totals; this alone emits nothing.
    let reported_usage = codex_core::protocol::TokenUsage {
        input_tokens: 1200,
        cached_input_tokens: 200,
        output_tokens: 345,
        reasoning_output_tokens: 0,
        total_tokens: 0,
    };
    let usage_info = codex_core::protocol::TokenUsageInfo {
        last_token_usage: reported_usage.clone(),
        total_token_usage: reported_usage,
        model_context_window: None,
    };
    let token_count = codex_core::protocol::TokenCountEvent {
        info: Some(usage_info),
        rate_limits: None,
    };
    let token_count_event = event("e1", EventMsg::TokenCount(token_count));
    assert!(processor.collect_thread_events(&token_count_event).is_empty());

    // Step 2: completing the task yields turn.completed with the captured usage.
    let task_complete = codex_core::protocol::TaskCompleteEvent {
        last_agent_message: Some("done".to_string()),
    };
    let complete_event = event("e2", EventMsg::TaskComplete(task_complete));

    let expected_usage = Usage {
        input_tokens: 1200,
        cached_input_tokens: 200,
        output_tokens: 345,
    };
    assert_eq!(
        processor.collect_thread_events(&complete_event),
        vec![ThreadEvent::TurnCompleted(TurnCompletedEvent {
            usage: expected_usage,
        })]
    );
}
|
||||
4
llmx-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
vendored
Normal file
4
llmx-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
class BaseClass:
|
||||
def method():
|
||||
|
||||
return True
|
||||
10
llmx-rs/exec/tests/fixtures/cli_responses_fixture.sse
vendored
Normal file
10
llmx-rs/exec/tests/fixtures/cli_responses_fixture.sse
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
event: response.created
|
||||
data: {"type":"response.created","response":{"id":"resp1"}}
|
||||
|
||||
event: response.output_item.done
|
||||
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"fixture hello"}]}}
|
||||
|
||||
event: response.completed
|
||||
data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
|
||||
|
||||
|
||||
151
llmx-rs/exec/tests/suite/apply_patch.rs
Normal file
151
llmx-rs/exec/tests/suite/apply_patch.rs
Normal file
@@ -0,0 +1,151 @@
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used, unused_imports)]
|
||||
|
||||
use anyhow::Context;
|
||||
use assert_cmd::prelude::*;
|
||||
use codex_core::CODEX_APPLY_PATCH_ARG1;
|
||||
use core_test_support::responses::ev_apply_patch_custom_tool_call;
|
||||
use core_test_support::responses::ev_apply_patch_function_call;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
use tempfile::tempdir;
|
||||
|
||||
/// The standalone `codex-exec` binary must be able to act as the
/// `apply_patch` CLI when invoked with `CODEX_APPLY_PATCH_ARG1`, regardless of
/// whether the `codex` multitool ever grows an `apply-patch` subcommand.
#[test]
fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
    let workdir = tempdir()?;
    let target_file = workdir.path().join("source.txt");
    fs::write(&target_file, "original content\n")?;

    // Patch replacing the single line of `source.txt`.
    let patch = r#"*** Begin Patch
*** Update File: source.txt
@@
-original content
+modified by apply_patch
*** End Patch"#;

    let mut exec_cmd =
        Command::cargo_bin("codex-exec").context("should find binary for codex-exec")?;
    exec_cmd
        .arg(CODEX_APPLY_PATCH_ARG1)
        .arg(patch)
        .current_dir(workdir.path());
    exec_cmd
        .assert()
        .success()
        .stdout("Success. Updated the following files:\nM source.txt\n")
        .stderr(predicates::str::is_empty());

    let patched = fs::read_to_string(target_file)?;
    assert_eq!(patched, "modified by apply_patch\n");
    Ok(())
}
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn test_apply_patch_tool() -> anyhow::Result<()> {
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let test = test_codex_exec();
|
||||
let tmp_path = test.cwd_path().to_path_buf();
|
||||
let add_patch = r#"*** Begin Patch
|
||||
*** Add File: test.md
|
||||
+Hello world
|
||||
*** End Patch"#;
|
||||
let update_patch = r#"*** Begin Patch
|
||||
*** Update File: test.md
|
||||
@@
|
||||
-Hello world
|
||||
+Final text
|
||||
*** End Patch"#;
|
||||
let response_streams = vec![
|
||||
sse(vec![
|
||||
ev_apply_patch_custom_tool_call("request_0", add_patch),
|
||||
ev_completed("request_0"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_apply_patch_function_call("request_1", update_patch),
|
||||
ev_completed("request_1"),
|
||||
]),
|
||||
sse(vec![ev_completed("request_2")]),
|
||||
];
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_sequence(&server, response_streams).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-s")
|
||||
.arg("danger-full-access")
|
||||
.arg("foo")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
let final_path = tmp_path.join("test.md");
|
||||
let contents = std::fs::read_to_string(&final_path)
|
||||
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
|
||||
assert_eq!(contents, "Final text\n");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Runs `codex-exec` against a mock server that replays two freeform
/// `apply_patch` custom tool calls (create `app.py`, then update it) and
/// compares the resulting file with the `apply_patch_freeform_final.txt`
/// fixture. Compiled only on non-Windows targets.
#[cfg(not(target_os = "windows"))]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
|
||||
// Return `Ok(())` early through the `skip_if_no_network!` macro when it decides to skip.
skip_if_no_network!(Ok(()));
|
||||
|
||||
let test = test_codex_exec();
|
||||
// Patch #1: add `app.py` containing a class whose method returns False.
// NOTE(review): whitespace inside these patch literals ends up in the file
// that is compared against the fixture below, so it must not be reformatted.
let freeform_add_patch = r#"*** Begin Patch
|
||||
*** Add File: app.py
|
||||
+class BaseClass:
|
||||
+ def method():
|
||||
+ return False
|
||||
*** End Patch"#;
|
||||
// Patch #2: replace the `return False` line with an empty line plus `return True`.
let freeform_update_patch = r#"*** Begin Patch
|
||||
*** Update File: app.py
|
||||
@@ def method():
|
||||
- return False
|
||||
+
|
||||
+ return True
|
||||
*** End Patch"#;
|
||||
// Three SSE bodies: one per patch tool call, plus a final bare completion.
let response_streams = vec![
|
||||
sse(vec![
|
||||
ev_apply_patch_custom_tool_call("request_0", freeform_add_patch),
|
||||
ev_completed("request_0"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_apply_patch_custom_tool_call("request_1", freeform_update_patch),
|
||||
ev_completed("request_1"),
|
||||
]),
|
||||
sse(vec![ev_completed("request_2")]),
|
||||
];
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_sequence(&server, response_streams).await;
|
||||
|
||||
// NOTE(review): `cmd_with_server` presumably points the CLI at the mock
// server; confirm in `core_test_support`.
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-s")
|
||||
.arg("danger-full-access")
|
||||
.arg("foo")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// Verify final file contents
|
||||
let final_path = test.cwd_path().join("app.py");
|
||||
let contents = std::fs::read_to_string(&final_path)
|
||||
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
|
||||
assert_eq!(
|
||||
contents,
|
||||
include_str!("../fixtures/apply_patch_freeform_final.txt")
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
30
llmx-rs/exec/tests/suite/auth_env.rs
Normal file
30
llmx-rs/exec/tests/suite/auth_env.rs
Normal file
@@ -0,0 +1,30 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use wiremock::matchers::header;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_uses_codex_api_key_env_var() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse_once_match(
|
||||
&server,
|
||||
header("Authorization", "Bearer dummy"),
|
||||
sse(vec![ev_completed("request_0")]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg("echo testing codex api key")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
8
llmx-rs/exec/tests/suite/mod.rs
Normal file
8
llmx-rs/exec/tests/suite/mod.rs
Normal file
@@ -0,0 +1,8 @@
|
||||
// Aggregates all former standalone integration tests as modules.
|
||||
mod apply_patch;
|
||||
mod auth_env;
|
||||
mod originator;
|
||||
mod output_schema;
|
||||
mod resume;
|
||||
mod sandbox;
|
||||
mod server_error_exit;
|
||||
52
llmx-rs/exec/tests/suite/originator.rs
Normal file
52
llmx-rs/exec/tests/suite/originator.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use core_test_support::responses;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use wiremock::matchers::header;
|
||||
|
||||
/// Verify that when the server reports an error, `codex-exec` exits with a
|
||||
/// non-zero status code so automation can detect failures.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn send_codex_exec_originator() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let body = responses::sse(vec![
|
||||
responses::ev_response_created("response_1"),
|
||||
responses::ev_assistant_message("response_1", "Hello, world!"),
|
||||
responses::ev_completed("response_1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, header("Originator", "codex_exec"), body).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("tell me something")
|
||||
.assert()
|
||||
.code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn supports_originator_override() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let body = responses::sse(vec![
|
||||
responses::ev_response_created("response_1"),
|
||||
responses::ev_assistant_message("response_1", "Hello, world!"),
|
||||
responses::ev_completed("response_1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, header("Originator", "codex_exec_override"), body)
|
||||
.await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.env("CODEX_INTERNAL_ORIGINATOR_OVERRIDE", "codex_exec_override")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("tell me something")
|
||||
.assert()
|
||||
.code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
63
llmx-rs/exec/tests/suite/output_schema.rs
Normal file
63
llmx-rs/exec/tests/suite/output_schema.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use core_test_support::responses;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use serde_json::Value;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
|
||||
let schema_contents = serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"answer": { "type": "string" }
|
||||
},
|
||||
"required": ["answer"],
|
||||
"additionalProperties": false
|
||||
});
|
||||
let schema_path = test.cwd_path().join("schema.json");
|
||||
std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?;
|
||||
let expected_schema: Value = schema_contents;
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let body = responses::sse(vec![
|
||||
responses::ev_response_created("resp1"),
|
||||
responses::ev_assistant_message("m1", "fixture hello"),
|
||||
responses::ev_completed("resp1"),
|
||||
]);
|
||||
let response_mock = responses::mount_sse_once_match(&server, any(), body).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
// keep using -C in the test to exercise the flag as well
|
||||
.arg("-C")
|
||||
.arg(test.cwd_path())
|
||||
.arg("--output-schema")
|
||||
.arg(&schema_path)
|
||||
.arg("-m")
|
||||
.arg("gpt-5")
|
||||
.arg("tell me a joke")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
let request = response_mock.single_request();
|
||||
let payload: Value = request.body_json();
|
||||
let text = payload.get("text").expect("request missing text field");
|
||||
let format = text
|
||||
.get("format")
|
||||
.expect("request missing text.format field");
|
||||
assert_eq!(
|
||||
format,
|
||||
&serde_json::json!({
|
||||
"name": "codex_output_schema",
|
||||
"type": "json_schema",
|
||||
"strict": true,
|
||||
"schema": expected_schema,
|
||||
})
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
257
llmx-rs/exec/tests/suite/resume.rs
Normal file
257
llmx-rs/exec/tests/suite/resume.rs
Normal file
@@ -0,0 +1,257 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
use anyhow::Context;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use serde_json::Value;
|
||||
use std::path::Path;
|
||||
use std::string::ToString;
|
||||
use uuid::Uuid;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
/// Utility: scan the sessions dir for a rollout file that contains `marker`
|
||||
/// in any response_item.message.content entry. Returns the absolute path.
|
||||
fn find_session_file_containing_marker(
|
||||
sessions_dir: &std::path::Path,
|
||||
marker: &str,
|
||||
) -> Option<std::path::PathBuf> {
|
||||
for entry in WalkDir::new(sessions_dir) {
|
||||
let entry = match entry {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if !entry.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
if !entry.file_name().to_string_lossy().ends_with(".jsonl") {
|
||||
continue;
|
||||
}
|
||||
let path = entry.path();
|
||||
let Ok(content) = std::fs::read_to_string(path) else {
|
||||
continue;
|
||||
};
|
||||
// Skip the first meta line and scan remaining JSONL entries.
|
||||
let mut lines = content.lines();
|
||||
if lines.next().is_none() {
|
||||
continue;
|
||||
}
|
||||
for line in lines {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let Ok(item): Result<Value, _> = serde_json::from_str(line) else {
|
||||
continue;
|
||||
};
|
||||
if item.get("type").and_then(|t| t.as_str()) == Some("response_item")
|
||||
&& let Some(payload) = item.get("payload")
|
||||
&& payload.get("type").and_then(|t| t.as_str()) == Some("message")
|
||||
&& payload
|
||||
.get("content")
|
||||
.map(ToString::to_string)
|
||||
.unwrap_or_default()
|
||||
.contains(marker)
|
||||
{
|
||||
return Some(path.to_path_buf());
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Extract the conversation UUID from the first SessionMeta line in the rollout file.
|
||||
fn extract_conversation_id(path: &std::path::Path) -> String {
|
||||
let content = std::fs::read_to_string(path).unwrap();
|
||||
let mut lines = content.lines();
|
||||
let meta_line = lines.next().expect("missing meta line");
|
||||
let meta: Value = serde_json::from_str(meta_line).expect("invalid meta json");
|
||||
meta.get("payload")
|
||||
.and_then(|p| p.get("id"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// `resume --last` must append the new turn to the rollout file created by the
/// previous run instead of starting a new file.
#[test]
fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let sse_fixture = Path::new(manifest_dir).join("tests/fixtures/cli_responses_fixture.sse");

    // 1) First run: create a session whose prompt embeds a unique marker.
    let first_marker = format!("resume-last-{}", Uuid::new_v4());
    let first_prompt = format!("echo {first_marker}");

    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .args([
            "--skip-git-repo-check",
            "-C",
            manifest_dir,
            first_prompt.as_str(),
        ])
        .assert()
        .success();

    // Locate the rollout file that recorded the first marker.
    let sessions_dir = test.home_path().join("sessions");
    let original_path = find_session_file_containing_marker(&sessions_dir, &first_marker)
        .expect("no session file found after first run");

    // 2) Second run: resume the most recent session with a fresh marker.
    let second_marker = format!("resume-last-2-{}", Uuid::new_v4());
    let second_prompt = format!("echo {second_marker}");

    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .args([
            "--skip-git-repo-check",
            "-C",
            manifest_dir,
            second_prompt.as_str(),
            "resume",
            "--last",
        ])
        .assert()
        .success();

    // The same file must now hold both markers.
    let resumed_path = find_session_file_containing_marker(&sessions_dir, &second_marker)
        .expect("no resumed session file containing marker2");
    assert_eq!(
        resumed_path, original_path,
        "resume --last should append to existing file"
    );

    let rollout = std::fs::read_to_string(&resumed_path)?;
    assert!(rollout.contains(&first_marker));
    assert!(rollout.contains(&second_marker));
    Ok(())
}
|
||||
|
||||
/// `resume <session-id>` must append the new turn to the rollout file that
/// belongs to that id instead of starting a new file.
#[test]
fn exec_resume_by_id_appends_to_existing_file() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let sse_fixture = Path::new(manifest_dir).join("tests/fixtures/cli_responses_fixture.sse");

    // 1) First run: create a session tagged with a unique marker.
    let first_marker = format!("resume-by-id-{}", Uuid::new_v4());
    let first_prompt = format!("echo {first_marker}");

    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .args([
            "--skip-git-repo-check",
            "-C",
            manifest_dir,
            first_prompt.as_str(),
        ])
        .assert()
        .success();

    let sessions_dir = test.home_path().join("sessions");
    let original_path = find_session_file_containing_marker(&sessions_dir, &first_marker)
        .expect("no session file found after first run");

    // The id recorded in the meta line is what the second run resumes by.
    let session_id = extract_conversation_id(&original_path);
    assert!(
        !session_id.is_empty(),
        "missing conversation id in meta line"
    );

    // 2) Second run: resume by id with a fresh marker.
    let second_marker = format!("resume-by-id-2-{}", Uuid::new_v4());
    let second_prompt = format!("echo {second_marker}");

    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .args([
            "--skip-git-repo-check",
            "-C",
            manifest_dir,
            second_prompt.as_str(),
            "resume",
            session_id.as_str(),
        ])
        .assert()
        .success();

    let resumed_path = find_session_file_containing_marker(&sessions_dir, &second_marker)
        .expect("no resumed session file containing marker2");
    assert_eq!(
        resumed_path, original_path,
        "resume by id should append to existing file"
    );

    let rollout = std::fs::read_to_string(&resumed_path)?;
    assert!(rollout.contains(&first_marker));
    assert!(rollout.contains(&second_marker));
    Ok(())
}
|
||||
|
||||
/// CLI overrides (`--model`, `--sandbox`) given on a `resume --last` run must
/// show up in that run's stderr summary, and the resumed turn must still be
/// appended to the original rollout file.
#[test]
fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
    let test = test_codex_exec();
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let sse_fixture = Path::new(manifest_dir).join("tests/fixtures/cli_responses_fixture.sse");

    // First run: establish a session with explicit sandbox and model flags.
    let first_marker = format!("resume-config-{}", Uuid::new_v4());
    let first_prompt = format!("echo {first_marker}");

    test.cmd()
        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .args([
            "--skip-git-repo-check",
            "--sandbox",
            "workspace-write",
            "--model",
            "gpt-5",
            "-C",
            manifest_dir,
            first_prompt.as_str(),
        ])
        .assert()
        .success();

    let sessions_dir = test.home_path().join("sessions");
    let original_path = find_session_file_containing_marker(&sessions_dir, &first_marker)
        .expect("no session file found after first run");

    // Second run: resume with a different model and capture the output.
    let second_marker = format!("resume-config-2-{}", Uuid::new_v4());
    let second_prompt = format!("echo {second_marker}");

    let output = test
        .cmd()
        .env("CODEX_RS_SSE_FIXTURE", &sse_fixture)
        .env("OPENAI_BASE_URL", "http://unused.local")
        .args([
            "--skip-git-repo-check",
            "--sandbox",
            "workspace-write",
            "--model",
            "gpt-5-high",
            "-C",
            manifest_dir,
            second_prompt.as_str(),
            "resume",
            "--last",
        ])
        .output()
        .context("resume run should succeed")?;

    assert!(output.status.success(), "resume run failed: {output:?}");

    let stderr = String::from_utf8(output.stderr)?;
    assert!(
        stderr.contains("model: gpt-5-high"),
        "stderr missing model override: {stderr}"
    );

    // On Windows the test expects the summary to report `read-only` instead.
    let (expected_sandbox, failure_note) = if cfg!(target_os = "windows") {
        ("sandbox: read-only", "stderr missing downgraded sandbox note")
    } else {
        ("sandbox: workspace-write", "stderr missing sandbox override")
    };
    assert!(stderr.contains(expected_sandbox), "{failure_note}: {stderr}");

    let resumed_path = find_session_file_containing_marker(&sessions_dir, &second_marker)
        .expect("no resumed session file containing marker2");
    assert_eq!(
        resumed_path, original_path,
        "resume should append to same file"
    );

    let rollout = std::fs::read_to_string(&resumed_path)?;
    assert!(rollout.contains(&first_marker));
    assert!(rollout.contains(&second_marker));
    Ok(())
}
|
||||
322
llmx-rs/exec/tests/suite/sandbox.rs
Normal file
322
llmx-rs/exec/tests/suite/sandbox.rs
Normal file
@@ -0,0 +1,322 @@
|
||||
#![cfg(unix)]
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::spawn::StdioPolicy;
|
||||
use std::collections::HashMap;
|
||||
use std::future::Future;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitStatus;
|
||||
use tokio::fs::create_dir_all;
|
||||
use tokio::process::Child;
|
||||
|
||||
/// macOS flavour of the sandbox spawner: forwards every argument unchanged to
/// `codex_core::seatbelt::spawn_command_under_seatbelt` and returns the child
/// it produces.
#[cfg(target_os = "macos")]
async fn spawn_command_under_sandbox(
    command: Vec<String>,
    command_cwd: PathBuf,
    sandbox_policy: &SandboxPolicy,
    sandbox_cwd: &Path,
    stdio_policy: StdioPolicy,
    env: HashMap<String, String>,
) -> std::io::Result<Child> {
    let spawned = codex_core::seatbelt::spawn_command_under_seatbelt(
        command,
        command_cwd,
        sandbox_policy,
        sandbox_cwd,
        stdio_policy,
        env,
    );
    spawned.await
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
async fn spawn_command_under_sandbox(
|
||||
command: Vec<String>,
|
||||
command_cwd: PathBuf,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
sandbox_cwd: &Path,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
use codex_core::landlock::spawn_command_under_linux_sandbox;
|
||||
let codex_linux_sandbox_exe = assert_cmd::cargo::cargo_bin("codex-exec");
|
||||
spawn_command_under_linux_sandbox(
|
||||
codex_linux_sandbox_exe,
|
||||
command,
|
||||
command_cwd,
|
||||
sandbox_policy,
|
||||
sandbox_cwd,
|
||||
stdio_policy,
|
||||
env,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Spawns `python3 -c <script>` under a `WorkspaceWrite` sandbox policy and
/// asserts that the process exits successfully. The script creates a
/// `multiprocessing.Lock` and acquires it in a child process.
#[tokio::test]
|
||||
async fn python_multiprocessing_lock_works_under_sandbox() {
|
||||
core_test_support::skip_if_sandbox!();
|
||||
// macOS: no extra writable roots are configured.
#[cfg(target_os = "macos")]
|
||||
let writable_roots = Vec::<PathBuf>::new();
|
||||
|
||||
// From https://man7.org/linux/man-pages/man7/sem_overview.7.html
|
||||
//
|
||||
// > On Linux, named semaphores are created in a virtual filesystem,
|
||||
// > normally mounted under /dev/shm.
|
||||
#[cfg(target_os = "linux")]
|
||||
let writable_roots = vec![PathBuf::from("/dev/shm")];
|
||||
|
||||
// Network access is disabled; neither the TMPDIR nor the /tmp exclusion flag is set.
let policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots,
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: false,
|
||||
exclude_slash_tmp: false,
|
||||
};
|
||||
|
||||
// NOTE(review): this literal is Python source, so its internal whitespace
// is significant and must not be reformatted.
let python_code = r#"import multiprocessing
|
||||
from multiprocessing import Lock, Process
|
||||
|
||||
def f(lock):
|
||||
with lock:
|
||||
print("Lock acquired in child process")
|
||||
|
||||
if __name__ == '__main__':
|
||||
lock = Lock()
|
||||
p = Process(target=f, args=(lock,))
|
||||
p.start()
|
||||
p.join()
|
||||
"#;
|
||||
|
||||
// The current directory serves as both the command cwd and the sandbox policy cwd.
let command_cwd = std::env::current_dir().expect("should be able to get current dir");
|
||||
let sandbox_cwd = command_cwd.clone();
|
||||
let mut child = spawn_command_under_sandbox(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
python_code.to_string(),
|
||||
],
|
||||
command_cwd,
|
||||
&policy,
|
||||
sandbox_cwd.as_path(),
|
||||
StdioPolicy::Inherit,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should be able to spawn python under sandbox");
|
||||
|
||||
let status = child.wait().await.expect("should wait for child process");
|
||||
assert!(status.success(), "python exited with {status:?}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sandbox_distinguishes_command_and_policy_cwds() {
|
||||
core_test_support::skip_if_sandbox!();
|
||||
let temp = tempfile::tempdir().expect("should be able to create temp dir");
|
||||
let sandbox_root = temp.path().join("sandbox");
|
||||
let command_root = temp.path().join("command");
|
||||
create_dir_all(&sandbox_root).await.expect("mkdir");
|
||||
create_dir_all(&command_root).await.expect("mkdir");
|
||||
let canonical_sandbox_root = tokio::fs::canonicalize(&sandbox_root)
|
||||
.await
|
||||
.expect("canonicalize sandbox root");
|
||||
let canonical_allowed_path = canonical_sandbox_root.join("allowed.txt");
|
||||
|
||||
let disallowed_path = command_root.join("forbidden.txt");
|
||||
|
||||
// Note writable_roots is empty: verify that `canonical_allowed_path` is
|
||||
// writable only because it is under the sandbox policy cwd, not because it
|
||||
// is under a writable root.
|
||||
let policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![],
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: true,
|
||||
exclude_slash_tmp: true,
|
||||
};
|
||||
|
||||
// Attempt to write inside the command cwd, which is outside of the sandbox policy cwd.
|
||||
let mut child = spawn_command_under_sandbox(
|
||||
vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo forbidden > forbidden.txt".to_string(),
|
||||
],
|
||||
command_root.clone(),
|
||||
&policy,
|
||||
canonical_sandbox_root.as_path(),
|
||||
StdioPolicy::Inherit,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should spawn command writing to forbidden path");
|
||||
|
||||
let status = child
|
||||
.wait()
|
||||
.await
|
||||
.expect("should wait for forbidden command");
|
||||
assert!(
|
||||
!status.success(),
|
||||
"sandbox unexpectedly allowed writing to command cwd: {status:?}"
|
||||
);
|
||||
let forbidden_exists = tokio::fs::try_exists(&disallowed_path)
|
||||
.await
|
||||
.expect("try_exists failed");
|
||||
assert!(
|
||||
!forbidden_exists,
|
||||
"forbidden path should not have been created"
|
||||
);
|
||||
|
||||
// Writing to the sandbox policy cwd after changing directories into it should succeed.
|
||||
let mut child = spawn_command_under_sandbox(
|
||||
vec![
|
||||
"/usr/bin/touch".to_string(),
|
||||
canonical_allowed_path.to_string_lossy().into_owned(),
|
||||
],
|
||||
command_root,
|
||||
&policy,
|
||||
canonical_sandbox_root.as_path(),
|
||||
StdioPolicy::Inherit,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should spawn command writing to sandbox root");
|
||||
|
||||
let status = child.wait().await.expect("should wait for allowed command");
|
||||
assert!(
|
||||
status.success(),
|
||||
"sandbox blocked allowed write: {status:?}"
|
||||
);
|
||||
let allowed_exists = tokio::fs::try_exists(&canonical_allowed_path)
|
||||
.await
|
||||
.expect("try_exists allowed failed");
|
||||
assert!(allowed_exists, "allowed path should exist");
|
||||
}
|
||||
|
||||
fn unix_sock_body() {
|
||||
unsafe {
|
||||
let mut fds = [0i32; 2];
|
||||
let r = libc::socketpair(libc::AF_UNIX, libc::SOCK_DGRAM, 0, fds.as_mut_ptr());
|
||||
assert_eq!(
|
||||
r,
|
||||
0,
|
||||
"socketpair(AF_UNIX, SOCK_DGRAM) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
|
||||
let msg = b"hello_unix";
|
||||
// write() from one end (generic write is allowed)
|
||||
let sent = libc::write(fds[0], msg.as_ptr() as *const libc::c_void, msg.len());
|
||||
assert!(sent >= 0, "write() failed: {}", io::Error::last_os_error());
|
||||
|
||||
// recvfrom() on the other end. We don’t need the address for socketpair,
|
||||
// so we pass null pointers for src address.
|
||||
let mut buf = [0u8; 64];
|
||||
let recvd = libc::recvfrom(
|
||||
fds[1],
|
||||
buf.as_mut_ptr() as *mut libc::c_void,
|
||||
buf.len(),
|
||||
0,
|
||||
std::ptr::null_mut(),
|
||||
std::ptr::null_mut(),
|
||||
);
|
||||
assert!(
|
||||
recvd >= 0,
|
||||
"recvfrom() failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
|
||||
let recvd_slice = &buf[..(recvd as usize)];
|
||||
assert_eq!(
|
||||
recvd_slice,
|
||||
&msg[..],
|
||||
"payload mismatch: sent {} bytes, got {} bytes",
|
||||
msg.len(),
|
||||
recvd
|
||||
);
|
||||
|
||||
// Also exercise AF_UNIX stream socketpair quickly to ensure AF_UNIX in general works.
|
||||
let mut sfds = [0i32; 2];
|
||||
let sr = libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, sfds.as_mut_ptr());
|
||||
assert_eq!(
|
||||
sr,
|
||||
0,
|
||||
"socketpair(AF_UNIX, SOCK_STREAM) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
let snt2 = libc::write(sfds[0], msg.as_ptr() as *const libc::c_void, msg.len());
|
||||
assert!(
|
||||
snt2 >= 0,
|
||||
"write(stream) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
let mut b2 = [0u8; 64];
|
||||
let rcv2 = libc::recv(sfds[1], b2.as_mut_ptr() as *mut libc::c_void, b2.len(), 0);
|
||||
assert!(
|
||||
rcv2 >= 0,
|
||||
"recv(stream) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
|
||||
// Clean up
|
||||
let _ = libc::close(sfds[0]);
|
||||
let _ = libc::close(sfds[1]);
|
||||
let _ = libc::close(fds[0]);
|
||||
let _ = libc::close(fds[1]);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn allow_unix_socketpair_recvfrom() {
|
||||
run_code_under_sandbox(
|
||||
"allow_unix_socketpair_recvfrom",
|
||||
&SandboxPolicy::ReadOnly,
|
||||
|| async { unix_sock_body() },
|
||||
)
|
||||
.await
|
||||
.expect("should be able to reexec");
|
||||
}
|
||||
|
||||
const IN_SANDBOX_ENV_VAR: &str = "IN_SANDBOX";
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
pub async fn run_code_under_sandbox<F, Fut>(
|
||||
test_selector: &str,
|
||||
policy: &SandboxPolicy,
|
||||
child_body: F,
|
||||
) -> io::Result<Option<ExitStatus>>
|
||||
where
|
||||
F: FnOnce() -> Fut + Send + 'static,
|
||||
Fut: Future<Output = ()> + Send + 'static,
|
||||
{
|
||||
if std::env::var(IN_SANDBOX_ENV_VAR).is_err() {
|
||||
let exe = std::env::current_exe()?;
|
||||
let mut cmds = vec![exe.to_string_lossy().into_owned(), "--exact".into()];
|
||||
let mut stdio_policy = StdioPolicy::RedirectForShellTool;
|
||||
// Allow for us to pass forward --nocapture / use the right stdio policy.
|
||||
if std::env::args().any(|a| a == "--nocapture") {
|
||||
cmds.push("--nocapture".into());
|
||||
stdio_policy = StdioPolicy::Inherit;
|
||||
}
|
||||
cmds.push(test_selector.into());
|
||||
|
||||
// Your existing launcher:
|
||||
let command_cwd = std::env::current_dir().expect("should be able to get current dir");
|
||||
let sandbox_cwd = command_cwd.clone();
|
||||
let mut child = spawn_command_under_sandbox(
|
||||
cmds,
|
||||
command_cwd,
|
||||
policy,
|
||||
sandbox_cwd.as_path(),
|
||||
stdio_policy,
|
||||
HashMap::from([("IN_SANDBOX".into(), "1".into())]),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let status = child.wait().await?;
|
||||
Ok(Some(status))
|
||||
} else {
|
||||
// Child branch: run the provided body.
|
||||
child_body().await;
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
34
llmx-rs/exec/tests/suite/server_error_exit.rs
Normal file
34
llmx-rs/exec/tests/suite/server_error_exit.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use core_test_support::responses;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
/// Verify that when the server reports an error, `codex-exec` exits with a
|
||||
/// non-zero status code so automation can detect failures.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exits_non_zero_when_server_reports_error() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
|
||||
// Mock a simple Responses API SSE stream that immediately reports a
|
||||
// `response.failed` event with an error message.
|
||||
let server = responses::start_mock_server().await;
|
||||
let body = responses::sse(vec![serde_json::json!({
|
||||
"type": "response.failed",
|
||||
"response": {
|
||||
"id": "resp_err_1",
|
||||
"error": {"code": "rate_limit_exceeded", "message": "synthetic server error"}
|
||||
}
|
||||
})]);
|
||||
responses::mount_sse_once_match(&server, any(), body).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("tell me something")
|
||||
.arg("--experimental-json")
|
||||
.assert()
|
||||
.code(1);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user