Phase 1: Repository & Infrastructure Setup
- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
- Root: llmx-monorepo
- CLI: @llmx/llmx
- SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx
🤖 Generated with Claude Code
This commit is contained in:
5
llmx-rs/exec/tests/all.rs
Normal file
5
llmx-rs/exec/tests/all.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
// Single integration test binary that aggregates all test modules.
|
||||
// The submodules live in `tests/suite/`.
|
||||
mod suite;
|
||||
|
||||
mod event_processor_with_json_output;
|
||||
939
llmx-rs/exec/tests/event_processor_with_json_output.rs
Normal file
939
llmx-rs/exec/tests/event_processor_with_json_output.rs
Normal file
@@ -0,0 +1,939 @@
|
||||
use codex_core::protocol::AgentMessageEvent;
|
||||
use codex_core::protocol::AgentReasoningEvent;
|
||||
use codex_core::protocol::ErrorEvent;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExecCommandBeginEvent;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::McpInvocation;
|
||||
use codex_core::protocol::McpToolCallBeginEvent;
|
||||
use codex_core::protocol::McpToolCallEndEvent;
|
||||
use codex_core::protocol::PatchApplyBeginEvent;
|
||||
use codex_core::protocol::PatchApplyEndEvent;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_core::protocol::WebSearchEndEvent;
|
||||
use codex_exec::event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
|
||||
use codex_exec::exec_events::AgentMessageItem;
|
||||
use codex_exec::exec_events::CommandExecutionItem;
|
||||
use codex_exec::exec_events::CommandExecutionStatus;
|
||||
use codex_exec::exec_events::ErrorItem;
|
||||
use codex_exec::exec_events::ItemCompletedEvent;
|
||||
use codex_exec::exec_events::ItemStartedEvent;
|
||||
use codex_exec::exec_events::ItemUpdatedEvent;
|
||||
use codex_exec::exec_events::McpToolCallItem;
|
||||
use codex_exec::exec_events::McpToolCallItemError;
|
||||
use codex_exec::exec_events::McpToolCallItemResult;
|
||||
use codex_exec::exec_events::McpToolCallStatus;
|
||||
use codex_exec::exec_events::PatchApplyStatus;
|
||||
use codex_exec::exec_events::PatchChangeKind;
|
||||
use codex_exec::exec_events::ReasoningItem;
|
||||
use codex_exec::exec_events::ThreadErrorEvent;
|
||||
use codex_exec::exec_events::ThreadEvent;
|
||||
use codex_exec::exec_events::ThreadItem;
|
||||
use codex_exec::exec_events::ThreadItemDetails;
|
||||
use codex_exec::exec_events::ThreadStartedEvent;
|
||||
use codex_exec::exec_events::TodoItem as ExecTodoItem;
|
||||
use codex_exec::exec_events::TodoListItem as ExecTodoListItem;
|
||||
use codex_exec::exec_events::TurnCompletedEvent;
|
||||
use codex_exec::exec_events::TurnFailedEvent;
|
||||
use codex_exec::exec_events::TurnStartedEvent;
|
||||
use codex_exec::exec_events::Usage;
|
||||
use codex_exec::exec_events::WebSearchItem;
|
||||
use codex_protocol::plan_tool::PlanItemArg;
|
||||
use codex_protocol::plan_tool::StepStatus;
|
||||
use codex_protocol::plan_tool::UpdatePlanArgs;
|
||||
use mcp_types::CallToolResult;
|
||||
use mcp_types::ContentBlock;
|
||||
use mcp_types::TextContent;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
fn event(id: &str, msg: EventMsg) -> Event {
|
||||
Event {
|
||||
id: id.to_string(),
|
||||
msg,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn session_configured_produces_thread_started_event() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let session_id =
|
||||
codex_protocol::ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")
|
||||
.unwrap();
|
||||
let rollout_path = PathBuf::from("/tmp/rollout.json");
|
||||
let ev = event(
|
||||
"e1",
|
||||
EventMsg::SessionConfigured(SessionConfiguredEvent {
|
||||
session_id,
|
||||
model: "codex-mini-latest".to_string(),
|
||||
reasoning_effort: None,
|
||||
history_log_id: 0,
|
||||
history_entry_count: 0,
|
||||
initial_messages: None,
|
||||
rollout_path,
|
||||
}),
|
||||
);
|
||||
let out = ep.collect_thread_events(&ev);
|
||||
assert_eq!(
|
||||
out,
|
||||
vec![ThreadEvent::ThreadStarted(ThreadStartedEvent {
|
||||
thread_id: "67e55044-10b1-426f-9247-bb680e5fe0c8".to_string(),
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn task_started_produces_turn_started_event() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let out = ep.collect_thread_events(&event(
|
||||
"t1",
|
||||
EventMsg::TaskStarted(codex_core::protocol::TaskStartedEvent {
|
||||
model_context_window: Some(32_000),
|
||||
}),
|
||||
));
|
||||
|
||||
assert_eq!(out, vec![ThreadEvent::TurnStarted(TurnStartedEvent {})]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn web_search_end_emits_item_completed() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let query = "rust async await".to_string();
|
||||
let out = ep.collect_thread_events(&event(
|
||||
"w1",
|
||||
EventMsg::WebSearchEnd(WebSearchEndEvent {
|
||||
call_id: "call-123".to_string(),
|
||||
query: query.clone(),
|
||||
}),
|
||||
));
|
||||
|
||||
assert_eq!(
|
||||
out,
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::WebSearch(WebSearchItem { query }),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn plan_update_emits_todo_list_started_updated_and_completed() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
|
||||
// First plan update => item.started (todo_list)
|
||||
let first = event(
|
||||
"p1",
|
||||
EventMsg::PlanUpdate(UpdatePlanArgs {
|
||||
explanation: None,
|
||||
plan: vec![
|
||||
PlanItemArg {
|
||||
step: "step one".to_string(),
|
||||
status: StepStatus::Pending,
|
||||
},
|
||||
PlanItemArg {
|
||||
step: "step two".to_string(),
|
||||
status: StepStatus::InProgress,
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
let out_first = ep.collect_thread_events(&first);
|
||||
assert_eq!(
|
||||
out_first,
|
||||
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::TodoList(ExecTodoListItem {
|
||||
items: vec![
|
||||
ExecTodoItem {
|
||||
text: "step one".to_string(),
|
||||
completed: false
|
||||
},
|
||||
ExecTodoItem {
|
||||
text: "step two".to_string(),
|
||||
completed: false
|
||||
},
|
||||
],
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
|
||||
// Second plan update in same turn => item.updated (same id)
|
||||
let second = event(
|
||||
"p2",
|
||||
EventMsg::PlanUpdate(UpdatePlanArgs {
|
||||
explanation: None,
|
||||
plan: vec![
|
||||
PlanItemArg {
|
||||
step: "step one".to_string(),
|
||||
status: StepStatus::Completed,
|
||||
},
|
||||
PlanItemArg {
|
||||
step: "step two".to_string(),
|
||||
status: StepStatus::InProgress,
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
let out_second = ep.collect_thread_events(&second);
|
||||
assert_eq!(
|
||||
out_second,
|
||||
vec![ThreadEvent::ItemUpdated(ItemUpdatedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::TodoList(ExecTodoListItem {
|
||||
items: vec![
|
||||
ExecTodoItem {
|
||||
text: "step one".to_string(),
|
||||
completed: true
|
||||
},
|
||||
ExecTodoItem {
|
||||
text: "step two".to_string(),
|
||||
completed: false
|
||||
},
|
||||
],
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
|
||||
// Task completes => item.completed (same id, latest state)
|
||||
let complete = event(
|
||||
"p3",
|
||||
EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
|
||||
last_agent_message: None,
|
||||
}),
|
||||
);
|
||||
let out_complete = ep.collect_thread_events(&complete);
|
||||
assert_eq!(
|
||||
out_complete,
|
||||
vec![
|
||||
ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::TodoList(ExecTodoListItem {
|
||||
items: vec![
|
||||
ExecTodoItem {
|
||||
text: "step one".to_string(),
|
||||
completed: true
|
||||
},
|
||||
ExecTodoItem {
|
||||
text: "step two".to_string(),
|
||||
completed: false
|
||||
},
|
||||
],
|
||||
}),
|
||||
},
|
||||
}),
|
||||
ThreadEvent::TurnCompleted(TurnCompletedEvent {
|
||||
usage: Usage::default(),
|
||||
}),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_tool_call_begin_and_end_emit_item_events() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let invocation = McpInvocation {
|
||||
server: "server_a".to_string(),
|
||||
tool: "tool_x".to_string(),
|
||||
arguments: Some(json!({ "key": "value" })),
|
||||
};
|
||||
|
||||
let begin = event(
|
||||
"m1",
|
||||
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
|
||||
call_id: "call-1".to_string(),
|
||||
invocation: invocation.clone(),
|
||||
}),
|
||||
);
|
||||
let begin_events = ep.collect_thread_events(&begin);
|
||||
assert_eq!(
|
||||
begin_events,
|
||||
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
|
||||
server: "server_a".to_string(),
|
||||
tool: "tool_x".to_string(),
|
||||
arguments: json!({ "key": "value" }),
|
||||
result: None,
|
||||
error: None,
|
||||
status: McpToolCallStatus::InProgress,
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
|
||||
let end = event(
|
||||
"m2",
|
||||
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
|
||||
call_id: "call-1".to_string(),
|
||||
invocation,
|
||||
duration: Duration::from_secs(1),
|
||||
result: Ok(CallToolResult {
|
||||
content: Vec::new(),
|
||||
is_error: None,
|
||||
structured_content: None,
|
||||
}),
|
||||
}),
|
||||
);
|
||||
let end_events = ep.collect_thread_events(&end);
|
||||
assert_eq!(
|
||||
end_events,
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
|
||||
server: "server_a".to_string(),
|
||||
tool: "tool_x".to_string(),
|
||||
arguments: json!({ "key": "value" }),
|
||||
result: Some(McpToolCallItemResult {
|
||||
content: Vec::new(),
|
||||
structured_content: None,
|
||||
}),
|
||||
error: None,
|
||||
status: McpToolCallStatus::Completed,
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_tool_call_failure_sets_failed_status() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let invocation = McpInvocation {
|
||||
server: "server_b".to_string(),
|
||||
tool: "tool_y".to_string(),
|
||||
arguments: Some(json!({ "param": 42 })),
|
||||
};
|
||||
|
||||
let begin = event(
|
||||
"m3",
|
||||
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
|
||||
call_id: "call-2".to_string(),
|
||||
invocation: invocation.clone(),
|
||||
}),
|
||||
);
|
||||
ep.collect_thread_events(&begin);
|
||||
|
||||
let end = event(
|
||||
"m4",
|
||||
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
|
||||
call_id: "call-2".to_string(),
|
||||
invocation,
|
||||
duration: Duration::from_millis(5),
|
||||
result: Err("tool exploded".to_string()),
|
||||
}),
|
||||
);
|
||||
let events = ep.collect_thread_events(&end);
|
||||
assert_eq!(
|
||||
events,
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
|
||||
server: "server_b".to_string(),
|
||||
tool: "tool_y".to_string(),
|
||||
arguments: json!({ "param": 42 }),
|
||||
result: None,
|
||||
error: Some(McpToolCallItemError {
|
||||
message: "tool exploded".to_string(),
|
||||
}),
|
||||
status: McpToolCallStatus::Failed,
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_tool_call_defaults_arguments_and_preserves_structured_content() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let invocation = McpInvocation {
|
||||
server: "server_c".to_string(),
|
||||
tool: "tool_z".to_string(),
|
||||
arguments: None,
|
||||
};
|
||||
|
||||
let begin = event(
|
||||
"m5",
|
||||
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
|
||||
call_id: "call-3".to_string(),
|
||||
invocation: invocation.clone(),
|
||||
}),
|
||||
);
|
||||
let begin_events = ep.collect_thread_events(&begin);
|
||||
assert_eq!(
|
||||
begin_events,
|
||||
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
|
||||
server: "server_c".to_string(),
|
||||
tool: "tool_z".to_string(),
|
||||
arguments: serde_json::Value::Null,
|
||||
result: None,
|
||||
error: None,
|
||||
status: McpToolCallStatus::InProgress,
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
|
||||
let end = event(
|
||||
"m6",
|
||||
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
|
||||
call_id: "call-3".to_string(),
|
||||
invocation,
|
||||
duration: Duration::from_millis(10),
|
||||
result: Ok(CallToolResult {
|
||||
content: vec![ContentBlock::TextContent(TextContent {
|
||||
annotations: None,
|
||||
text: "done".to_string(),
|
||||
r#type: "text".to_string(),
|
||||
})],
|
||||
is_error: None,
|
||||
structured_content: Some(json!({ "status": "ok" })),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
let events = ep.collect_thread_events(&end);
|
||||
assert_eq!(
|
||||
events,
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
|
||||
server: "server_c".to_string(),
|
||||
tool: "tool_z".to_string(),
|
||||
arguments: serde_json::Value::Null,
|
||||
result: Some(McpToolCallItemResult {
|
||||
content: vec![ContentBlock::TextContent(TextContent {
|
||||
annotations: None,
|
||||
text: "done".to_string(),
|
||||
r#type: "text".to_string(),
|
||||
})],
|
||||
structured_content: Some(json!({ "status": "ok" })),
|
||||
}),
|
||||
error: None,
|
||||
status: McpToolCallStatus::Completed,
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
|
||||
// First turn: start + complete
|
||||
let start = event(
|
||||
"t1",
|
||||
EventMsg::PlanUpdate(UpdatePlanArgs {
|
||||
explanation: None,
|
||||
plan: vec![PlanItemArg {
|
||||
step: "only".to_string(),
|
||||
status: StepStatus::Pending,
|
||||
}],
|
||||
}),
|
||||
);
|
||||
let _ = ep.collect_thread_events(&start);
|
||||
let complete = event(
|
||||
"t2",
|
||||
EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
|
||||
last_agent_message: None,
|
||||
}),
|
||||
);
|
||||
let _ = ep.collect_thread_events(&complete);
|
||||
|
||||
// Second turn: a new todo list should have a new id
|
||||
let start_again = event(
|
||||
"t3",
|
||||
EventMsg::PlanUpdate(UpdatePlanArgs {
|
||||
explanation: None,
|
||||
plan: vec![PlanItemArg {
|
||||
step: "again".to_string(),
|
||||
status: StepStatus::Pending,
|
||||
}],
|
||||
}),
|
||||
);
|
||||
let out = ep.collect_thread_events(&start_again);
|
||||
|
||||
match &out[0] {
|
||||
ThreadEvent::ItemStarted(ItemStartedEvent { item }) => {
|
||||
assert_eq!(&item.id, "item_1");
|
||||
}
|
||||
other => panic!("unexpected event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_reasoning_produces_item_completed_reasoning() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let ev = event(
|
||||
"e1",
|
||||
EventMsg::AgentReasoning(AgentReasoningEvent {
|
||||
text: "thinking...".to_string(),
|
||||
}),
|
||||
);
|
||||
let out = ep.collect_thread_events(&ev);
|
||||
assert_eq!(
|
||||
out,
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::Reasoning(ReasoningItem {
|
||||
text: "thinking...".to_string(),
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_message_produces_item_completed_agent_message() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let ev = event(
|
||||
"e1",
|
||||
EventMsg::AgentMessage(AgentMessageEvent {
|
||||
message: "hello".to_string(),
|
||||
}),
|
||||
);
|
||||
let out = ep.collect_thread_events(&ev);
|
||||
assert_eq!(
|
||||
out,
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::AgentMessage(AgentMessageItem {
|
||||
text: "hello".to_string(),
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_event_produces_error() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let out = ep.collect_thread_events(&event(
|
||||
"e1",
|
||||
EventMsg::Error(codex_core::protocol::ErrorEvent {
|
||||
message: "boom".to_string(),
|
||||
}),
|
||||
));
|
||||
assert_eq!(
|
||||
out,
|
||||
vec![ThreadEvent::Error(ThreadErrorEvent {
|
||||
message: "boom".to_string(),
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn warning_event_produces_error_item() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let out = ep.collect_thread_events(&event(
|
||||
"e1",
|
||||
EventMsg::Warning(WarningEvent {
|
||||
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(),
|
||||
}),
|
||||
));
|
||||
assert_eq!(
|
||||
out,
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::Error(ErrorItem {
|
||||
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.".to_string(),
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stream_error_event_produces_error() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let out = ep.collect_thread_events(&event(
|
||||
"e1",
|
||||
EventMsg::StreamError(codex_core::protocol::StreamErrorEvent {
|
||||
message: "retrying".to_string(),
|
||||
}),
|
||||
));
|
||||
assert_eq!(
|
||||
out,
|
||||
vec![ThreadEvent::Error(ThreadErrorEvent {
|
||||
message: "retrying".to_string(),
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_followed_by_task_complete_produces_turn_failed() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
|
||||
let error_event = event(
|
||||
"e1",
|
||||
EventMsg::Error(ErrorEvent {
|
||||
message: "boom".to_string(),
|
||||
}),
|
||||
);
|
||||
assert_eq!(
|
||||
ep.collect_thread_events(&error_event),
|
||||
vec![ThreadEvent::Error(ThreadErrorEvent {
|
||||
message: "boom".to_string(),
|
||||
})]
|
||||
);
|
||||
|
||||
let complete_event = event(
|
||||
"e2",
|
||||
EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
|
||||
last_agent_message: None,
|
||||
}),
|
||||
);
|
||||
assert_eq!(
|
||||
ep.collect_thread_events(&complete_event),
|
||||
vec![ThreadEvent::TurnFailed(TurnFailedEvent {
|
||||
error: ThreadErrorEvent {
|
||||
message: "boom".to_string(),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exec_command_end_success_produces_completed_command_item() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
|
||||
// Begin -> no output
|
||||
let begin = event(
|
||||
"c1",
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: "1".to_string(),
|
||||
command: vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()],
|
||||
cwd: std::env::current_dir().unwrap(),
|
||||
parsed_cmd: Vec::new(),
|
||||
is_user_shell_command: false,
|
||||
}),
|
||||
);
|
||||
let out_begin = ep.collect_thread_events(&begin);
|
||||
assert_eq!(
|
||||
out_begin,
|
||||
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
|
||||
command: "bash -lc 'echo hi'".to_string(),
|
||||
aggregated_output: String::new(),
|
||||
exit_code: None,
|
||||
status: CommandExecutionStatus::InProgress,
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
|
||||
// End (success) -> item.completed (item_0)
|
||||
let end_ok = event(
|
||||
"c2",
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: "1".to_string(),
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
aggregated_output: "hi\n".to_string(),
|
||||
exit_code: 0,
|
||||
duration: Duration::from_millis(5),
|
||||
formatted_output: String::new(),
|
||||
}),
|
||||
);
|
||||
let out_ok = ep.collect_thread_events(&end_ok);
|
||||
assert_eq!(
|
||||
out_ok,
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
|
||||
command: "bash -lc 'echo hi'".to_string(),
|
||||
aggregated_output: "hi\n".to_string(),
|
||||
exit_code: Some(0),
|
||||
status: CommandExecutionStatus::Completed,
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exec_command_end_failure_produces_failed_command_item() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
|
||||
// Begin -> no output
|
||||
let begin = event(
|
||||
"c1",
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: "2".to_string(),
|
||||
command: vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()],
|
||||
cwd: std::env::current_dir().unwrap(),
|
||||
parsed_cmd: Vec::new(),
|
||||
is_user_shell_command: false,
|
||||
}),
|
||||
);
|
||||
assert_eq!(
|
||||
ep.collect_thread_events(&begin),
|
||||
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
|
||||
command: "sh -c 'exit 1'".to_string(),
|
||||
aggregated_output: String::new(),
|
||||
exit_code: None,
|
||||
status: CommandExecutionStatus::InProgress,
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
|
||||
// End (failure) -> item.completed (item_0)
|
||||
let end_fail = event(
|
||||
"c2",
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: "2".to_string(),
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
aggregated_output: String::new(),
|
||||
exit_code: 1,
|
||||
duration: Duration::from_millis(2),
|
||||
formatted_output: String::new(),
|
||||
}),
|
||||
);
|
||||
let out_fail = ep.collect_thread_events(&end_fail);
|
||||
assert_eq!(
|
||||
out_fail,
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::CommandExecution(CommandExecutionItem {
|
||||
command: "sh -c 'exit 1'".to_string(),
|
||||
aggregated_output: String::new(),
|
||||
exit_code: Some(1),
|
||||
status: CommandExecutionStatus::Failed,
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exec_command_end_without_begin_is_ignored() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
|
||||
// End event arrives without a prior Begin; should produce no thread events.
|
||||
let end_only = event(
|
||||
"c1",
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: "no-begin".to_string(),
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
aggregated_output: String::new(),
|
||||
exit_code: 0,
|
||||
duration: Duration::from_millis(1),
|
||||
formatted_output: String::new(),
|
||||
}),
|
||||
);
|
||||
let out = ep.collect_thread_events(&end_only);
|
||||
assert!(out.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn patch_apply_success_produces_item_completed_patchapply() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
|
||||
// Prepare a patch with multiple kinds of changes
|
||||
let mut changes = std::collections::HashMap::new();
|
||||
changes.insert(
|
||||
PathBuf::from("a/added.txt"),
|
||||
FileChange::Add {
|
||||
content: "+hello".to_string(),
|
||||
},
|
||||
);
|
||||
changes.insert(
|
||||
PathBuf::from("b/deleted.txt"),
|
||||
FileChange::Delete {
|
||||
content: "-goodbye".to_string(),
|
||||
},
|
||||
);
|
||||
changes.insert(
|
||||
PathBuf::from("c/modified.txt"),
|
||||
FileChange::Update {
|
||||
unified_diff: "--- c/modified.txt\n+++ c/modified.txt\n@@\n-old\n+new\n".to_string(),
|
||||
move_path: Some(PathBuf::from("c/renamed.txt")),
|
||||
},
|
||||
);
|
||||
|
||||
// Begin -> no output
|
||||
let begin = event(
|
||||
"p1",
|
||||
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
||||
call_id: "call-1".to_string(),
|
||||
auto_approved: true,
|
||||
changes: changes.clone(),
|
||||
}),
|
||||
);
|
||||
let out_begin = ep.collect_thread_events(&begin);
|
||||
assert!(out_begin.is_empty());
|
||||
|
||||
// End (success) -> item.completed (item_0)
|
||||
let end = event(
|
||||
"p2",
|
||||
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||
call_id: "call-1".to_string(),
|
||||
stdout: "applied 3 changes".to_string(),
|
||||
stderr: String::new(),
|
||||
success: true,
|
||||
}),
|
||||
);
|
||||
let out_end = ep.collect_thread_events(&end);
|
||||
assert_eq!(out_end.len(), 1);
|
||||
|
||||
// Validate structure without relying on HashMap iteration order
|
||||
match &out_end[0] {
|
||||
ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => {
|
||||
assert_eq!(&item.id, "item_0");
|
||||
match &item.details {
|
||||
ThreadItemDetails::FileChange(file_update) => {
|
||||
assert_eq!(file_update.status, PatchApplyStatus::Completed);
|
||||
|
||||
let mut actual: Vec<(String, PatchChangeKind)> = file_update
|
||||
.changes
|
||||
.iter()
|
||||
.map(|c| (c.path.clone(), c.kind.clone()))
|
||||
.collect();
|
||||
actual.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
|
||||
let mut expected = vec![
|
||||
("a/added.txt".to_string(), PatchChangeKind::Add),
|
||||
("b/deleted.txt".to_string(), PatchChangeKind::Delete),
|
||||
("c/modified.txt".to_string(), PatchChangeKind::Update),
|
||||
];
|
||||
expected.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
|
||||
assert_eq!(actual, expected);
|
||||
}
|
||||
other => panic!("unexpected details: {other:?}"),
|
||||
}
|
||||
}
|
||||
other => panic!("unexpected event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn patch_apply_failure_produces_item_completed_patchapply_failed() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
|
||||
let mut changes = std::collections::HashMap::new();
|
||||
changes.insert(
|
||||
PathBuf::from("file.txt"),
|
||||
FileChange::Update {
|
||||
unified_diff: "--- file.txt\n+++ file.txt\n@@\n-old\n+new\n".to_string(),
|
||||
move_path: None,
|
||||
},
|
||||
);
|
||||
|
||||
// Begin -> no output
|
||||
let begin = event(
|
||||
"p1",
|
||||
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
||||
call_id: "call-2".to_string(),
|
||||
auto_approved: false,
|
||||
changes: changes.clone(),
|
||||
}),
|
||||
);
|
||||
assert!(ep.collect_thread_events(&begin).is_empty());
|
||||
|
||||
// End (failure) -> item.completed (item_0) with Failed status
|
||||
let end = event(
|
||||
"p2",
|
||||
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||
call_id: "call-2".to_string(),
|
||||
stdout: String::new(),
|
||||
stderr: "failed to apply".to_string(),
|
||||
success: false,
|
||||
}),
|
||||
);
|
||||
let out_end = ep.collect_thread_events(&end);
|
||||
assert_eq!(out_end.len(), 1);
|
||||
|
||||
match &out_end[0] {
|
||||
ThreadEvent::ItemCompleted(ItemCompletedEvent { item }) => {
|
||||
assert_eq!(&item.id, "item_0");
|
||||
match &item.details {
|
||||
ThreadItemDetails::FileChange(file_update) => {
|
||||
assert_eq!(file_update.status, PatchApplyStatus::Failed);
|
||||
assert_eq!(file_update.changes.len(), 1);
|
||||
assert_eq!(file_update.changes[0].path, "file.txt".to_string());
|
||||
assert_eq!(file_update.changes[0].kind, PatchChangeKind::Update);
|
||||
}
|
||||
other => panic!("unexpected details: {other:?}"),
|
||||
}
|
||||
}
|
||||
other => panic!("unexpected event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn task_complete_produces_turn_completed_with_usage() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
|
||||
// First, feed a TokenCount event with known totals.
|
||||
let usage = codex_core::protocol::TokenUsage {
|
||||
input_tokens: 1200,
|
||||
cached_input_tokens: 200,
|
||||
output_tokens: 345,
|
||||
reasoning_output_tokens: 0,
|
||||
total_tokens: 0,
|
||||
};
|
||||
let info = codex_core::protocol::TokenUsageInfo {
|
||||
total_token_usage: usage.clone(),
|
||||
last_token_usage: usage,
|
||||
model_context_window: None,
|
||||
};
|
||||
let token_count_event = event(
|
||||
"e1",
|
||||
EventMsg::TokenCount(codex_core::protocol::TokenCountEvent {
|
||||
info: Some(info),
|
||||
rate_limits: None,
|
||||
}),
|
||||
);
|
||||
assert!(ep.collect_thread_events(&token_count_event).is_empty());
|
||||
|
||||
// Then TaskComplete should produce turn.completed with the captured usage.
|
||||
let complete_event = event(
|
||||
"e2",
|
||||
EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
|
||||
last_agent_message: Some("done".to_string()),
|
||||
}),
|
||||
);
|
||||
let out = ep.collect_thread_events(&complete_event);
|
||||
assert_eq!(
|
||||
out,
|
||||
vec![ThreadEvent::TurnCompleted(TurnCompletedEvent {
|
||||
usage: Usage {
|
||||
input_tokens: 1200,
|
||||
cached_input_tokens: 200,
|
||||
output_tokens: 345,
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
4
llmx-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
vendored
Normal file
4
llmx-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
class BaseClass:
|
||||
def method():
|
||||
|
||||
return True
|
||||
10
llmx-rs/exec/tests/fixtures/cli_responses_fixture.sse
vendored
Normal file
10
llmx-rs/exec/tests/fixtures/cli_responses_fixture.sse
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
event: response.created
|
||||
data: {"type":"response.created","response":{"id":"resp1"}}
|
||||
|
||||
event: response.output_item.done
|
||||
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"fixture hello"}]}}
|
||||
|
||||
event: response.completed
|
||||
data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
|
||||
|
||||
|
||||
151
llmx-rs/exec/tests/suite/apply_patch.rs
Normal file
151
llmx-rs/exec/tests/suite/apply_patch.rs
Normal file
@@ -0,0 +1,151 @@
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used, unused_imports)]
|
||||
|
||||
use anyhow::Context;
|
||||
use assert_cmd::prelude::*;
|
||||
use codex_core::CODEX_APPLY_PATCH_ARG1;
|
||||
use core_test_support::responses::ev_apply_patch_custom_tool_call;
|
||||
use core_test_support::responses::ev_apply_patch_function_call;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
use tempfile::tempdir;
|
||||
|
||||
/// While we may add an `apply-patch` subcommand to the `codex` CLI multitool
|
||||
/// at some point, we must ensure that the smaller `codex-exec` CLI can still
|
||||
/// emulate the `apply_patch` CLI.
|
||||
#[test]
|
||||
fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
let relative_path = "source.txt";
|
||||
let absolute_path = tmp.path().join(relative_path);
|
||||
fs::write(&absolute_path, "original content\n")?;
|
||||
|
||||
Command::cargo_bin("codex-exec")
|
||||
.context("should find binary for codex-exec")?
|
||||
.arg(CODEX_APPLY_PATCH_ARG1)
|
||||
.arg(
|
||||
r#"*** Begin Patch
|
||||
*** Update File: source.txt
|
||||
@@
|
||||
-original content
|
||||
+modified by apply_patch
|
||||
*** End Patch"#,
|
||||
)
|
||||
.current_dir(tmp.path())
|
||||
.assert()
|
||||
.success()
|
||||
.stdout("Success. Updated the following files:\nM source.txt\n")
|
||||
.stderr(predicates::str::is_empty());
|
||||
assert_eq!(
|
||||
fs::read_to_string(absolute_path)?,
|
||||
"modified by apply_patch\n"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn test_apply_patch_tool() -> anyhow::Result<()> {
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let test = test_codex_exec();
|
||||
let tmp_path = test.cwd_path().to_path_buf();
|
||||
let add_patch = r#"*** Begin Patch
|
||||
*** Add File: test.md
|
||||
+Hello world
|
||||
*** End Patch"#;
|
||||
let update_patch = r#"*** Begin Patch
|
||||
*** Update File: test.md
|
||||
@@
|
||||
-Hello world
|
||||
+Final text
|
||||
*** End Patch"#;
|
||||
let response_streams = vec![
|
||||
sse(vec![
|
||||
ev_apply_patch_custom_tool_call("request_0", add_patch),
|
||||
ev_completed("request_0"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_apply_patch_function_call("request_1", update_patch),
|
||||
ev_completed("request_1"),
|
||||
]),
|
||||
sse(vec![ev_completed("request_2")]),
|
||||
];
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_sequence(&server, response_streams).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-s")
|
||||
.arg("danger-full-access")
|
||||
.arg("foo")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
let final_path = tmp_path.join("test.md");
|
||||
let contents = std::fs::read_to_string(&final_path)
|
||||
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
|
||||
assert_eq!(contents, "Final text\n");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let test = test_codex_exec();
|
||||
let freeform_add_patch = r#"*** Begin Patch
|
||||
*** Add File: app.py
|
||||
+class BaseClass:
|
||||
+ def method():
|
||||
+ return False
|
||||
*** End Patch"#;
|
||||
let freeform_update_patch = r#"*** Begin Patch
|
||||
*** Update File: app.py
|
||||
@@ def method():
|
||||
- return False
|
||||
+
|
||||
+ return True
|
||||
*** End Patch"#;
|
||||
let response_streams = vec![
|
||||
sse(vec![
|
||||
ev_apply_patch_custom_tool_call("request_0", freeform_add_patch),
|
||||
ev_completed("request_0"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_apply_patch_custom_tool_call("request_1", freeform_update_patch),
|
||||
ev_completed("request_1"),
|
||||
]),
|
||||
sse(vec![ev_completed("request_2")]),
|
||||
];
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_sequence(&server, response_streams).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-s")
|
||||
.arg("danger-full-access")
|
||||
.arg("foo")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// Verify final file contents
|
||||
let final_path = test.cwd_path().join("app.py");
|
||||
let contents = std::fs::read_to_string(&final_path)
|
||||
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
|
||||
assert_eq!(
|
||||
contents,
|
||||
include_str!("../fixtures/apply_patch_freeform_final.txt")
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
30
llmx-rs/exec/tests/suite/auth_env.rs
Normal file
30
llmx-rs/exec/tests/suite/auth_env.rs
Normal file
@@ -0,0 +1,30 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use wiremock::matchers::header;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_uses_codex_api_key_env_var() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse_once_match(
|
||||
&server,
|
||||
header("Authorization", "Bearer dummy"),
|
||||
sse(vec![ev_completed("request_0")]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg("echo testing codex api key")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
8
llmx-rs/exec/tests/suite/mod.rs
Normal file
8
llmx-rs/exec/tests/suite/mod.rs
Normal file
@@ -0,0 +1,8 @@
|
||||
// Aggregates all former standalone integration tests as modules.
|
||||
mod apply_patch;
|
||||
mod auth_env;
|
||||
mod originator;
|
||||
mod output_schema;
|
||||
mod resume;
|
||||
mod sandbox;
|
||||
mod server_error_exit;
|
||||
52
llmx-rs/exec/tests/suite/originator.rs
Normal file
52
llmx-rs/exec/tests/suite/originator.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use core_test_support::responses;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use wiremock::matchers::header;
|
||||
|
||||
/// Verify that when the server reports an error, `codex-exec` exits with a
|
||||
/// non-zero status code so automation can detect failures.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn send_codex_exec_originator() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let body = responses::sse(vec![
|
||||
responses::ev_response_created("response_1"),
|
||||
responses::ev_assistant_message("response_1", "Hello, world!"),
|
||||
responses::ev_completed("response_1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, header("Originator", "codex_exec"), body).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("tell me something")
|
||||
.assert()
|
||||
.code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn supports_originator_override() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let body = responses::sse(vec![
|
||||
responses::ev_response_created("response_1"),
|
||||
responses::ev_assistant_message("response_1", "Hello, world!"),
|
||||
responses::ev_completed("response_1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, header("Originator", "codex_exec_override"), body)
|
||||
.await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.env("CODEX_INTERNAL_ORIGINATOR_OVERRIDE", "codex_exec_override")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("tell me something")
|
||||
.assert()
|
||||
.code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
63
llmx-rs/exec/tests/suite/output_schema.rs
Normal file
63
llmx-rs/exec/tests/suite/output_schema.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use core_test_support::responses;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use serde_json::Value;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
|
||||
let schema_contents = serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"answer": { "type": "string" }
|
||||
},
|
||||
"required": ["answer"],
|
||||
"additionalProperties": false
|
||||
});
|
||||
let schema_path = test.cwd_path().join("schema.json");
|
||||
std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?;
|
||||
let expected_schema: Value = schema_contents;
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let body = responses::sse(vec![
|
||||
responses::ev_response_created("resp1"),
|
||||
responses::ev_assistant_message("m1", "fixture hello"),
|
||||
responses::ev_completed("resp1"),
|
||||
]);
|
||||
let response_mock = responses::mount_sse_once_match(&server, any(), body).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
// keep using -C in the test to exercise the flag as well
|
||||
.arg("-C")
|
||||
.arg(test.cwd_path())
|
||||
.arg("--output-schema")
|
||||
.arg(&schema_path)
|
||||
.arg("-m")
|
||||
.arg("gpt-5")
|
||||
.arg("tell me a joke")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
let request = response_mock.single_request();
|
||||
let payload: Value = request.body_json();
|
||||
let text = payload.get("text").expect("request missing text field");
|
||||
let format = text
|
||||
.get("format")
|
||||
.expect("request missing text.format field");
|
||||
assert_eq!(
|
||||
format,
|
||||
&serde_json::json!({
|
||||
"name": "codex_output_schema",
|
||||
"type": "json_schema",
|
||||
"strict": true,
|
||||
"schema": expected_schema,
|
||||
})
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
257
llmx-rs/exec/tests/suite/resume.rs
Normal file
257
llmx-rs/exec/tests/suite/resume.rs
Normal file
@@ -0,0 +1,257 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
use anyhow::Context;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use serde_json::Value;
|
||||
use std::path::Path;
|
||||
use std::string::ToString;
|
||||
use uuid::Uuid;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
/// Utility: scan the sessions dir for a rollout file that contains `marker`
|
||||
/// in any response_item.message.content entry. Returns the absolute path.
|
||||
fn find_session_file_containing_marker(
|
||||
sessions_dir: &std::path::Path,
|
||||
marker: &str,
|
||||
) -> Option<std::path::PathBuf> {
|
||||
for entry in WalkDir::new(sessions_dir) {
|
||||
let entry = match entry {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if !entry.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
if !entry.file_name().to_string_lossy().ends_with(".jsonl") {
|
||||
continue;
|
||||
}
|
||||
let path = entry.path();
|
||||
let Ok(content) = std::fs::read_to_string(path) else {
|
||||
continue;
|
||||
};
|
||||
// Skip the first meta line and scan remaining JSONL entries.
|
||||
let mut lines = content.lines();
|
||||
if lines.next().is_none() {
|
||||
continue;
|
||||
}
|
||||
for line in lines {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let Ok(item): Result<Value, _> = serde_json::from_str(line) else {
|
||||
continue;
|
||||
};
|
||||
if item.get("type").and_then(|t| t.as_str()) == Some("response_item")
|
||||
&& let Some(payload) = item.get("payload")
|
||||
&& payload.get("type").and_then(|t| t.as_str()) == Some("message")
|
||||
&& payload
|
||||
.get("content")
|
||||
.map(ToString::to_string)
|
||||
.unwrap_or_default()
|
||||
.contains(marker)
|
||||
{
|
||||
return Some(path.to_path_buf());
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Extract the conversation UUID from the first SessionMeta line in the rollout file.
|
||||
fn extract_conversation_id(path: &std::path::Path) -> String {
|
||||
let content = std::fs::read_to_string(path).unwrap();
|
||||
let mut lines = content.lines();
|
||||
let meta_line = lines.next().expect("missing meta line");
|
||||
let meta: Value = serde_json::from_str(meta_line).expect("invalid meta json");
|
||||
meta.get("payload")
|
||||
.and_then(|p| p.get("id"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
let fixture =
|
||||
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
|
||||
|
||||
// 1) First run: create a session with a unique marker in the content.
|
||||
let marker = format!("resume-last-{}", Uuid::new_v4());
|
||||
let prompt = format!("echo {marker}");
|
||||
|
||||
test.cmd()
|
||||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||||
.env("OPENAI_BASE_URL", "http://unused.local")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt)
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// Find the created session file containing the marker.
|
||||
let sessions_dir = test.home_path().join("sessions");
|
||||
let path = find_session_file_containing_marker(&sessions_dir, &marker)
|
||||
.expect("no session file found after first run");
|
||||
|
||||
// 2) Second run: resume the most recent file with a new marker.
|
||||
let marker2 = format!("resume-last-2-{}", Uuid::new_v4());
|
||||
let prompt2 = format!("echo {marker2}");
|
||||
|
||||
test.cmd()
|
||||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||||
.env("OPENAI_BASE_URL", "http://unused.local")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt2)
|
||||
.arg("resume")
|
||||
.arg("--last")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// Ensure the same file was updated and contains both markers.
|
||||
let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2)
|
||||
.expect("no resumed session file containing marker2");
|
||||
assert_eq!(
|
||||
resumed_path, path,
|
||||
"resume --last should append to existing file"
|
||||
);
|
||||
let content = std::fs::read_to_string(&resumed_path)?;
|
||||
assert!(content.contains(&marker));
|
||||
assert!(content.contains(&marker2));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exec_resume_by_id_appends_to_existing_file() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
let fixture =
|
||||
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
|
||||
|
||||
// 1) First run: create a session
|
||||
let marker = format!("resume-by-id-{}", Uuid::new_v4());
|
||||
let prompt = format!("echo {marker}");
|
||||
|
||||
test.cmd()
|
||||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||||
.env("OPENAI_BASE_URL", "http://unused.local")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt)
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
let sessions_dir = test.home_path().join("sessions");
|
||||
let path = find_session_file_containing_marker(&sessions_dir, &marker)
|
||||
.expect("no session file found after first run");
|
||||
let session_id = extract_conversation_id(&path);
|
||||
assert!(
|
||||
!session_id.is_empty(),
|
||||
"missing conversation id in meta line"
|
||||
);
|
||||
|
||||
// 2) Resume by id
|
||||
let marker2 = format!("resume-by-id-2-{}", Uuid::new_v4());
|
||||
let prompt2 = format!("echo {marker2}");
|
||||
|
||||
test.cmd()
|
||||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||||
.env("OPENAI_BASE_URL", "http://unused.local")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt2)
|
||||
.arg("resume")
|
||||
.arg(&session_id)
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2)
|
||||
.expect("no resumed session file containing marker2");
|
||||
assert_eq!(
|
||||
resumed_path, path,
|
||||
"resume by id should append to existing file"
|
||||
);
|
||||
let content = std::fs::read_to_string(&resumed_path)?;
|
||||
assert!(content.contains(&marker));
|
||||
assert!(content.contains(&marker2));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
let fixture =
|
||||
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/cli_responses_fixture.sse");
|
||||
|
||||
let marker = format!("resume-config-{}", Uuid::new_v4());
|
||||
let prompt = format!("echo {marker}");
|
||||
|
||||
test.cmd()
|
||||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||||
.env("OPENAI_BASE_URL", "http://unused.local")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("--sandbox")
|
||||
.arg("workspace-write")
|
||||
.arg("--model")
|
||||
.arg("gpt-5")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt)
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
let sessions_dir = test.home_path().join("sessions");
|
||||
let path = find_session_file_containing_marker(&sessions_dir, &marker)
|
||||
.expect("no session file found after first run");
|
||||
|
||||
let marker2 = format!("resume-config-2-{}", Uuid::new_v4());
|
||||
let prompt2 = format!("echo {marker2}");
|
||||
|
||||
let output = test
|
||||
.cmd()
|
||||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||||
.env("OPENAI_BASE_URL", "http://unused.local")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("--sandbox")
|
||||
.arg("workspace-write")
|
||||
.arg("--model")
|
||||
.arg("gpt-5-high")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt2)
|
||||
.arg("resume")
|
||||
.arg("--last")
|
||||
.output()
|
||||
.context("resume run should succeed")?;
|
||||
|
||||
assert!(output.status.success(), "resume run failed: {output:?}");
|
||||
|
||||
let stderr = String::from_utf8(output.stderr)?;
|
||||
assert!(
|
||||
stderr.contains("model: gpt-5-high"),
|
||||
"stderr missing model override: {stderr}"
|
||||
);
|
||||
if cfg!(target_os = "windows") {
|
||||
assert!(
|
||||
stderr.contains("sandbox: read-only"),
|
||||
"stderr missing downgraded sandbox note: {stderr}"
|
||||
);
|
||||
} else {
|
||||
assert!(
|
||||
stderr.contains("sandbox: workspace-write"),
|
||||
"stderr missing sandbox override: {stderr}"
|
||||
);
|
||||
}
|
||||
|
||||
let resumed_path = find_session_file_containing_marker(&sessions_dir, &marker2)
|
||||
.expect("no resumed session file containing marker2");
|
||||
assert_eq!(resumed_path, path, "resume should append to same file");
|
||||
|
||||
let content = std::fs::read_to_string(&resumed_path)?;
|
||||
assert!(content.contains(&marker));
|
||||
assert!(content.contains(&marker2));
|
||||
Ok(())
|
||||
}
|
||||
322
llmx-rs/exec/tests/suite/sandbox.rs
Normal file
322
llmx-rs/exec/tests/suite/sandbox.rs
Normal file
@@ -0,0 +1,322 @@
|
||||
#![cfg(unix)]
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::spawn::StdioPolicy;
|
||||
use std::collections::HashMap;
|
||||
use std::future::Future;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitStatus;
|
||||
use tokio::fs::create_dir_all;
|
||||
use tokio::process::Child;
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
async fn spawn_command_under_sandbox(
|
||||
command: Vec<String>,
|
||||
command_cwd: PathBuf,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
sandbox_cwd: &Path,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
use codex_core::seatbelt::spawn_command_under_seatbelt;
|
||||
spawn_command_under_seatbelt(
|
||||
command,
|
||||
command_cwd,
|
||||
sandbox_policy,
|
||||
sandbox_cwd,
|
||||
stdio_policy,
|
||||
env,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
async fn spawn_command_under_sandbox(
|
||||
command: Vec<String>,
|
||||
command_cwd: PathBuf,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
sandbox_cwd: &Path,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
use codex_core::landlock::spawn_command_under_linux_sandbox;
|
||||
let codex_linux_sandbox_exe = assert_cmd::cargo::cargo_bin("codex-exec");
|
||||
spawn_command_under_linux_sandbox(
|
||||
codex_linux_sandbox_exe,
|
||||
command,
|
||||
command_cwd,
|
||||
sandbox_policy,
|
||||
sandbox_cwd,
|
||||
stdio_policy,
|
||||
env,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn python_multiprocessing_lock_works_under_sandbox() {
|
||||
core_test_support::skip_if_sandbox!();
|
||||
#[cfg(target_os = "macos")]
|
||||
let writable_roots = Vec::<PathBuf>::new();
|
||||
|
||||
// From https://man7.org/linux/man-pages/man7/sem_overview.7.html
|
||||
//
|
||||
// > On Linux, named semaphores are created in a virtual filesystem,
|
||||
// > normally mounted under /dev/shm.
|
||||
#[cfg(target_os = "linux")]
|
||||
let writable_roots = vec![PathBuf::from("/dev/shm")];
|
||||
|
||||
let policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots,
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: false,
|
||||
exclude_slash_tmp: false,
|
||||
};
|
||||
|
||||
let python_code = r#"import multiprocessing
|
||||
from multiprocessing import Lock, Process
|
||||
|
||||
def f(lock):
|
||||
with lock:
|
||||
print("Lock acquired in child process")
|
||||
|
||||
if __name__ == '__main__':
|
||||
lock = Lock()
|
||||
p = Process(target=f, args=(lock,))
|
||||
p.start()
|
||||
p.join()
|
||||
"#;
|
||||
|
||||
let command_cwd = std::env::current_dir().expect("should be able to get current dir");
|
||||
let sandbox_cwd = command_cwd.clone();
|
||||
let mut child = spawn_command_under_sandbox(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
python_code.to_string(),
|
||||
],
|
||||
command_cwd,
|
||||
&policy,
|
||||
sandbox_cwd.as_path(),
|
||||
StdioPolicy::Inherit,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should be able to spawn python under sandbox");
|
||||
|
||||
let status = child.wait().await.expect("should wait for child process");
|
||||
assert!(status.success(), "python exited with {status:?}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sandbox_distinguishes_command_and_policy_cwds() {
|
||||
core_test_support::skip_if_sandbox!();
|
||||
let temp = tempfile::tempdir().expect("should be able to create temp dir");
|
||||
let sandbox_root = temp.path().join("sandbox");
|
||||
let command_root = temp.path().join("command");
|
||||
create_dir_all(&sandbox_root).await.expect("mkdir");
|
||||
create_dir_all(&command_root).await.expect("mkdir");
|
||||
let canonical_sandbox_root = tokio::fs::canonicalize(&sandbox_root)
|
||||
.await
|
||||
.expect("canonicalize sandbox root");
|
||||
let canonical_allowed_path = canonical_sandbox_root.join("allowed.txt");
|
||||
|
||||
let disallowed_path = command_root.join("forbidden.txt");
|
||||
|
||||
// Note writable_roots is empty: verify that `canonical_allowed_path` is
|
||||
// writable only because it is under the sandbox policy cwd, not because it
|
||||
// is under a writable root.
|
||||
let policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![],
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: true,
|
||||
exclude_slash_tmp: true,
|
||||
};
|
||||
|
||||
// Attempt to write inside the command cwd, which is outside of the sandbox policy cwd.
|
||||
let mut child = spawn_command_under_sandbox(
|
||||
vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo forbidden > forbidden.txt".to_string(),
|
||||
],
|
||||
command_root.clone(),
|
||||
&policy,
|
||||
canonical_sandbox_root.as_path(),
|
||||
StdioPolicy::Inherit,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should spawn command writing to forbidden path");
|
||||
|
||||
let status = child
|
||||
.wait()
|
||||
.await
|
||||
.expect("should wait for forbidden command");
|
||||
assert!(
|
||||
!status.success(),
|
||||
"sandbox unexpectedly allowed writing to command cwd: {status:?}"
|
||||
);
|
||||
let forbidden_exists = tokio::fs::try_exists(&disallowed_path)
|
||||
.await
|
||||
.expect("try_exists failed");
|
||||
assert!(
|
||||
!forbidden_exists,
|
||||
"forbidden path should not have been created"
|
||||
);
|
||||
|
||||
// Writing to the sandbox policy cwd after changing directories into it should succeed.
|
||||
let mut child = spawn_command_under_sandbox(
|
||||
vec![
|
||||
"/usr/bin/touch".to_string(),
|
||||
canonical_allowed_path.to_string_lossy().into_owned(),
|
||||
],
|
||||
command_root,
|
||||
&policy,
|
||||
canonical_sandbox_root.as_path(),
|
||||
StdioPolicy::Inherit,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should spawn command writing to sandbox root");
|
||||
|
||||
let status = child.wait().await.expect("should wait for allowed command");
|
||||
assert!(
|
||||
status.success(),
|
||||
"sandbox blocked allowed write: {status:?}"
|
||||
);
|
||||
let allowed_exists = tokio::fs::try_exists(&canonical_allowed_path)
|
||||
.await
|
||||
.expect("try_exists allowed failed");
|
||||
assert!(allowed_exists, "allowed path should exist");
|
||||
}
|
||||
|
||||
fn unix_sock_body() {
|
||||
unsafe {
|
||||
let mut fds = [0i32; 2];
|
||||
let r = libc::socketpair(libc::AF_UNIX, libc::SOCK_DGRAM, 0, fds.as_mut_ptr());
|
||||
assert_eq!(
|
||||
r,
|
||||
0,
|
||||
"socketpair(AF_UNIX, SOCK_DGRAM) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
|
||||
let msg = b"hello_unix";
|
||||
// write() from one end (generic write is allowed)
|
||||
let sent = libc::write(fds[0], msg.as_ptr() as *const libc::c_void, msg.len());
|
||||
assert!(sent >= 0, "write() failed: {}", io::Error::last_os_error());
|
||||
|
||||
// recvfrom() on the other end. We don’t need the address for socketpair,
|
||||
// so we pass null pointers for src address.
|
||||
let mut buf = [0u8; 64];
|
||||
let recvd = libc::recvfrom(
|
||||
fds[1],
|
||||
buf.as_mut_ptr() as *mut libc::c_void,
|
||||
buf.len(),
|
||||
0,
|
||||
std::ptr::null_mut(),
|
||||
std::ptr::null_mut(),
|
||||
);
|
||||
assert!(
|
||||
recvd >= 0,
|
||||
"recvfrom() failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
|
||||
let recvd_slice = &buf[..(recvd as usize)];
|
||||
assert_eq!(
|
||||
recvd_slice,
|
||||
&msg[..],
|
||||
"payload mismatch: sent {} bytes, got {} bytes",
|
||||
msg.len(),
|
||||
recvd
|
||||
);
|
||||
|
||||
// Also exercise AF_UNIX stream socketpair quickly to ensure AF_UNIX in general works.
|
||||
let mut sfds = [0i32; 2];
|
||||
let sr = libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, sfds.as_mut_ptr());
|
||||
assert_eq!(
|
||||
sr,
|
||||
0,
|
||||
"socketpair(AF_UNIX, SOCK_STREAM) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
let snt2 = libc::write(sfds[0], msg.as_ptr() as *const libc::c_void, msg.len());
|
||||
assert!(
|
||||
snt2 >= 0,
|
||||
"write(stream) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
let mut b2 = [0u8; 64];
|
||||
let rcv2 = libc::recv(sfds[1], b2.as_mut_ptr() as *mut libc::c_void, b2.len(), 0);
|
||||
assert!(
|
||||
rcv2 >= 0,
|
||||
"recv(stream) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
|
||||
// Clean up
|
||||
let _ = libc::close(sfds[0]);
|
||||
let _ = libc::close(sfds[1]);
|
||||
let _ = libc::close(fds[0]);
|
||||
let _ = libc::close(fds[1]);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn allow_unix_socketpair_recvfrom() {
|
||||
run_code_under_sandbox(
|
||||
"allow_unix_socketpair_recvfrom",
|
||||
&SandboxPolicy::ReadOnly,
|
||||
|| async { unix_sock_body() },
|
||||
)
|
||||
.await
|
||||
.expect("should be able to reexec");
|
||||
}
|
||||
|
||||
const IN_SANDBOX_ENV_VAR: &str = "IN_SANDBOX";
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
pub async fn run_code_under_sandbox<F, Fut>(
|
||||
test_selector: &str,
|
||||
policy: &SandboxPolicy,
|
||||
child_body: F,
|
||||
) -> io::Result<Option<ExitStatus>>
|
||||
where
|
||||
F: FnOnce() -> Fut + Send + 'static,
|
||||
Fut: Future<Output = ()> + Send + 'static,
|
||||
{
|
||||
if std::env::var(IN_SANDBOX_ENV_VAR).is_err() {
|
||||
let exe = std::env::current_exe()?;
|
||||
let mut cmds = vec![exe.to_string_lossy().into_owned(), "--exact".into()];
|
||||
let mut stdio_policy = StdioPolicy::RedirectForShellTool;
|
||||
// Allow for us to pass forward --nocapture / use the right stdio policy.
|
||||
if std::env::args().any(|a| a == "--nocapture") {
|
||||
cmds.push("--nocapture".into());
|
||||
stdio_policy = StdioPolicy::Inherit;
|
||||
}
|
||||
cmds.push(test_selector.into());
|
||||
|
||||
// Your existing launcher:
|
||||
let command_cwd = std::env::current_dir().expect("should be able to get current dir");
|
||||
let sandbox_cwd = command_cwd.clone();
|
||||
let mut child = spawn_command_under_sandbox(
|
||||
cmds,
|
||||
command_cwd,
|
||||
policy,
|
||||
sandbox_cwd.as_path(),
|
||||
stdio_policy,
|
||||
HashMap::from([("IN_SANDBOX".into(), "1".into())]),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let status = child.wait().await?;
|
||||
Ok(Some(status))
|
||||
} else {
|
||||
// Child branch: run the provided body.
|
||||
child_body().await;
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
34
llmx-rs/exec/tests/suite/server_error_exit.rs
Normal file
34
llmx-rs/exec/tests/suite/server_error_exit.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use core_test_support::responses;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
/// Verify that when the server reports an error, `codex-exec` exits with a
|
||||
/// non-zero status code so automation can detect failures.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exits_non_zero_when_server_reports_error() -> anyhow::Result<()> {
|
||||
let test = test_codex_exec();
|
||||
|
||||
// Mock a simple Responses API SSE stream that immediately reports a
|
||||
// `response.failed` event with an error message.
|
||||
let server = responses::start_mock_server().await;
|
||||
let body = responses::sse(vec![serde_json::json!({
|
||||
"type": "response.failed",
|
||||
"response": {
|
||||
"id": "resp_err_1",
|
||||
"error": {"code": "rate_limit_exceeded", "message": "synthetic server error"}
|
||||
}
|
||||
})]);
|
||||
responses::mount_sse_once_match(&server, any(), body).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("tell me something")
|
||||
.arg("--experimental-json")
|
||||
.assert()
|
||||
.code(1);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user