chore: refactor tool handling (#4510)

# Tool System Refactor

- Centralizes tool definitions and execution in `core/src/tools/*`:
specs (`spec.rs`), handlers (`handlers/*`), router (`router.rs`),
registry/dispatch (`registry.rs`), and shared context (`context.rs`).
One registry now builds the model-visible tool list and binds handlers.
- Router converts model responses to tool calls; Registry dispatches
with consistent telemetry via `codex-rs/otel` and unified error
handling. Function, Local Shell, MCP, and experimental `unified_exec`
all flow through this path; legacy shell aliases still work.
- Rationale: reduce per‑tool boilerplate, keep spec/handler in sync, and
make adding tools predictable and testable.

Example: `read_file`
- Spec: `core/src/tools/spec.rs` (see `create_read_file_tool`,
registered by `build_specs`).
- Handler: `core/src/tools/handlers/read_file.rs` (absolute `file_path`,
1‑indexed `offset`, `limit`, `L#: ` prefixes, safe truncation).
- E2E test: `core/tests/suite/read_file.rs` validates the tool returns
the requested lines.

## Next steps:
- Decompose `handle_container_exec_with_params` 
- Add parallel tool calls
This commit is contained in:
jif-oai
2025-10-03 13:21:06 +01:00
committed by GitHub
parent 69cb72f842
commit 33d3ecbccc
48 changed files with 5288 additions and 2006 deletions

View File

@@ -7,6 +7,9 @@ use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use codex_core::config::ConfigToml;
#[cfg(target_os = "linux")]
use assert_cmd::cargo::cargo_bin;
pub mod responses;
pub mod test_codex;
pub mod test_codex_exec;
@@ -17,12 +20,25 @@ pub mod test_codex_exec;
pub fn load_default_config_for_test(codex_home: &TempDir) -> Config {
Config::load_from_base_config_with_overrides(
ConfigToml::default(),
ConfigOverrides::default(),
default_test_overrides(),
codex_home.path().to_path_buf(),
)
.expect("defaults for test should always succeed")
}
#[cfg(target_os = "linux")]
fn default_test_overrides() -> ConfigOverrides {
ConfigOverrides {
codex_linux_sandbox_exe: Some(cargo_bin("codex-linux-sandbox")),
..ConfigOverrides::default()
}
}
#[cfg(not(target_os = "linux"))]
fn default_test_overrides() -> ConfigOverrides {
ConfigOverrides::default()
}
/// Builds an SSE stream body from a JSON fixture.
///
/// The fixture must contain an array of objects where each object represents a

View File

@@ -12,12 +12,18 @@ mod fork_conversation;
mod json_result;
mod live_cli;
mod model_overrides;
mod model_tools;
mod otel;
mod prompt_caching;
mod read_file;
mod review;
mod rmcp_client;
mod rollout_list_find;
mod seatbelt;
mod stream_error_allows_next_turn;
mod stream_no_completed;
mod tool_harness;
mod tools;
mod unified_exec;
mod user_notification;
mod view_image;

View File

@@ -0,0 +1,124 @@
#![allow(clippy::unwrap_used)]
use codex_core::CodexAuth;
use codex_core::ConversationManager;
use codex_core::ModelProviderInfo;
use codex_core::built_in_model_providers;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use core_test_support::load_default_config_for_test;
use core_test_support::load_sse_fixture_with_id;
use core_test_support::skip_if_no_network;
use core_test_support::wait_for_event;
use tempfile::TempDir;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
fn sse_completed(id: &str) -> String {
load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
}
#[allow(clippy::expect_used)]
fn tool_identifiers(body: &serde_json::Value) -> Vec<String> {
body["tools"]
.as_array()
.unwrap()
.iter()
.map(|tool| {
tool.get("name")
.and_then(|v| v.as_str())
.or_else(|| tool.get("type").and_then(|v| v.as_str()))
.map(std::string::ToString::to_string)
.expect("tool should have either name or type")
})
.collect()
}
#[allow(clippy::expect_used)]
async fn collect_tool_identifiers_for_model(model: &str) -> Vec<String> {
let server = MockServer::start().await;
let sse = sse_completed(model);
let template = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse, "text/event-stream");
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(template)
.expect(1)
.mount(&server)
.await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let cwd = TempDir::new().unwrap();
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.cwd = cwd.path().to_path_buf();
config.model_provider = model_provider;
config.model = model.to_string();
config.model_family =
find_family_for_model(model).unwrap_or_else(|| panic!("unknown model family for {model}"));
config.include_plan_tool = false;
config.include_apply_patch_tool = false;
config.include_view_image_tool = false;
config.tools_web_search_request = false;
config.use_experimental_streamable_shell_tool = false;
config.use_experimental_unified_exec_tool = false;
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let codex = conversation_manager
.new_conversation(config)
.await
.expect("create new conversation")
.conversation;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: "hello tools".into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.unwrap();
assert_eq!(
requests.len(),
1,
"expected a single request for model {model}"
);
let body = requests[0].body_json::<serde_json::Value>().unwrap();
tool_identifiers(&body)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn model_selects_expected_tools() {
skip_if_no_network!();
use pretty_assertions::assert_eq;
let codex_tools = collect_tool_identifiers_for_model("codex-mini-latest").await;
assert_eq!(
codex_tools,
vec!["local_shell".to_string(), "read_file".to_string()],
"codex-mini-latest should expose the local shell tool",
);
let o3_tools = collect_tool_identifiers_for_model("o3").await;
assert_eq!(
o3_tools,
vec!["shell".to_string(), "read_file".to_string()],
"o3 should expose the generic shell tool",
);
}

View File

@@ -219,7 +219,13 @@ async fn prompt_tools_are_consistent_across_requests() {
// our internal implementation is responsible for keeping tools in sync
// with the OpenAI schema, so we just verify the tool presence here
let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch", "view_image"];
let expected_tools_names: &[&str] = &[
"shell",
"update_plan",
"apply_patch",
"read_file",
"view_image",
];
let body0 = requests[0].body_json::<serde_json::Value>().unwrap();
assert_eq!(
body0["instructions"],

View File

@@ -0,0 +1,124 @@
#![cfg(not(target_os = "windows"))]
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use core_test_support::responses;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
use serde_json::Value;
use wiremock::matchers::any;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn read_file_tool_returns_requested_lines() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
cwd,
session_configured,
..
} = test_codex().build(&server).await?;
let file_path = cwd.path().join("sample.txt");
std::fs::write(&file_path, "first\nsecond\nthird\nfourth\n")?;
let file_path = file_path.to_string_lossy().to_string();
let call_id = "read-file-call";
let arguments = serde_json::json!({
"file_path": file_path,
"offset": 2,
"limit": 2,
})
.to_string();
let first_response = sse(vec![
serde_json::json!({
"type": "response.created",
"response": {"id": "resp-1"}
}),
ev_function_call(call_id, "read_file", &arguments),
ev_completed("resp-1"),
]);
responses::mount_sse_once_match(&server, any(), first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]);
responses::mount_sse_once_match(&server, any(), second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "please inspect sample.txt".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.expect("recorded requests");
let request_bodies = requests
.iter()
.map(|req| req.body_json::<Value>().unwrap())
.collect::<Vec<_>>();
assert!(
!request_bodies.is_empty(),
"expected at least one request body"
);
let tool_output_item = request_bodies
.iter()
.find_map(|body| {
body.get("input")
.and_then(Value::as_array)
.and_then(|items| {
items.iter().find(|item| {
item.get("type").and_then(Value::as_str) == Some("function_call_output")
})
})
})
.unwrap_or_else(|| {
panic!("function_call_output item not found in requests: {request_bodies:#?}")
});
assert_eq!(
tool_output_item.get("call_id").and_then(Value::as_str),
Some(call_id)
);
let output_text = tool_output_item
.get("output")
.and_then(|value| match value {
Value::String(text) => Some(text.as_str()),
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
_ => None,
})
.expect("output text present");
assert_eq!(output_text, "L2: second\nL3: third");
Ok(())
}

View File

@@ -0,0 +1,568 @@
#![cfg(not(target_os = "windows"))]
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::plan_tool::StepStatus;
use core_test_support::responses;
use core_test_support::responses::ev_apply_patch_function_call;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_local_shell_call;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use serde_json::Value;
use serde_json::json;
use wiremock::matchers::any;
fn function_call_output(body: &Value) -> Option<&Value> {
body.get("input")
.and_then(Value::as_array)
.and_then(|items| {
items.iter().find(|item| {
item.get("type").and_then(Value::as_str) == Some("function_call_output")
})
})
}
fn extract_output_text(item: &Value) -> Option<&str> {
item.get("output").and_then(|value| match value {
Value::String(text) => Some(text.as_str()),
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
_ => None,
})
}
fn find_request_with_function_call_output(requests: &[Value]) -> Option<&Value> {
requests
.iter()
.find(|body| function_call_output(body).is_some())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let call_id = "shell-tool-call";
let command = vec!["/bin/echo", "tool harness"];
let first_response = sse(vec![
serde_json::json!({
"type": "response.created",
"response": {"id": "resp-1"}
}),
ev_local_shell_call(call_id, "completed", command),
ev_completed("resp-1"),
]);
responses::mount_sse_once_match(&server, any(), first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "all done"),
ev_completed("resp-2"),
]);
responses::mount_sse_once_match(&server, any(), second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "please run the shell command".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
loop {
let event = codex.next_event().await.expect("event");
if matches!(event.msg, EventMsg::TaskComplete(_)) {
break;
}
}
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let request_bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let body_with_tool_output = find_request_with_function_call_output(&request_bodies)
.expect("function_call_output item not found in requests");
let output_item = function_call_output(body_with_tool_output).expect("tool output item");
let output_text = extract_output_text(output_item).expect("output text present");
let exec_output: Value = serde_json::from_str(output_text)?;
assert_eq!(exec_output["metadata"]["exit_code"], 0);
let stdout = exec_output["output"].as_str().expect("stdout field");
assert!(
stdout.contains("tool harness"),
"expected stdout to contain command output, got {stdout:?}"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_plan_tool = true;
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let call_id = "plan-tool-call";
let plan_args = json!({
"explanation": "Tool harness check",
"plan": [
{"step": "Inspect workspace", "status": "in_progress"},
{"step": "Report results", "status": "pending"},
],
})
.to_string();
let first_response = sse(vec![
serde_json::json!({
"type": "response.created",
"response": {"id": "resp-1"}
}),
ev_function_call(call_id, "update_plan", &plan_args),
ev_completed("resp-1"),
]);
responses::mount_sse_once_match(&server, any(), first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "plan acknowledged"),
ev_completed("resp-2"),
]);
responses::mount_sse_once_match(&server, any(), second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "please update the plan".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
let mut saw_plan_update = false;
loop {
let event = codex.next_event().await.expect("event");
match event.msg {
EventMsg::PlanUpdate(update) => {
saw_plan_update = true;
assert_eq!(update.explanation.as_deref(), Some("Tool harness check"));
assert_eq!(update.plan.len(), 2);
assert_eq!(update.plan[0].step, "Inspect workspace");
assert!(matches!(update.plan[0].status, StepStatus::InProgress));
assert_eq!(update.plan[1].step, "Report results");
assert!(matches!(update.plan[1].status, StepStatus::Pending));
}
EventMsg::TaskComplete(_) => break,
_ => {}
}
}
assert!(saw_plan_update, "expected PlanUpdate event");
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let request_bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let body_with_tool_output = find_request_with_function_call_output(&request_bodies)
.expect("function_call_output item not found in requests");
let output_item = function_call_output(body_with_tool_output).expect("tool output item");
assert_eq!(
output_item.get("call_id").and_then(Value::as_str),
Some(call_id)
);
let output_text = extract_output_text(output_item).expect("output text present");
assert_eq!(output_text, "Plan updated");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_plan_tool = true;
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let call_id = "plan-tool-invalid";
let invalid_args = json!({
"explanation": "Missing plan data"
})
.to_string();
let first_response = sse(vec![
serde_json::json!({
"type": "response.created",
"response": {"id": "resp-1"}
}),
ev_function_call(call_id, "update_plan", &invalid_args),
ev_completed("resp-1"),
]);
responses::mount_sse_once_match(&server, any(), first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "malformed plan payload"),
ev_completed("resp-2"),
]);
responses::mount_sse_once_match(&server, any(), second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "please update the plan".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
let mut saw_plan_update = false;
loop {
let event = codex.next_event().await.expect("event");
match event.msg {
EventMsg::PlanUpdate(_) => saw_plan_update = true,
EventMsg::TaskComplete(_) => break,
_ => {}
}
}
assert!(
!saw_plan_update,
"did not expect PlanUpdate event for malformed payload"
);
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let request_bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let body_with_tool_output = find_request_with_function_call_output(&request_bodies)
.expect("function_call_output item not found in requests");
let output_item = function_call_output(body_with_tool_output).expect("tool output item");
assert_eq!(
output_item.get("call_id").and_then(Value::as_str),
Some(call_id)
);
let output_text = extract_output_text(output_item).expect("output text present");
assert!(
output_text.contains("failed to parse function arguments"),
"expected parse error message in output text, got {output_text:?}"
);
if let Some(success_flag) = output_item
.get("output")
.and_then(|value| value.as_object())
.and_then(|obj| obj.get("success"))
.and_then(serde_json::Value::as_bool)
{
assert!(
!success_flag,
"expected tool output to mark success=false for malformed payload"
);
}
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_tool_executes_and_emits_patch_events() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let call_id = "apply-patch-call";
let patch_content = r#"*** Begin Patch
*** Add File: notes.txt
+Tool harness apply patch
*** End Patch"#;
let first_response = sse(vec![
serde_json::json!({
"type": "response.created",
"response": {"id": "resp-1"}
}),
ev_apply_patch_function_call(call_id, patch_content),
ev_completed("resp-1"),
]);
responses::mount_sse_once_match(&server, any(), first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "patch complete"),
ev_completed("resp-2"),
]);
responses::mount_sse_once_match(&server, any(), second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "please apply a patch".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
let mut saw_patch_begin = false;
let mut patch_end_success = None;
loop {
let event = codex.next_event().await.expect("event");
match event.msg {
EventMsg::PatchApplyBegin(begin) => {
saw_patch_begin = true;
assert_eq!(begin.call_id, call_id);
}
EventMsg::PatchApplyEnd(end) => {
assert_eq!(end.call_id, call_id);
patch_end_success = Some(end.success);
}
EventMsg::TaskComplete(_) => break,
_ => {}
}
}
assert!(saw_patch_begin, "expected PatchApplyBegin event");
let patch_end_success =
patch_end_success.expect("expected PatchApplyEnd event to capture success flag");
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let request_bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let body_with_tool_output = find_request_with_function_call_output(&request_bodies)
.expect("function_call_output item not found in requests");
let output_item = function_call_output(body_with_tool_output).expect("tool output item");
assert_eq!(
output_item.get("call_id").and_then(Value::as_str),
Some(call_id)
);
let output_text = extract_output_text(output_item).expect("output text present");
if let Ok(exec_output) = serde_json::from_str::<Value>(output_text) {
let exit_code = exec_output["metadata"]["exit_code"]
.as_i64()
.expect("exit_code present");
let summary = exec_output["output"].as_str().expect("output field");
assert_eq!(
exit_code, 0,
"expected apply_patch exit_code=0, got {exit_code}, summary: {summary:?}"
);
assert!(
patch_end_success,
"expected PatchApplyEnd success flag, summary: {summary:?}"
);
assert!(
summary.contains("Success."),
"expected apply_patch summary to note success, got {summary:?}"
);
let patched_path = cwd.path().join("notes.txt");
let contents = std::fs::read_to_string(&patched_path)
.unwrap_or_else(|e| panic!("failed reading {}: {e}", patched_path.display()));
assert_eq!(contents, "Tool harness apply patch\n");
} else {
assert!(
output_text.contains("codex-run-as-apply-patch"),
"expected apply_patch failure message to mention codex-run-as-apply-patch, got {output_text:?}"
);
assert!(
!patch_end_success,
"expected PatchApplyEnd to report success=false when apply_patch invocation fails"
);
}
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_reports_parse_diagnostics() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let call_id = "apply-patch-parse-error";
let patch_content = r"*** Begin Patch
*** Update File: broken.txt
*** End Patch";
let first_response = sse(vec![
serde_json::json!({
"type": "response.created",
"response": {"id": "resp-1"}
}),
ev_apply_patch_function_call(call_id, patch_content),
ev_completed("resp-1"),
]);
responses::mount_sse_once_match(&server, any(), first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "failed"),
ev_completed("resp-2"),
]);
responses::mount_sse_once_match(&server, any(), second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "please apply a patch".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
loop {
let event = codex.next_event().await.expect("event");
if matches!(event.msg, EventMsg::TaskComplete(_)) {
break;
}
}
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let request_bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let body_with_tool_output = find_request_with_function_call_output(&request_bodies)
.expect("function_call_output item not found in requests");
let output_item = function_call_output(body_with_tool_output).expect("tool output item");
assert_eq!(
output_item.get("call_id").and_then(Value::as_str),
Some(call_id)
);
let output_text = extract_output_text(output_item).expect("output text present");
assert!(
output_text.contains("apply_patch verification failed"),
"expected apply_patch verification failure message, got {output_text:?}"
);
assert!(
output_text.contains("invalid hunk"),
"expected parse diagnostics in output text, got {output_text:?}"
);
if let Some(success_flag) = output_item
.get("output")
.and_then(|value| value.as_object())
.and_then(|obj| obj.get("success"))
.and_then(serde_json::Value::as_bool)
{
assert!(
!success_flag,
"expected tool output to mark success=false for parse failures"
);
}
Ok(())
}

View File

@@ -0,0 +1,450 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Result;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_custom_tool_call;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use serde_json::Value;
use serde_json::json;
use wiremock::Request;
async fn submit_turn(
test: &TestCodex,
prompt: &str,
approval_policy: AskForApproval,
sandbox_policy: SandboxPolicy,
) -> Result<()> {
let session_model = test.session_configured.model.clone();
test.codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: prompt.into(),
}],
final_output_json_schema: None,
cwd: test.cwd.path().to_path_buf(),
approval_policy,
sandbox_policy,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
loop {
let event = test.codex.next_event().await?;
if matches!(event.msg, EventMsg::TaskComplete(_)) {
break;
}
}
Ok(())
}
fn request_bodies(requests: &[Request]) -> Result<Vec<Value>> {
requests
.iter()
.map(|req| Ok(serde_json::from_slice::<Value>(&req.body)?))
.collect()
}
fn collect_output_items<'a>(bodies: &'a [Value], ty: &str) -> Vec<&'a Value> {
let mut out = Vec::new();
for body in bodies {
if let Some(items) = body.get("input").and_then(Value::as_array) {
for item in items {
if item.get("type").and_then(Value::as_str) == Some(ty) {
out.push(item);
}
}
}
}
out
}
fn tool_names(body: &Value) -> Vec<String> {
body.get("tools")
.and_then(Value::as_array)
.map(|tools| {
tools
.iter()
.filter_map(|tool| {
tool.get("name")
.or_else(|| tool.get("type"))
.and_then(Value::as_str)
.map(str::to_string)
})
.collect()
})
.unwrap_or_default()
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn custom_tool_unknown_returns_custom_output_error() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex();
let test = builder.build(&server).await?;
let call_id = "custom-unsupported";
let tool_name = "unsupported_tool";
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_custom_tool_call(call_id, tool_name, "\"payload\""),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(&server, responses).await;
submit_turn(
&test,
"invoke custom tool",
AskForApproval::Never,
SandboxPolicy::DangerFullAccess,
)
.await?;
let requests = server.received_requests().await.expect("recorded requests");
let bodies = request_bodies(&requests)?;
let custom_items = collect_output_items(&bodies, "custom_tool_call_output");
assert_eq!(custom_items.len(), 1, "expected single custom tool output");
let item = custom_items[0];
assert_eq!(item.get("call_id").and_then(Value::as_str), Some(call_id));
let output = item
.get("output")
.and_then(Value::as_str)
.unwrap_or_default();
let expected = format!("unsupported custom tool call: {tool_name}");
assert_eq!(output, expected);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex();
let test = builder.build(&server).await?;
let command = ["/bin/echo", "shell ok"];
let call_id_blocked = "shell-blocked";
let call_id_success = "shell-success";
let first_args = json!({
"command": command,
"timeout_ms": 1_000,
"with_escalated_permissions": true,
});
let second_args = json!({
"command": command,
"timeout_ms": 1_000,
});
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(
call_id_blocked,
"shell",
&serde_json::to_string(&first_args)?,
),
ev_completed("resp-1"),
]),
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-2"}}),
ev_function_call(
call_id_success,
"shell",
&serde_json::to_string(&second_args)?,
),
ev_completed("resp-2"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-3"),
]),
];
mount_sse_sequence(&server, responses).await;
submit_turn(
&test,
"run the shell command",
AskForApproval::Never,
SandboxPolicy::DangerFullAccess,
)
.await?;
let requests = server.received_requests().await.expect("recorded requests");
let bodies = request_bodies(&requests)?;
let function_outputs = collect_output_items(&bodies, "function_call_output");
for item in &function_outputs {
let call_id = item
.get("call_id")
.and_then(Value::as_str)
.unwrap_or_default();
assert!(
call_id == call_id_blocked || call_id == call_id_success,
"unexpected call id {call_id}"
);
}
let policy = AskForApproval::Never;
let expected_message = format!(
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}"
);
let blocked_outputs: Vec<&Value> = function_outputs
.iter()
.filter(|item| item.get("call_id").and_then(Value::as_str) == Some(call_id_blocked))
.copied()
.collect();
assert!(
!blocked_outputs.is_empty(),
"expected at least one rejection output for {call_id_blocked}"
);
for item in blocked_outputs {
assert_eq!(
item.get("output").and_then(Value::as_str),
Some(expected_message.as_str()),
"unexpected rejection message"
);
}
let success_item = function_outputs
.iter()
.find(|item| item.get("call_id").and_then(Value::as_str) == Some(call_id_success))
.expect("success output present");
let output_json: Value = serde_json::from_str(
success_item
.get("output")
.and_then(Value::as_str)
.expect("success output string"),
)?;
assert_eq!(
output_json["metadata"]["exit_code"].as_i64(),
Some(0),
"expected exit code 0 after rerunning without escalation",
);
let stdout = output_json["output"].as_str().unwrap_or_default();
assert!(
stdout.contains("shell ok"),
"expected stdout to include command output, got {stdout:?}"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn local_shell_missing_ids_maps_to_function_output_error() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex();
let test = builder.build(&server).await?;
let local_shell_event = json!({
"type": "response.output_item.done",
"item": {
"type": "local_shell_call",
"status": "completed",
"action": {
"type": "exec",
"command": ["/bin/echo", "hi"],
}
}
});
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
local_shell_event,
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(&server, responses).await;
submit_turn(
&test,
"check shell output",
AskForApproval::Never,
SandboxPolicy::DangerFullAccess,
)
.await?;
let requests = server.received_requests().await.expect("recorded requests");
let bodies = request_bodies(&requests)?;
let function_outputs = collect_output_items(&bodies, "function_call_output");
assert_eq!(
function_outputs.len(),
1,
"expected a single function output"
);
let item = function_outputs[0];
assert_eq!(item.get("call_id").and_then(Value::as_str), Some(""));
assert_eq!(
item.get("output").and_then(Value::as_str),
Some("LocalShellCall without call_id or id"),
);
Ok(())
}
async fn collect_tools(use_unified_exec: bool) -> Result<Vec<String>> {
let server = start_mock_server().await;
let responses = vec![sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_assistant_message("msg-1", "done"),
ev_completed("resp-1"),
])];
mount_sse_sequence(&server, responses).await;
let mut builder = test_codex().with_config(move |config| {
config.use_experimental_unified_exec_tool = use_unified_exec;
});
let test = builder.build(&server).await?;
submit_turn(
&test,
"list tools",
AskForApproval::Never,
SandboxPolicy::DangerFullAccess,
)
.await?;
let requests = server.received_requests().await.expect("recorded requests");
assert_eq!(
requests.len(),
1,
"expected a single request for tools collection"
);
let bodies = request_bodies(&requests)?;
let first_body = bodies.first().expect("request body present");
Ok(tool_names(first_body))
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_spec_toggle_end_to_end() -> Result<()> {
skip_if_no_network!(Ok(()));
let tools_disabled = collect_tools(false).await?;
assert!(
!tools_disabled.iter().any(|name| name == "unified_exec"),
"tools list should not include unified_exec when disabled: {tools_disabled:?}"
);
let tools_enabled = collect_tools(true).await?;
assert!(
tools_enabled.iter().any(|name| name == "unified_exec"),
"tools list should include unified_exec when enabled: {tools_enabled:?}"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex();
let test = builder.build(&server).await?;
let call_id = "shell-timeout";
let timeout_ms = 50u64;
let args = json!({
"command": ["/bin/sh", "-c", "yes line | head -n 400; sleep 1"],
"timeout_ms": timeout_ms,
});
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(&server, responses).await;
submit_turn(
&test,
"run a long command",
AskForApproval::Never,
SandboxPolicy::DangerFullAccess,
)
.await?;
let requests = server.received_requests().await.expect("recorded requests");
let bodies = request_bodies(&requests)?;
let function_outputs = collect_output_items(&bodies, "function_call_output");
let timeout_item = function_outputs
.iter()
.find(|item| item.get("call_id").and_then(Value::as_str) == Some(call_id))
.expect("timeout output present");
let output_json: Value = serde_json::from_str(
timeout_item
.get("output")
.and_then(Value::as_str)
.expect("timeout output string"),
)?;
assert_eq!(
output_json["metadata"]["exit_code"].as_i64(),
Some(124),
"expected timeout exit code 124",
);
let stdout = output_json["output"].as_str().unwrap_or_default();
assert!(
stdout.starts_with("command timed out after "),
"expected timeout prefix, got {stdout:?}"
);
let first_line = stdout.lines().next().unwrap_or_default();
let duration_ms = first_line
.strip_prefix("command timed out after ")
.and_then(|line| line.strip_suffix(" milliseconds"))
.and_then(|value| value.parse::<u64>().ok())
.unwrap_or_default();
assert!(
duration_ms >= timeout_ms,
"expected duration >= configured timeout, got {duration_ms} (timeout {timeout_ms})"
);
assert!(
stdout.contains("[... omitted"),
"expected truncated output marker, got {stdout:?}"
);
Ok(())
}

View File

@@ -0,0 +1,280 @@
#![cfg(not(target_os = "windows"))]
use std::collections::HashMap;
use anyhow::Result;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::skip_if_sandbox;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use serde_json::Value;
fn extract_output_text(item: &Value) -> Option<&str> {
item.get("output").and_then(|value| match value {
Value::String(text) => Some(text.as_str()),
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
_ => None,
})
}
fn collect_tool_outputs(bodies: &[Value]) -> Result<HashMap<String, Value>> {
let mut outputs = HashMap::new();
for body in bodies {
if let Some(items) = body.get("input").and_then(Value::as_array) {
for item in items {
if item.get("type").and_then(Value::as_str) != Some("function_call_output") {
continue;
}
if let Some(call_id) = item.get("call_id").and_then(Value::as_str) {
let content = extract_output_text(item)
.ok_or_else(|| anyhow::anyhow!("missing tool output content"))?;
let parsed: Value = serde_json::from_str(content)?;
outputs.insert(call_id.to_string(), parsed);
}
}
}
}
Ok(outputs)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.use_experimental_unified_exec_tool = true;
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let first_call_id = "uexec-start";
let first_args = serde_json::json!({
"input": ["/bin/cat"],
"timeout_ms": 200,
});
let second_call_id = "uexec-stdin";
let second_args = serde_json::json!({
"input": ["hello unified exec\n"],
"session_id": "0",
"timeout_ms": 500,
});
let responses = vec![
sse(vec![
serde_json::json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(
first_call_id,
"unified_exec",
&serde_json::to_string(&first_args)?,
),
ev_completed("resp-1"),
]),
sse(vec![
serde_json::json!({"type": "response.created", "response": {"id": "resp-2"}}),
ev_function_call(
second_call_id,
"unified_exec",
&serde_json::to_string(&second_args)?,
),
ev_completed("resp-2"),
]),
sse(vec![
ev_assistant_message("msg-1", "all done"),
ev_completed("resp-3"),
]),
];
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "run unified exec".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
loop {
let event = codex.next_event().await.expect("event");
if matches!(event.msg, EventMsg::TaskComplete(_)) {
break;
}
}
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let outputs = collect_tool_outputs(&bodies)?;
let start_output = outputs
.get(first_call_id)
.expect("missing first unified_exec output");
let session_id = start_output["session_id"].as_str().unwrap_or_default();
assert!(
!session_id.is_empty(),
"expected session id in first unified_exec response"
);
assert!(
start_output["output"]
.as_str()
.unwrap_or_default()
.is_empty()
);
let reuse_output = outputs
.get(second_call_id)
.expect("missing reused unified_exec output");
assert_eq!(
reuse_output["session_id"].as_str().unwrap_or_default(),
session_id
);
let echoed = reuse_output["output"].as_str().unwrap_or_default();
assert!(
echoed.contains("hello unified exec"),
"expected echoed output, got {echoed:?}"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.use_experimental_unified_exec_tool = true;
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let first_call_id = "uexec-timeout";
let first_args = serde_json::json!({
"input": ["/bin/sh", "-c", "sleep 0.1; echo ready"],
"timeout_ms": 10,
});
let second_call_id = "uexec-poll";
let second_args = serde_json::json!({
"input": Vec::<String>::new(),
"session_id": "0",
"timeout_ms": 800,
});
let responses = vec![
sse(vec![
serde_json::json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(
first_call_id,
"unified_exec",
&serde_json::to_string(&first_args)?,
),
ev_completed("resp-1"),
]),
sse(vec![
serde_json::json!({"type": "response.created", "response": {"id": "resp-2"}}),
ev_function_call(
second_call_id,
"unified_exec",
&serde_json::to_string(&second_args)?,
),
ev_completed("resp-2"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-3"),
]),
];
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "check timeout".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
loop {
let event = codex.next_event().await.expect("event");
if matches!(event.msg, EventMsg::TaskComplete(_)) {
break;
}
}
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let outputs = collect_tool_outputs(&bodies)?;
let first_output = outputs.get(first_call_id).expect("missing timeout output");
assert_eq!(first_output["session_id"], "0");
assert!(
first_output["output"]
.as_str()
.unwrap_or_default()
.is_empty()
);
let poll_output = outputs.get(second_call_id).expect("missing poll output");
let output_text = poll_output["output"].as_str().unwrap_or_default();
assert!(
output_text.contains("ready"),
"expected ready output, got {output_text:?}"
);
Ok(())
}

View File

@@ -0,0 +1,351 @@
#![cfg(not(target_os = "windows"))]
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use core_test_support::responses;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use serde_json::Value;
use wiremock::matchers::any;
fn function_call_output(body: &Value) -> Option<&Value> {
body.get("input")
.and_then(Value::as_array)
.and_then(|items| {
items.iter().find(|item| {
item.get("type").and_then(Value::as_str) == Some("function_call_output")
})
})
}
fn find_image_message(body: &Value) -> Option<&Value> {
body.get("input")
.and_then(Value::as_array)
.and_then(|items| {
items.iter().find(|item| {
item.get("type").and_then(Value::as_str) == Some("message")
&& item
.get("content")
.and_then(Value::as_array)
.map(|content| {
content.iter().any(|span| {
span.get("type").and_then(Value::as_str) == Some("input_image")
})
})
.unwrap_or(false)
})
})
}
fn extract_output_text(item: &Value) -> Option<&str> {
item.get("output").and_then(|value| match value {
Value::String(text) => Some(text.as_str()),
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
_ => None,
})
}
fn find_request_with_function_call_output(requests: &[Value]) -> Option<&Value> {
requests
.iter()
.find(|body| function_call_output(body).is_some())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
cwd,
session_configured,
..
} = test_codex().build(&server).await?;
let rel_path = "assets/example.png";
let abs_path = cwd.path().join(rel_path);
if let Some(parent) = abs_path.parent() {
std::fs::create_dir_all(parent)?;
}
let image_bytes = b"fake_png_bytes".to_vec();
std::fs::write(&abs_path, &image_bytes)?;
let call_id = "view-image-call";
let arguments = serde_json::json!({ "path": rel_path }).to_string();
let first_response = sse(vec![
serde_json::json!({
"type": "response.created",
"response": {"id": "resp-1"}
}),
ev_function_call(call_id, "view_image", &arguments),
ev_completed("resp-1"),
]);
responses::mount_sse_once_match(&server, any(), first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]);
responses::mount_sse_once_match(&server, any(), second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "please add the screenshot".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
let mut tool_event = None;
loop {
let event = codex.next_event().await.expect("event");
match event.msg {
EventMsg::ViewImageToolCall(ev) => tool_event = Some(ev),
EventMsg::TaskComplete(_) => break,
_ => {}
}
}
let tool_event = tool_event.expect("view image tool event emitted");
assert_eq!(tool_event.call_id, call_id);
assert_eq!(tool_event.path, abs_path);
let requests = server.received_requests().await.expect("recorded requests");
assert!(
requests.len() >= 2,
"expected at least two POST requests, got {}",
requests.len()
);
let request_bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let body_with_tool_output = find_request_with_function_call_output(&request_bodies)
.expect("function_call_output item not found in requests");
let output_item = function_call_output(body_with_tool_output).expect("tool output item");
let output_text = extract_output_text(output_item).expect("output text present");
assert_eq!(output_text, "attached local image path");
let image_message = find_image_message(body_with_tool_output)
.expect("pending input image message not included in request");
let image_url = image_message
.get("content")
.and_then(Value::as_array)
.and_then(|content| {
content.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_image") {
span.get("image_url").and_then(Value::as_str)
} else {
None
}
})
})
.expect("image_url present");
let expected_image_url = format!(
"data:image/png;base64,{}",
BASE64_STANDARD.encode(&image_bytes)
);
assert_eq!(image_url, expected_image_url);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
cwd,
session_configured,
..
} = test_codex().build(&server).await?;
let rel_path = "assets";
let abs_path = cwd.path().join(rel_path);
std::fs::create_dir_all(&abs_path)?;
let call_id = "view-image-directory";
let arguments = serde_json::json!({ "path": rel_path }).to_string();
let first_response = sse(vec![
serde_json::json!({
"type": "response.created",
"response": {"id": "resp-1"}
}),
ev_function_call(call_id, "view_image", &arguments),
ev_completed("resp-1"),
]);
responses::mount_sse_once_match(&server, any(), first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]);
responses::mount_sse_once_match(&server, any(), second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "please attach the folder".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
loop {
let event = codex.next_event().await.expect("event");
if matches!(event.msg, EventMsg::TaskComplete(_)) {
break;
}
}
let requests = server.received_requests().await.expect("recorded requests");
assert!(
requests.len() >= 2,
"expected at least two POST requests, got {}",
requests.len()
);
let request_bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let body_with_tool_output = find_request_with_function_call_output(&request_bodies)
.expect("function_call_output item not found in requests");
let output_item = function_call_output(body_with_tool_output).expect("tool output item");
let output_text = extract_output_text(output_item).expect("output text present");
let expected_message = format!("image path `{}` is not a file", abs_path.display());
assert_eq!(output_text, expected_message);
assert!(
find_image_message(body_with_tool_output).is_none(),
"directory path should not produce an input_image message"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
cwd,
session_configured,
..
} = test_codex().build(&server).await?;
let rel_path = "missing/example.png";
let abs_path = cwd.path().join(rel_path);
let call_id = "view-image-missing";
let arguments = serde_json::json!({ "path": rel_path }).to_string();
let first_response = sse(vec![
serde_json::json!({
"type": "response.created",
"response": {"id": "resp-1"}
}),
ev_function_call(call_id, "view_image", &arguments),
ev_completed("resp-1"),
]);
responses::mount_sse_once_match(&server, any(), first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]);
responses::mount_sse_once_match(&server, any(), second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![InputItem::Text {
text: "please attach the missing image".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
loop {
let event = codex.next_event().await.expect("event");
if matches!(event.msg, EventMsg::TaskComplete(_)) {
break;
}
}
let requests = server.received_requests().await.expect("recorded requests");
assert!(
requests.len() >= 2,
"expected at least two POST requests, got {}",
requests.len()
);
let request_bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let body_with_tool_output = find_request_with_function_call_output(&request_bodies)
.expect("function_call_output item not found in requests");
let output_item = function_call_output(body_with_tool_output).expect("tool output item");
let output_text = extract_output_text(output_item).expect("output text present");
let expected_prefix = format!("unable to locate image at `{}`:", abs_path.display());
assert!(
output_text.starts_with(&expected_prefix),
"expected error to start with `{expected_prefix}` but got `{output_text}`"
);
assert!(
find_image_message(body_with_tool_output).is_none(),
"missing file should not produce an input_image message"
);
Ok(())
}