diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs
index 624a4ccb..dcd244db 100644
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -9,9 +9,11 @@ use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use codex_protocol::config_types::Verbosity as VerbosityConfig;
 use codex_protocol::models::ResponseItem;
 use futures::Stream;
+use serde::Deserialize;
 use serde::Serialize;
 use serde_json::Value;
 use std::borrow::Cow;
+use std::collections::HashSet;
 use std::ops::Deref;
 use std::pin::Pin;
 use std::task::Context;
@@ -64,10 +66,114 @@ impl Prompt {
     }
 
     pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
-        self.input.clone()
+        let mut input = self.input.clone();
+
+        // When using the *Freeform* apply_patch tool specifically, tool outputs
+        // should be structured text, not JSON. Do NOT reserialize when using
+        // the Function tool; note that this differs from the check above for
+        // instructions. The result is declared as a named variable for clarity.
+        let is_freeform_apply_patch_tool_present = self.tools.iter().any(|tool| match tool {
+            ToolSpec::Freeform(f) => f.name == "apply_patch",
+            _ => false,
+        });
+        if is_freeform_apply_patch_tool_present {
+            reserialize_shell_outputs(&mut input);
+        }
+
+        input
     }
 }
 
+fn reserialize_shell_outputs(items: &mut [ResponseItem]) {
+    let mut shell_call_ids: HashSet<String> = HashSet::new();
+
+    items.iter_mut().for_each(|item| match item {
+        ResponseItem::LocalShellCall { call_id, id, .. } => {
+            if let Some(identifier) = call_id.clone().or_else(|| id.clone()) {
+                shell_call_ids.insert(identifier);
+            }
+        }
+        ResponseItem::CustomToolCall {
+            id: _,
+            status: _,
+            call_id,
+            name,
+            input: _,
+        } => {
+            if name == "apply_patch" {
+                shell_call_ids.insert(call_id.clone());
+            }
+        }
+        ResponseItem::CustomToolCallOutput { call_id, output } => {
+            if shell_call_ids.remove(call_id)
+                && let Some(structured) = parse_structured_shell_output(output)
+            {
+                *output = structured
+            }
+        }
+        ResponseItem::FunctionCall { name, call_id, .. }
+            if is_shell_tool_name(name) || name == "apply_patch" =>
+        {
+            shell_call_ids.insert(call_id.clone());
+        }
+        ResponseItem::FunctionCallOutput { call_id, output } => {
+            if shell_call_ids.remove(call_id)
+                && let Some(structured) = parse_structured_shell_output(&output.content)
+            {
+                output.content = structured
+            }
+        }
+        _ => {}
+    })
+}
+
+fn is_shell_tool_name(name: &str) -> bool {
+    matches!(name, "shell" | "container.exec")
+}
+
+#[derive(Deserialize)]
+struct ExecOutputJson {
+    output: String,
+    metadata: ExecOutputMetadataJson,
+}
+
+#[derive(Deserialize)]
+struct ExecOutputMetadataJson {
+    exit_code: i32,
+    duration_seconds: f32,
+}
+
+fn parse_structured_shell_output(raw: &str) -> Option<String> {
+    let parsed: ExecOutputJson = serde_json::from_str(raw).ok()?;
+    Some(build_structured_output(&parsed))
+}
+
+fn build_structured_output(parsed: &ExecOutputJson) -> String {
+    let mut sections = Vec::new();
+    sections.push(format!("Exit code: {}", parsed.metadata.exit_code));
+    sections.push(format!(
+        "Wall time: {} seconds",
+        parsed.metadata.duration_seconds
+    ));
+
+    if let Some(total_lines) = extract_total_output_lines(&parsed.output) {
+        sections.push(format!("Total output lines: {total_lines}"));
+    }
+
+    sections.push("Output:".to_string());
+    sections.push(parsed.output.clone());
+
+    sections.join("\n")
+}
+
+fn extract_total_output_lines(output: &str) -> Option<u64> {
+    let marker_start = output.find("[... omitted ")?;
+    let marker = &output[marker_start..];
+    let (_, after_of) = marker.split_once(" of ")?;
+    let (total_segment, _) = after_of.split_once(' ')?;
+    total_segment.parse::<u64>().ok()
+}
+
 #[derive(Debug)]
 pub enum ResponseEvent {
     Created,
diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs
index 61512127..6fb83cee 100644
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -750,6 +750,7 @@ pub struct ConfigToml {
     pub experimental_use_exec_command_tool: Option<bool>,
     pub experimental_use_unified_exec_tool: Option<bool>,
     pub experimental_use_rmcp_client: Option<bool>,
+    pub experimental_use_freeform_apply_patch: Option<bool>,
 
     pub projects: Option<HashMap<String, ProjectConfig>>,
 
@@ -1111,7 +1112,9 @@ impl Config {
                 .or(cfg.chatgpt_base_url)
                 .unwrap_or("https://chatgpt.com/backend-api/".to_string()),
             include_plan_tool: include_plan_tool.unwrap_or(false),
-            include_apply_patch_tool: include_apply_patch_tool.unwrap_or(false),
+            include_apply_patch_tool: include_apply_patch_tool
+                .or(cfg.experimental_use_freeform_apply_patch)
+                .unwrap_or(false),
             tools_web_search_request,
             use_experimental_streamable_shell_tool: cfg
                 .experimental_use_exec_command_tool
diff --git a/codex-rs/core/src/model_family.rs b/codex-rs/core/src/model_family.rs
index 5ddaaca2..71d2f9b3 100644
--- a/codex-rs/core/src/model_family.rs
+++ b/codex-rs/core/src/model_family.rs
@@ -110,6 +110,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
             reasoning_summary_format: ReasoningSummaryFormat::Experimental,
             base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
             experimental_supported_tools: vec!["read_file".to_string()],
+            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
         )
     } else if slug.starts_with("gpt-5") {
         model_family!(
diff --git a/codex-rs/core/src/tools/handlers/apply_patch.rs b/codex-rs/core/src/tools/handlers/apply_patch.rs
index ced0898a..1ad8a95d 100644
--- a/codex-rs/core/src/tools/handlers/apply_patch.rs
+++ b/codex-rs/core/src/tools/handlers/apply_patch.rs
@@ -106,7 +106,7 @@ pub enum ApplyPatchToolType {
 pub(crate) fn create_apply_patch_freeform_tool() -> ToolSpec {
     ToolSpec::Freeform(FreeformTool {
         name: "apply_patch".to_string(),
-        description: "Use the `apply_patch` tool to edit files".to_string(),
+        description: "Use the `apply_patch` tool to edit files. This is a FREEFORM tool, so do not wrap the patch in JSON.".to_string(),
         format: FreeformToolFormat {
             r#type: "grammar".to_string(),
             syntax: "lark".to_string(),
diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs
index 5e24e46e..7bd622a3 100644
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -695,7 +695,7 @@ mod tests {
         });
 
         let (tools, _) = build_specs(&config, Some(HashMap::new())).build();
-        assert_eq_tool_names(&tools, &["unified_exec", "read_file"]);
+        assert_eq_tool_names(&tools, &["unified_exec", "apply_patch", "read_file"]);
     }
 
     #[test]
@@ -921,6 +921,7 @@ mod tests {
             &tools,
             &[
                 "unified_exec",
+                "apply_patch",
                 "read_file",
                 "web_search",
                 "view_image",
             ],
         );
 
         assert_eq!(
-            tools[4],
+            tools[5],
             ToolSpec::Function(ResponsesApiTool {
                 name: "dash/search".to_string(),
                 parameters: JsonSchema::Object {
@@ -988,6 +989,7 @@ mod tests {
             &tools,
             &[
                 "unified_exec",
+                "apply_patch",
                 "read_file",
                 "web_search",
                 "view_image",
             ],
         );
 
         assert_eq!(
-            tools[4],
+            tools[5],
             ToolSpec::Function(ResponsesApiTool {
                 name: "dash/paginate".to_string(),
                 parameters: JsonSchema::Object {
@@ -1019,7 +1021,7 @@
         let config = ToolsConfig::new(&ToolsConfigParams {
             model_family: &model_family,
             include_plan_tool: false,
-            include_apply_patch_tool: false,
+            include_apply_patch_tool: true,
             include_web_search_request: true,
             use_streamable_shell_tool: false,
             include_view_image_tool: true,
@@ -1052,6 +1054,7 @@
             &tools,
             &[
                 "unified_exec",
+                "apply_patch",
                 "read_file",
                 "web_search",
                 "view_image",
             ],
         );
 
         assert_eq!(
-            tools[4],
+            tools[5],
             ToolSpec::Function(ResponsesApiTool {
                 name: "dash/tags".to_string(),
                 parameters: JsonSchema::Object {
@@ -1119,6 +1122,7 @@
             &tools,
             &[
                 "unified_exec",
+                "apply_patch",
                 "read_file",
                 "web_search",
                 "view_image",
             ],
         );
 
         assert_eq!(
-            tools[4],
+            tools[5],
             ToolSpec::Function(ResponsesApiTool {
                 name: "dash/value".to_string(),
                 parameters: JsonSchema::Object {
@@ -1223,6 +1227,7 @@
             &tools,
             &[
                 "unified_exec",
+                "apply_patch",
                 "read_file",
                 "web_search",
                 "view_image",
             ],
         );
 
         assert_eq!(
-            tools[4],
+            tools[5],
             ToolSpec::Function(ResponsesApiTool {
                 name: "test_server/do_something_cool".to_string(),
                 parameters: JsonSchema::Object {
diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs
index 5f2892cb..27bddb7e 100644
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -20,6 +20,7 @@ mod review;
 mod rmcp_client;
 mod rollout_list_find;
 mod seatbelt;
+mod shell_serialization;
 mod stream_error_allows_next_turn;
 mod stream_no_completed;
 mod tool_harness;
diff --git a/codex-rs/core/tests/suite/model_tools.rs b/codex-rs/core/tests/suite/model_tools.rs
index 9c29e469..c4594d46 100644
--- a/codex-rs/core/tests/suite/model_tools.rs
+++ b/codex-rs/core/tests/suite/model_tools.rs
@@ -125,7 +125,11 @@ async fn model_selects_expected_tools() {
     let gpt5_codex_tools = collect_tool_identifiers_for_model("gpt-5-codex").await;
     assert_eq!(
         gpt5_codex_tools,
-        vec!["shell".to_string(), "read_file".to_string()],
+        vec![
+            "shell".to_string(),
+            "apply_patch".to_string(),
+            "read_file".to_string()
+        ],
         "gpt-5-codex should expose the beta read_file tool",
     );
 }
diff --git a/codex-rs/core/tests/suite/shell_serialization.rs b/codex-rs/core/tests/suite/shell_serialization.rs
new file mode 100644
index 00000000..f8f9960b
--- /dev/null
+++ b/codex-rs/core/tests/suite/shell_serialization.rs
@@ -0,0 +1,276 @@
+#![cfg(not(target_os = "windows"))]
+
+use anyhow::Result;
+use codex_core::model_family::find_family_for_model;
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::protocol::SandboxPolicy;
+use codex_protocol::config_types::ReasoningSummary;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_function_call;
+use core_test_support::responses::mount_sse_sequence;
+use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::TestCodex;
+use core_test_support::test_codex::test_codex;
+use serde_json::Value;
+use serde_json::json;
+
+async fn submit_turn(test: &TestCodex, prompt: &str, sandbox_policy: SandboxPolicy) -> Result<()> {
+    let session_model = test.session_configured.model.clone();
+
+    test.codex
+        .submit(Op::UserTurn {
+            items: vec![InputItem::Text {
+                text: prompt.into(),
+            }],
+            final_output_json_schema: None,
+            cwd: test.cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    loop {
+        let event = test.codex.next_event().await?;
+        if matches!(event.msg, EventMsg::TaskComplete(_)) {
+            break;
+        }
+    }
+
+    Ok(())
+}
+
+fn request_bodies(requests: &[wiremock::Request]) -> Result<Vec<Value>> {
+    requests
+        .iter()
+        .map(|req| Ok(serde_json::from_slice::<Value>(&req.body)?))
+        .collect()
+}
+
+fn find_function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> Option<&'a Value> {
+    for body in bodies {
+        if let Some(items) = body.get("input").and_then(Value::as_array) {
+            for item in items {
+                if item.get("type").and_then(Value::as_str) == Some("function_call_output")
+                    && item.get("call_id").and_then(Value::as_str) == Some(call_id)
+                {
+                    return Some(item);
+                }
+            }
+        }
+    }
+    None
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.include_apply_patch_tool = false;
+        config.model = "gpt-5".to_string();
+        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "shell-json";
+    let args = json!({
+        "command": ["/bin/echo", "shell json"],
+        "timeout_ms": 1_000,
+    });
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "run the json shell command",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item = find_function_call_output(&bodies, call_id).expect("shell output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("shell output string");
+
+    let parsed: Value = serde_json::from_str(output)?;
+    assert_eq!(
+        parsed
+            .get("metadata")
+            .and_then(|metadata| metadata.get("exit_code"))
+            .and_then(Value::as_i64),
+        Some(0),
+        "expected zero exit code in unformatted JSON output",
+    );
+    let stdout = parsed
+        .get("output")
+        .and_then(Value::as_str)
+        .unwrap_or_default();
+    assert!(
+        stdout.contains("shell json"),
+        "expected stdout to include command output, got {stdout:?}"
+    );
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.include_apply_patch_tool = true;
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "shell-structured";
+    let args = json!({
+        "command": ["/bin/echo", "freeform shell"],
+        "timeout_ms": 1_000,
+    });
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "run the structured shell command",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item =
+        find_function_call_output(&bodies, call_id).expect("structured output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("structured output string");
+
+    assert!(
+        serde_json::from_str::<Value>(output).is_err(),
+        "expected structured shell output to be plain text",
+    );
+    assert!(
+        output.starts_with("Exit code: 0\n"),
+        "expected exit code prefix, got {output:?}",
+    );
+    assert!(
+        output.contains("\nOutput:\n"),
+        "expected Output section, got {output:?}"
+    );
+    assert!(
+        output.contains("freeform shell"),
+        "expected stdout content, got {output:?}"
+    );
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn shell_output_reserializes_truncated_content() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5-codex".to_string();
+        config.model_family =
+            find_family_for_model("gpt-5-codex").expect("gpt-5 is a model family");
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "shell-truncated";
+    let args = json!({
+        "command": ["/bin/sh", "-c", "seq 1 400"],
+        "timeout_ms": 1_000,
+    });
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "run the truncation shell command",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item =
+        find_function_call_output(&bodies, call_id).expect("truncated output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("truncated output string");
+
+    assert!(
+        serde_json::from_str::<Value>(output).is_err(),
+        "expected truncated shell output to be plain text",
+    );
+    assert!(
+        output.starts_with("Exit code: 0\n"),
+        "expected exit code prefix, got {output:?}",
+    );
+    assert!(
+        output.lines().any(|line| line == "Total output lines: 400"),
+        "expected total output lines marker, got {output:?}",
+    );
+    assert!(
+        output.contains("[... omitted"),
+        "expected truncated marker, got {output:?}",
+    );
+
+    Ok(())
+}
diff --git a/codex-rs/core/tests/suite/tool_harness.rs b/codex-rs/core/tests/suite/tool_harness.rs
index 317e530c..156a7a95 100644
--- a/codex-rs/core/tests/suite/tool_harness.rs
+++ b/codex-rs/core/tests/suite/tool_harness.rs
@@ -1,5 +1,6 @@
 #![cfg(not(target_os = "windows"))]
 
+use codex_core::model_family::find_family_for_model;
 use codex_core::protocol::AskForApproval;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
@@ -53,7 +54,8 @@ async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()>
     let server = start_mock_server().await;
 
     let mut builder = test_codex().with_config(|config| {
-        config.include_apply_patch_tool = true;
+        config.model = "gpt-5".to_string();
+        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
     });
     let TestCodex {
         codex,
diff --git a/codex-rs/core/tests/suite/tools.rs b/codex-rs/core/tests/suite/tools.rs
index 824a629b..b3c067ec 100644
--- a/codex-rs/core/tests/suite/tools.rs
+++ b/codex-rs/core/tests/suite/tools.rs
@@ -2,6 +2,7 @@
 #![allow(clippy::unwrap_used, clippy::expect_used)]
 
 use anyhow::Result;
+use codex_core::model_family::find_family_for_model;
 use codex_core::protocol::AskForApproval;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
@@ -147,7 +148,10 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
     skip_if_no_network!(Ok(()));
 
     let server = start_mock_server().await;
-    let mut builder = test_codex();
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5".to_string();
+        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
+    });
     let test = builder.build(&server).await?;
 
     let command = ["/bin/echo", "shell ok"];
@@ -375,7 +379,10 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
     skip_if_no_network!(Ok(()));
 
     let server = start_mock_server().await;
-    let mut builder = test_codex();
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5".to_string();
+        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
+    });
     let test = builder.build(&server).await?;
 
     let call_id = "shell-timeout";
diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs
index e3bf6e52..18e09778 100644
--- a/codex-rs/exec/src/lib.rs
+++ b/codex-rs/exec/src/lib.rs
@@ -177,7 +177,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
         codex_linux_sandbox_exe,
         base_instructions: None,
         include_plan_tool: Some(include_plan_tool),
-        include_apply_patch_tool: Some(true),
+        include_apply_patch_tool: None,
         include_view_image_tool: None,
        show_raw_agent_reasoning: oss.then_some(true),
         tools_web_search_request: None,