diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index ef07a36d..a0dd9133 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -751,6 +751,7 @@ dependencies = [ "codex-common", "codex-core", "codex-ollama", + "core_test_support", "libc", "owo-colors", "predicates", @@ -760,6 +761,7 @@ dependencies = [ "tokio", "tracing", "tracing-subscriber", + "wiremock", ] [[package]] diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 482ac2f1..edc034ee 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -67,6 +67,7 @@ use crate::models::ReasoningItemReasoningSummary; use crate::models::ResponseInputItem; use crate::models::ResponseItem; use crate::models::ShellToolCallParams; +use crate::openai_tools::ApplyPatchToolArgs; use crate::openai_tools::ToolsConfig; use crate::openai_tools::get_openai_tools; use crate::parse_command::parse_command; @@ -455,6 +456,7 @@ impl Session { approval_policy, sandbox_policy.clone(), config.include_plan_tool, + config.include_apply_patch_tool, ), tx_event: tx_event.clone(), user_instructions, @@ -1727,6 +1729,30 @@ async fn handle_function_call( handle_container_exec_with_params(params, sess, turn_diff_tracker, sub_id, call_id) .await } + "apply_patch" => { + let args = match serde_json::from_str::<ApplyPatchToolArgs>(&arguments) { + Ok(a) => a, + Err(e) => { + return ResponseInputItem::FunctionCallOutput { + call_id, + output: FunctionCallOutputPayload { + content: format!("failed to parse function arguments: {e}"), + success: None, + }, + }; + } + }; + let exec_params = ExecParams { + command: vec!["apply_patch".to_string(), args.input.clone()], + cwd: sess.cwd.clone(), + timeout_ms: None, + env: HashMap::new(), + with_escalated_permissions: None, + justification: None, + }; + handle_container_exec_with_params(exec_params, sess, turn_diff_tracker, sub_id, call_id) + .await + } "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await, _ => { match sess.mcp_connection_manager.parse_tool_name(&name) { diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index b17bb808..e2a68d07 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -156,6 +156,11 @@ pub struct Config { /// Include an experimental plan tool that the model can use to update its current plan and status of each step. pub include_plan_tool: bool, + /// Include the `apply_patch` tool for models that benefit from invoking + /// file edits as a structured tool call. When unset, this falls back to the + /// model family's default preference. + pub include_apply_patch_tool: bool, + /// The value for the `originator` header included with Responses API requests.
pub internal_originator: Option<String>, } @@ -480,6 +485,7 @@ pub struct ConfigOverrides { pub codex_linux_sandbox_exe: Option<PathBuf>, pub base_instructions: Option<String>, pub include_plan_tool: Option<bool>, + pub include_apply_patch_tool: Option<bool>, pub disable_response_storage: Option<bool>, pub show_raw_agent_reasoning: Option<bool>, } @@ -505,6 +511,7 @@ impl Config { codex_linux_sandbox_exe, base_instructions, include_plan_tool, + include_apply_patch_tool, disable_response_storage, show_raw_agent_reasoning, } = overrides; @@ -581,6 +588,7 @@ impl Config { needs_special_apply_patch_instructions: false, supports_reasoning_summaries, uses_local_shell_tool: false, + uses_apply_patch_tool: false, } }); @@ -607,6 +615,9 @@ impl Config { Self::get_base_instructions(experimental_instructions_path, &resolved_cwd)?; let base_instructions = base_instructions.or(file_base_instructions); + let include_apply_patch_tool_val = + include_apply_patch_tool.unwrap_or(model_family.uses_apply_patch_tool); + let config = Self { model, model_family, @@ -659,6 +670,7 @@ impl Config { experimental_resume, include_plan_tool: include_plan_tool.unwrap_or(false), + include_apply_patch_tool: include_apply_patch_tool_val, internal_originator: cfg.internal_originator, }; Ok(config) @@ -1022,6 +1034,7 @@ disable_response_storage = true experimental_resume: None, base_instructions: None, include_plan_tool: false, + include_apply_patch_tool: false, internal_originator: None, }, o3_profile_config @@ -1073,6 +1086,7 @@ disable_response_storage = true experimental_resume: None, base_instructions: None, include_plan_tool: false, + include_apply_patch_tool: false, internal_originator: None, }; @@ -1139,6 +1153,7 @@ disable_response_storage = true experimental_resume: None, base_instructions: None, include_plan_tool: false, + include_apply_patch_tool: false, internal_originator: None, }; diff --git a/codex-rs/core/src/model_family.rs b/codex-rs/core/src/model_family.rs index fa4826d7..6d1c2efc 100644 --- a/codex-rs/core/src/model_family.rs +++ b/codex-rs/core/src/model_family.rs @@ -23,6 +23,10 @@ pub struct ModelFamily { // the model such that its description can be omitted. // See https://platform.openai.com/docs/guides/tools-local-shell pub uses_local_shell_tool: bool, + + /// True if the model performs better when `apply_patch` is provided as + /// a tool call instead of just a bash command. + pub uses_apply_patch_tool: bool, } macro_rules! model_family { @@ -36,6 +40,7 @@ macro_rules! model_family { needs_special_apply_patch_instructions: false, supports_reasoning_summaries: false, uses_local_shell_tool: false, + uses_apply_patch_tool: false, }; // apply overrides $( @@ -55,6 +60,7 @@ macro_rules!
simple_model_family { needs_special_apply_patch_instructions: false, supports_reasoning_summaries: false, uses_local_shell_tool: false, + uses_apply_patch_tool: false, }) }}; } @@ -88,10 +94,10 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> { slug, "gpt-4.1", needs_special_apply_patch_instructions: true, ) + } else if slug.starts_with("gpt-oss") { + model_family!(slug, "gpt-oss", uses_apply_patch_tool: true) } else if slug.starts_with("gpt-4o") { simple_model_family!(slug, "gpt-4o") - } else if slug.starts_with("gpt-oss") { - simple_model_family!(slug, "gpt-oss") } else if slug.starts_with("gpt-3.5") { simple_model_family!(slug, "gpt-3.5") } else if slug.starts_with("gpt-5") { diff --git a/codex-rs/core/src/openai_tools.rs b/codex-rs/core/src/openai_tools.rs index 7c658804..32ead20e 100644 --- a/codex-rs/core/src/openai_tools.rs +++ b/codex-rs/core/src/openai_tools.rs @@ -43,6 +43,7 @@ pub enum ConfigShellToolType { pub struct ToolsConfig { pub shell_type: ConfigShellToolType, pub plan_tool: bool, + pub apply_patch_tool: bool, } impl ToolsConfig { @@ -51,6 +52,7 @@ impl ToolsConfig { approval_policy: AskForApproval, sandbox_policy: SandboxPolicy, include_plan_tool: bool, + include_apply_patch_tool: bool, ) -> Self { let mut shell_type = if model_family.uses_local_shell_tool { ConfigShellToolType::LocalShell @@ -66,6 +68,7 @@ impl ToolsConfig { Self { shell_type, plan_tool: include_plan_tool, + apply_patch_tool: include_apply_patch_tool || model_family.uses_apply_patch_tool, } } } @@ -235,6 +238,87 @@ The shell tool is used to execute shell commands. }) } +#[derive(Serialize, Deserialize)] +pub(crate) struct ApplyPatchToolArgs { + pub(crate) input: String, +} + +fn create_apply_patch_tool() -> OpenAiTool { + // Minimal schema: one required string argument containing the patch body + let mut properties = BTreeMap::new(); + properties.insert( + "input".to_string(), + JsonSchema::String { + description: Some(r#"The entire contents of the apply_patch command"#.to_string()), + }, + ); + + OpenAiTool::Function(ResponsesApiTool { + name: "apply_patch".to_string(), + description: r#"Use this tool to edit files. +Your patch language is a stripped-down, file-oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high-level envelope: + +*** Begin Patch +[ one or more file sections ] +*** End Patch + +Within that envelope, you get a sequence of file operations. +You MUST include a header to specify the action you are taking. +Each operation starts with one of three headers: + +*** Add File: <path> - create a new file. Every following line is a + line (the initial contents). +*** Delete File: <path> - remove an existing file. Nothing follows. +*** Update File: <path> - patch an existing file in place (optionally with a rename). + +May be immediately followed by *** Move to: <new path> if you want to rename the file. +Then one or more “hunks”, each introduced by @@ (optionally followed by a hunk header). +Within a hunk each line starts with: ++ for inserted text, +- for removed text, or + space ( ) for context. +At the end of a truncated hunk you can emit *** End of File.
+ +Patch := Begin { FileOp } End +Begin := "*** Begin Patch" NEWLINE +End := "*** End Patch" NEWLINE +FileOp := AddFile | DeleteFile | UpdateFile +AddFile := "*** Add File: " path NEWLINE { "+" line NEWLINE } +DeleteFile := "*** Delete File: " path NEWLINE +UpdateFile := "*** Update File: " path NEWLINE [ MoveTo ] { Hunk } +MoveTo := "*** Move to: " newPath NEWLINE +Hunk := "@@" [ header ] NEWLINE { HunkLine } [ "*** End of File" NEWLINE ] +HunkLine := (" " | "-" | "+") text NEWLINE + +A full patch can combine several operations: + +*** Begin Patch +*** Add File: hello.txt ++Hello world +*** Update File: src/app.py +*** Move to: src/main.py +@@ def greet(): +-print("Hi") ++print("Hello, world!") +*** Delete File: obsolete.txt +*** End Patch + +It is important to remember: + +- You must include a header with your intended action (Add/Delete/Update) +- You must prefix new lines with `+` even when creating a new file +"# + .to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: Some(vec!["input".to_string()]), + additional_properties: Some(false), + }, + }) +} + /// Returns JSON values that are compatible with Function Calling in the /// Responses API: /// https://platform.openai.com/docs/guides/function-calling?api-mode=responses @@ -455,6 +539,10 @@ pub(crate) fn get_openai_tools( tools.push(PLAN_TOOL.clone()); } + if config.apply_patch_tool { + tools.push(create_apply_patch_tool()); + } + if let Some(mcp_tools) = mcp_tools { for (name, tool) in mcp_tools { match mcp_tool_to_openai_tool(name.clone(), tool.clone()) { @@ -508,6 +596,7 @@ mod tests { AskForApproval::Never, SandboxPolicy::ReadOnly, true, + model_family.uses_apply_patch_tool, ); let tools = get_openai_tools(&config, Some(HashMap::new())); @@ -522,6 +611,7 @@ mod tests { AskForApproval::Never, SandboxPolicy::ReadOnly, true, + model_family.uses_apply_patch_tool, ); let tools = get_openai_tools(&config, Some(HashMap::new())); @@ -536,6 +626,7 @@ mod tests { AskForApproval::Never, SandboxPolicy::ReadOnly, false, + model_family.uses_apply_patch_tool, ); let tools = get_openai_tools( &config, @@ -629,6 +720,7 @@ mod tests { AskForApproval::Never, SandboxPolicy::ReadOnly, false, + model_family.uses_apply_patch_tool, ); let tools = get_openai_tools( @@ -684,6 +776,7 @@ mod tests { AskForApproval::Never, SandboxPolicy::ReadOnly, false, + model_family.uses_apply_patch_tool, ); let tools = get_openai_tools( @@ -734,6 +827,7 @@ mod tests { AskForApproval::Never, SandboxPolicy::ReadOnly, false, + model_family.uses_apply_patch_tool, ); let tools = get_openai_tools( @@ -787,6 +881,7 @@ mod tests { AskForApproval::Never, SandboxPolicy::ReadOnly, false, + model_family.uses_apply_patch_tool, ); let tools = get_openai_tools( diff --git a/codex-rs/core/tests/common/lib.rs b/codex-rs/core/tests/common/lib.rs index 0a9c8d5a..244d093e 100644 --- a/codex-rs/core/tests/common/lib.rs +++ b/codex-rs/core/tests/common/lib.rs @@ -47,6 +47,26 @@ pub fn load_sse_fixture(path: impl AsRef<Path>) -> String { .collect() } +pub fn load_sse_fixture_with_id_from_str(raw: &str, id: &str) -> String { + let replaced = raw.replace("__ID__", id); + let events: Vec<serde_json::Value> = + serde_json::from_str(&replaced).expect("parse JSON fixture"); + events + .into_iter() + .map(|e| { + let kind = e + .get("type") + .and_then(|v| v.as_str()) + .expect("fixture event missing type"); + if e.as_object().map(|o| o.len() == 1).unwrap_or(false) { + format!("event: {kind}\n\n") + } else { + format!("event: {kind}\ndata: {e}\n\n") + } + }) + .collect() +} + /// Same as
[`load_sse_fixture`], but replaces the placeholder `__ID__` in the /// fixture template with the supplied identifier before parsing. This lets a /// single JSON template be reused by multiple tests that each need a unique diff --git a/codex-rs/exec/Cargo.toml b/codex-rs/exec/Cargo.toml index b7c20df3..9847788d 100644 --- a/codex-rs/exec/Cargo.toml +++ b/codex-rs/exec/Cargo.toml @@ -44,3 +44,5 @@ assert_cmd = "2" libc = "0.2" predicates = "3" tempfile = "3.13.0" +wiremock = "0.6" +core_test_support = { path = "../core/tests/common" } diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index ff6123d7..e6b4d7fb 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -146,6 +146,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any codex_linux_sandbox_exe, base_instructions: None, include_plan_tool: None, + include_apply_patch_tool: None, disable_response_storage: oss.then_some(true), show_raw_agent_reasoning: oss.then_some(true), }; diff --git a/codex-rs/exec/tests/apply_patch.rs b/codex-rs/exec/tests/apply_patch.rs index f65d32e1..ecce43d7 100644 --- a/codex-rs/exec/tests/apply_patch.rs +++ b/codex-rs/exec/tests/apply_patch.rs @@ -1,3 +1,5 @@ +#![allow(clippy::expect_used, clippy::unwrap_used)] + use anyhow::Context; use assert_cmd::prelude::*; use codex_core::CODEX_APPLY_PATCH_ARG1; @@ -37,3 +39,152 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> { ); Ok(()) } + +#[cfg(not(target_os = "windows"))] +#[tokio::test] +async fn test_apply_patch_tool() -> anyhow::Result<()> { + use core_test_support::load_sse_fixture_with_id_from_str; + use tempfile::TempDir; + use wiremock::Mock; + use wiremock::MockServer; + use wiremock::ResponseTemplate; + use wiremock::matchers::method; + use wiremock::matchers::path; + + const SSE_TOOL_CALL_ADD: &str = r#"[ + { + "type": "response.output_item.done", + "item": { + "type": "function_call", + "name": "apply_patch", + "arguments": "{\n \"input\": \"*** Begin Patch\\n*** Add File: test.md\\n+Hello world\\n*** End Patch\"\n}", + "call_id": "__ID__" + } + }, + { + "type": "response.completed", + "response": { + "id": "__ID__", + "usage": { + "input_tokens": 0, + "input_tokens_details": null, + "output_tokens": 0, + "output_tokens_details": null, + "total_tokens": 0 + }, + "output": [] + } + } +]"#; + + const SSE_TOOL_CALL_UPDATE: &str = r#"[ + { + "type": "response.output_item.done", + "item": { + "type": "function_call", + "name": "apply_patch", + "arguments": "{\n \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}", + "call_id": "__ID__" + } + }, + { + "type": "response.completed", + "response": { + "id": "__ID__", + "usage": { + "input_tokens": 0, + "input_tokens_details": null, + "output_tokens": 0, + "output_tokens_details": null, + "total_tokens": 0 + }, + "output": [] + } + } +]"#; + + const SSE_TOOL_CALL_COMPLETED: &str = r#"[ + { + "type": "response.completed", + "response": { + "id": "__ID__", + "usage": { + "input_tokens": 0, + "input_tokens_details": null, + "output_tokens": 0, + "output_tokens_details": null, + "total_tokens": 0 + }, + "output": [] + } + } +]"#; + + // Start a mock model server + let server = MockServer::start().await; + + // First response: model calls apply_patch to create test.md + let first = ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw( + load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"), + "text/event-stream", + ); + + 
Mock::given(method("POST")) + // .and(path("/v1/responses")) + .respond_with(first) + .up_to_n_times(1) + .mount(&server) + .await; + + // Second response: model calls apply_patch to update test.md + let second = ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw( + load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"), + "text/event-stream", + ); + + Mock::given(method("POST")) + .and(path("/v1/responses")) + .respond_with(second) + .up_to_n_times(1) + .mount(&server) + .await; + + let final_completed = ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw( + load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"), + "text/event-stream", + ); + + Mock::given(method("POST")) + // .and(path("/v1/responses")) + .respond_with(final_completed) + .expect(1) + .mount(&server) + .await; + + let tmp_cwd = TempDir::new().unwrap(); + Command::cargo_bin("codex-exec") + .context("should find binary for codex-exec")? + .current_dir(tmp_cwd.path()) + .env("CODEX_HOME", tmp_cwd.path()) + .env("OPENAI_API_KEY", "dummy") + .env("OPENAI_BASE_URL", format!("{}/v1", server.uri())) + .arg("--skip-git-repo-check") + .arg("-s") + .arg("workspace-write") + .arg("foo") + .assert() + .success(); + + // Verify final file contents + let final_path = tmp_cwd.path().join("test.md"); + let contents = std::fs::read_to_string(&final_path) + .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display())); + assert_eq!(contents, "Final text\n"); + Ok(()) +} diff --git a/codex-rs/mcp-server/src/codex_message_processor.rs b/codex-rs/mcp-server/src/codex_message_processor.rs index 2495feb9..cdf24d92 100644 --- a/codex-rs/mcp-server/src/codex_message_processor.rs +++ b/codex-rs/mcp-server/src/codex_message_processor.rs @@ -367,6 +367,7 @@ fn derive_config_from_params( config: cli_overrides, base_instructions, include_plan_tool, + include_apply_patch_tool, } = params; let overrides = ConfigOverrides { model, @@ -378,6 +379,7 @@ fn derive_config_from_params( codex_linux_sandbox_exe, base_instructions, include_plan_tool, + include_apply_patch_tool, disable_response_storage: None, show_raw_agent_reasoning: None, }; diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs index 548f2933..906921a0 100644 --- a/codex-rs/mcp-server/src/codex_tool_config.rs +++ b/codex-rs/mcp-server/src/codex_tool_config.rs @@ -160,6 +160,7 @@ impl CodexToolCallParam { codex_linux_sandbox_exe, base_instructions, include_plan_tool, + include_apply_patch_tool: None, disable_response_storage: None, show_raw_agent_reasoning: None, }; diff --git a/codex-rs/mcp-server/src/tool_handlers/create_conversation.rs b/codex-rs/mcp-server/src/tool_handlers/create_conversation.rs index 77a68f01..eee2e1d5 100644 --- a/codex-rs/mcp-server/src/tool_handlers/create_conversation.rs +++ b/codex-rs/mcp-server/src/tool_handlers/create_conversation.rs @@ -52,6 +52,7 @@ pub(crate) async fn handle_create_conversation( codex_linux_sandbox_exe: None, base_instructions, include_plan_tool: None, + include_apply_patch_tool: None, disable_response_storage: None, show_raw_agent_reasoning: None, }; diff --git a/codex-rs/mcp-server/src/wire_format.rs b/codex-rs/mcp-server/src/wire_format.rs index 034c72a1..e2ba729e 100644 --- a/codex-rs/mcp-server/src/wire_format.rs +++ b/codex-rs/mcp-server/src/wire_format.rs @@ -90,6 +90,10 @@ pub struct NewConversationParams { /// Whether to include the plan tool in the conversation. 
#[serde(skip_serializing_if = "Option::is_none")] pub include_plan_tool: Option<bool>, + + /// Whether to include the apply patch tool in the conversation. + #[serde(skip_serializing_if = "Option::is_none")] + pub include_apply_patch_tool: Option<bool>, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] @@ -241,6 +245,7 @@ mod tests { config: None, base_instructions: None, include_plan_tool: None, + include_apply_patch_tool: None, }, }; assert_eq!( diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index f1a7d99b..7d605d68 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -116,6 +116,7 @@ pub async fn run_main( codex_linux_sandbox_exe, base_instructions: None, include_plan_tool: Some(true), + include_apply_patch_tool: None, disable_response_storage: cli.oss.then_some(true), show_raw_agent_reasoning: cli.oss.then_some(true), };
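
For illustration (not part of the diff): a minimal, self-contained sketch of the new `apply_patch` dispatch path, assuming only serde and serde_json. The struct mirrors `ApplyPatchToolArgs` from openai_tools.rs and the command construction mirrors the new "apply_patch" arm in handle_function_call; the standalone `main` wrapper and the hard-coded arguments string are illustrative only.

use serde::Deserialize;

// Mirrors ApplyPatchToolArgs in openai_tools.rs: one required `input` string
// carrying the entire patch envelope.
#[derive(Deserialize)]
struct ApplyPatchToolArgs {
    input: String,
}

fn main() -> Result<(), serde_json::Error> {
    // Arguments as the model emits them for the `apply_patch` function call
    // (same shape as the SSE fixtures in exec/tests/apply_patch.rs).
    let arguments = r#"{"input":"*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch"}"#;

    // Same parse step as the new "apply_patch" arm in handle_function_call.
    let args: ApplyPatchToolArgs = serde_json::from_str(arguments)?;

    // The handler then re-issues this as `apply_patch <patch>` through the
    // existing exec pipeline rather than applying it directly.
    let command = vec!["apply_patch".to_string(), args.input];
    assert_eq!(command[0], "apply_patch");
    assert!(command[1].starts_with("*** Begin Patch"));
    Ok(())
}

Because the patch is routed through handle_container_exec_with_params, the tool call inherits the same sandboxing, approval, and turn-diff tracking as a shell-initiated `apply_patch`.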