[tools] Add apply_patch tool (#2303)

## Summary
We've been seeing a number of issues and reports with our synthetic
`apply_patch` tool, e.g. #802. Let's make this a real tool - in my
anecdotal testing, it's critical for GPT-OSS models, but I'd like to
make it the standard across GPT-5 and codex models as well.
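
For reference, the new tool takes a single required `input` string containing the whole patch envelope. A minimal sketch (illustrative values; assumes `serde`/`serde_json`) of the arguments a model emits and how they map onto the existing `apply_patch` exec path:

```rust
// Sketch only: mirrors the ApplyPatchToolArgs struct and the exec command
// construction added in this PR; values are illustrative.
use serde::Deserialize;

#[derive(Deserialize)]
struct ApplyPatchToolArgs {
    input: String,
}

fn main() -> Result<(), serde_json::Error> {
    // Function-call arguments as they arrive from the Responses API.
    let arguments =
        r#"{"input": "*** Begin Patch\n*** Add File: hello.txt\n+Hello world\n*** End Patch"}"#;
    let args: ApplyPatchToolArgs = serde_json::from_str(arguments)?;

    // The session then runs the patch through the normal exec/sandbox path,
    // as if the model had invoked `apply_patch <patch>` as a shell command.
    let command = vec!["apply_patch".to_string(), args.input];
    assert_eq!(command.len(), 2);
    Ok(())
}
```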

## Testing
- [x] Tested locally
- [x] Integration test
Author: Dylan
Date: 2025-08-15 11:55:53 -04:00
Committed by: GitHub
Commit: 6df8e35314 (parent 917e29803b)
14 changed files with 330 additions and 2 deletions

codex-rs/Cargo.lock generated

@@ -751,6 +751,7 @@ dependencies = [
"codex-common",
"codex-core",
"codex-ollama",
"core_test_support",
"libc",
"owo-colors",
"predicates",
@@ -760,6 +761,7 @@ dependencies = [
"tokio",
"tracing",
"tracing-subscriber",
"wiremock",
]
[[package]]


@@ -67,6 +67,7 @@ use crate::models::ReasoningItemReasoningSummary;
use crate::models::ResponseInputItem;
use crate::models::ResponseItem;
use crate::models::ShellToolCallParams;
use crate::openai_tools::ApplyPatchToolArgs;
use crate::openai_tools::ToolsConfig;
use crate::openai_tools::get_openai_tools;
use crate::parse_command::parse_command;
@@ -455,6 +456,7 @@ impl Session {
approval_policy,
sandbox_policy.clone(),
config.include_plan_tool,
config.include_apply_patch_tool,
),
tx_event: tx_event.clone(),
user_instructions,
@@ -1727,6 +1729,30 @@ async fn handle_function_call(
handle_container_exec_with_params(params, sess, turn_diff_tracker, sub_id, call_id)
.await
}
"apply_patch" => {
let args = match serde_json::from_str::<ApplyPatchToolArgs>(&arguments) {
Ok(a) => a,
Err(e) => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("failed to parse function arguments: {e}"),
success: None,
},
};
}
};
let exec_params = ExecParams {
command: vec!["apply_patch".to_string(), args.input.clone()],
cwd: sess.cwd.clone(),
timeout_ms: None,
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
handle_container_exec_with_params(exec_params, sess, turn_diff_tracker, sub_id, call_id)
.await
}
"update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
_ => {
match sess.mcp_connection_manager.parse_tool_name(&name) {


@@ -156,6 +156,11 @@ pub struct Config {
/// Include an experimental plan tool that the model can use to update its current plan and status of each step.
pub include_plan_tool: bool,
/// Include the `apply_patch` tool for models that benefit from invoking
/// file edits as a structured tool call. When unset, this falls back to the
/// model family's default preference.
pub include_apply_patch_tool: bool,
/// The value for the `originator` header included with Responses API requests.
pub internal_originator: Option<String>,
}
@@ -480,6 +485,7 @@ pub struct ConfigOverrides {
pub codex_linux_sandbox_exe: Option<PathBuf>,
pub base_instructions: Option<String>,
pub include_plan_tool: Option<bool>,
pub include_apply_patch_tool: Option<bool>,
pub disable_response_storage: Option<bool>,
pub show_raw_agent_reasoning: Option<bool>,
}
@@ -505,6 +511,7 @@ impl Config {
codex_linux_sandbox_exe,
base_instructions,
include_plan_tool,
include_apply_patch_tool,
disable_response_storage,
show_raw_agent_reasoning,
} = overrides;
@@ -581,6 +588,7 @@ impl Config {
needs_special_apply_patch_instructions: false,
supports_reasoning_summaries,
uses_local_shell_tool: false,
uses_apply_patch_tool: false,
}
});
@@ -607,6 +615,9 @@ impl Config {
Self::get_base_instructions(experimental_instructions_path, &resolved_cwd)?;
let base_instructions = base_instructions.or(file_base_instructions);
let include_apply_patch_tool_val =
include_apply_patch_tool.unwrap_or(model_family.uses_apply_patch_tool);
let config = Self {
model,
model_family,
@@ -659,6 +670,7 @@ impl Config {
experimental_resume,
include_plan_tool: include_plan_tool.unwrap_or(false),
include_apply_patch_tool: include_apply_patch_tool_val,
internal_originator: cfg.internal_originator,
};
Ok(config)
@@ -1022,6 +1034,7 @@ disable_response_storage = true
experimental_resume: None,
base_instructions: None,
include_plan_tool: false,
include_apply_patch_tool: false,
internal_originator: None,
},
o3_profile_config
@@ -1073,6 +1086,7 @@ disable_response_storage = true
experimental_resume: None,
base_instructions: None,
include_plan_tool: false,
include_apply_patch_tool: false,
internal_originator: None,
};
@@ -1139,6 +1153,7 @@ disable_response_storage = true
experimental_resume: None,
base_instructions: None,
include_plan_tool: false,
include_apply_patch_tool: false,
internal_originator: None,
};


@@ -23,6 +23,10 @@ pub struct ModelFamily {
// the model such that its description can be omitted.
// See https://platform.openai.com/docs/guides/tools-local-shell
pub uses_local_shell_tool: bool,
/// True if the model performs better when `apply_patch` is provided as
/// a tool call instead of just a bash command.
pub uses_apply_patch_tool: bool,
}
macro_rules! model_family {
@@ -36,6 +40,7 @@ macro_rules! model_family {
needs_special_apply_patch_instructions: false,
supports_reasoning_summaries: false,
uses_local_shell_tool: false,
uses_apply_patch_tool: false,
};
// apply overrides
$(
@@ -55,6 +60,7 @@ macro_rules! simple_model_family {
needs_special_apply_patch_instructions: false,
supports_reasoning_summaries: false,
uses_local_shell_tool: false,
uses_apply_patch_tool: false,
})
}};
}
@@ -88,10 +94,10 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
slug, "gpt-4.1",
needs_special_apply_patch_instructions: true,
)
} else if slug.starts_with("gpt-oss") {
model_family!(slug, "gpt-oss", uses_apply_patch_tool: true)
} else if slug.starts_with("gpt-4o") {
simple_model_family!(slug, "gpt-4o")
} else if slug.starts_with("gpt-oss") {
simple_model_family!(slug, "gpt-oss")
} else if slug.starts_with("gpt-3.5") {
simple_model_family!(slug, "gpt-3.5")
} else if slug.starts_with("gpt-5") {


@@ -43,6 +43,7 @@ pub enum ConfigShellToolType {
pub struct ToolsConfig {
pub shell_type: ConfigShellToolType,
pub plan_tool: bool,
pub apply_patch_tool: bool,
}
impl ToolsConfig {
@@ -51,6 +52,7 @@ impl ToolsConfig {
approval_policy: AskForApproval,
sandbox_policy: SandboxPolicy,
include_plan_tool: bool,
include_apply_patch_tool: bool,
) -> Self {
let mut shell_type = if model_family.uses_local_shell_tool {
ConfigShellToolType::LocalShell
@@ -66,6 +68,7 @@ impl ToolsConfig {
Self {
shell_type,
plan_tool: include_plan_tool,
apply_patch_tool: include_apply_patch_tool || model_family.uses_apply_patch_tool,
}
}
}
@@ -235,6 +238,87 @@ The shell tool is used to execute shell commands.
})
}
#[derive(Serialize, Deserialize)]
pub(crate) struct ApplyPatchToolArgs {
pub(crate) input: String,
}
fn create_apply_patch_tool() -> OpenAiTool {
// Minimal schema: one required string argument containing the patch body
let mut properties = BTreeMap::new();
properties.insert(
"input".to_string(),
JsonSchema::String {
description: Some(r#"The entire contents of the apply_patch command"#.to_string()),
},
);
OpenAiTool::Function(ResponsesApiTool {
name: "apply_patch".to_string(),
description: r#"Use this tool to edit files.
Your patch language is a stripped-down, file-oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high-level envelope:
*** Begin Patch
[ one or more file sections ]
*** End Patch
Within that envelope, you get a sequence of file operations.
You MUST include a header to specify the action you are taking.
Each operation starts with one of three headers:
*** Add File: <path> - create a new file. Every following line is a + line (the initial contents).
*** Delete File: <path> - remove an existing file. Nothing follows.
*** Update File: <path> - patch an existing file in place (optionally with a rename).
May be immediately followed by *** Move to: <new path> if you want to rename the file.
Then one or more "hunks", each introduced by @@ (optionally followed by a hunk header).
Within a hunk each line starts with:
+ for inserted text,
- for removed text, or
space ( ) for context.
At the end of a truncated hunk you can emit *** End of File.
Patch := Begin { FileOp } End
Begin := "*** Begin Patch" NEWLINE
End := "*** End Patch" NEWLINE
FileOp := AddFile | DeleteFile | UpdateFile
AddFile := "*** Add File: " path NEWLINE { "+" line NEWLINE }
DeleteFile := "*** Delete File: " path NEWLINE
UpdateFile := "*** Update File: " path NEWLINE [ MoveTo ] { Hunk }
MoveTo := "*** Move to: " newPath NEWLINE
Hunk := "@@" [ header ] NEWLINE { HunkLine } [ "*** End of File" NEWLINE ]
HunkLine := (" " | "-" | "+") text NEWLINE
A full patch can combine several operations:
*** Begin Patch
*** Add File: hello.txt
+Hello world
*** Update File: src/app.py
*** Move to: src/main.py
@@ def greet():
-print("Hi")
+print("Hello, world!")
*** Delete File: obsolete.txt
*** End Patch
It is important to remember:
- You must include a header with your intended action (Add/Delete/Update)
- You must prefix new lines with `+` even when creating a new file
"#
.to_string(),
strict: false,
parameters: JsonSchema::Object {
properties,
required: Some(vec!["input".to_string()]),
additional_properties: Some(false),
},
})
}
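
For reference, the tool above serializes to roughly the following Responses API function definition (description truncated; the shape is inferred from the schema built above, so treat it as an approximation rather than the exact wire format):

```rust
// Approximate wire shape of the apply_patch tool definition; illustrative only.
use serde_json::json;

fn main() {
    let tool = json!({
        "type": "function",
        "name": "apply_patch",
        "description": "Use this tool to edit files. ...",
        "strict": false,
        "parameters": {
            "type": "object",
            "properties": {
                "input": {
                    "type": "string",
                    "description": "The entire contents of the apply_patch command"
                }
            },
            "required": ["input"],
            "additionalProperties": false
        }
    });
    println!("{}", serde_json::to_string_pretty(&tool).unwrap());
}
```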
/// Returns JSON values that are compatible with Function Calling in the
/// Responses API:
/// https://platform.openai.com/docs/guides/function-calling?api-mode=responses
@@ -455,6 +539,10 @@ pub(crate) fn get_openai_tools(
tools.push(PLAN_TOOL.clone());
}
if config.apply_patch_tool {
tools.push(create_apply_patch_tool());
}
if let Some(mcp_tools) = mcp_tools {
for (name, tool) in mcp_tools {
match mcp_tool_to_openai_tool(name.clone(), tool.clone()) {
@@ -508,6 +596,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
true,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(&config, Some(HashMap::new()));
@@ -522,6 +611,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
true,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(&config, Some(HashMap::new()));
@@ -536,6 +626,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
false,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(
&config,
@@ -629,6 +720,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
false,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(
@@ -684,6 +776,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
false,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(
@@ -734,6 +827,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
false,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(
@@ -787,6 +881,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
false,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(


@@ -47,6 +47,26 @@ pub fn load_sse_fixture(path: impl AsRef<std::path::Path>) -> String {
.collect()
}
pub fn load_sse_fixture_with_id_from_str(raw: &str, id: &str) -> String {
let replaced = raw.replace("__ID__", id);
let events: Vec<serde_json::Value> =
serde_json::from_str(&replaced).expect("parse JSON fixture");
events
.into_iter()
.map(|e| {
let kind = e
.get("type")
.and_then(|v| v.as_str())
.expect("fixture event missing type");
if e.as_object().map(|o| o.len() == 1).unwrap_or(false) {
format!("event: {kind}\n\n")
} else {
format!("event: {kind}\ndata: {e}\n\n")
}
})
.collect()
}
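
A hypothetical usage sketch for the new helper (assumes it is in scope): an event carrying only a `type` field is emitted without a `data:` line, while richer events carry the full JSON payload.

```rust
// Hypothetical usage, not part of this diff.
#[test]
fn renders_fixture_as_sse() {
    let raw = r#"[
        {"type": "response.output_item.done", "item": {"id": "__ID__"}},
        {"type": "response.completed"}
    ]"#;
    let sse = load_sse_fixture_with_id_from_str(raw, "call1");
    // First event has more than one field, so it gets a data: payload with
    // __ID__ substituted; the second is type-only and gets no data: line.
    assert!(sse.starts_with("event: response.output_item.done\ndata: "));
    assert!(sse.contains("call1"));
    assert!(sse.ends_with("event: response.completed\n\n"));
}
```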
/// Same as [`load_sse_fixture`], but replaces the placeholder `__ID__` in the
/// fixture template with the supplied identifier before parsing. This lets a
/// single JSON template be reused by multiple tests that each need a unique


@@ -44,3 +44,5 @@ assert_cmd = "2"
libc = "0.2"
predicates = "3"
tempfile = "3.13.0"
wiremock = "0.6"
core_test_support = { path = "../core/tests/common" }


@@ -146,6 +146,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
codex_linux_sandbox_exe,
base_instructions: None,
include_plan_tool: None,
include_apply_patch_tool: None,
disable_response_storage: oss.then_some(true),
show_raw_agent_reasoning: oss.then_some(true),
};


@@ -1,3 +1,5 @@
#![allow(clippy::expect_used, clippy::unwrap_used)]
use anyhow::Context;
use assert_cmd::prelude::*;
use codex_core::CODEX_APPLY_PATCH_ARG1;
@@ -37,3 +39,152 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
);
Ok(())
}
#[cfg(not(target_os = "windows"))]
#[tokio::test]
async fn test_apply_patch_tool() -> anyhow::Result<()> {
use core_test_support::load_sse_fixture_with_id_from_str;
use tempfile::TempDir;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
const SSE_TOOL_CALL_ADD: &str = r#"[
{
"type": "response.output_item.done",
"item": {
"type": "function_call",
"name": "apply_patch",
"arguments": "{\n \"input\": \"*** Begin Patch\\n*** Add File: test.md\\n+Hello world\\n*** End Patch\"\n}",
"call_id": "__ID__"
}
},
{
"type": "response.completed",
"response": {
"id": "__ID__",
"usage": {
"input_tokens": 0,
"input_tokens_details": null,
"output_tokens": 0,
"output_tokens_details": null,
"total_tokens": 0
},
"output": []
}
}
]"#;
const SSE_TOOL_CALL_UPDATE: &str = r#"[
{
"type": "response.output_item.done",
"item": {
"type": "function_call",
"name": "apply_patch",
"arguments": "{\n \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
"call_id": "__ID__"
}
},
{
"type": "response.completed",
"response": {
"id": "__ID__",
"usage": {
"input_tokens": 0,
"input_tokens_details": null,
"output_tokens": 0,
"output_tokens_details": null,
"total_tokens": 0
},
"output": []
}
}
]"#;
const SSE_TOOL_CALL_COMPLETED: &str = r#"[
{
"type": "response.completed",
"response": {
"id": "__ID__",
"usage": {
"input_tokens": 0,
"input_tokens_details": null,
"output_tokens": 0,
"output_tokens_details": null,
"total_tokens": 0
},
"output": []
}
}
]"#;
// Start a mock model server
let server = MockServer::start().await;
// First response: model calls apply_patch to create test.md
let first = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
"text/event-stream",
);
Mock::given(method("POST"))
// .and(path("/v1/responses"))
.respond_with(first)
.up_to_n_times(1)
.mount(&server)
.await;
// Second response: model calls apply_patch to update test.md
let second = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
"text/event-stream",
);
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(second)
.up_to_n_times(1)
.mount(&server)
.await;
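// Third response: after the update patch is applied, the model completes the
// turn without issuing any further tool calls, leaving the file in its final state.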
let final_completed = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
"text/event-stream",
);
Mock::given(method("POST"))
// .and(path("/v1/responses"))
.respond_with(final_completed)
.expect(1)
.mount(&server)
.await;
let tmp_cwd = TempDir::new().unwrap();
Command::cargo_bin("codex-exec")
.context("should find binary for codex-exec")?
.current_dir(tmp_cwd.path())
.env("CODEX_HOME", tmp_cwd.path())
.env("OPENAI_API_KEY", "dummy")
.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
.arg("--skip-git-repo-check")
.arg("-s")
.arg("workspace-write")
.arg("foo")
.assert()
.success();
// Verify final file contents
let final_path = tmp_cwd.path().join("test.md");
let contents = std::fs::read_to_string(&final_path)
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
assert_eq!(contents, "Final text\n");
Ok(())
}


@@ -367,6 +367,7 @@ fn derive_config_from_params(
config: cli_overrides,
base_instructions,
include_plan_tool,
include_apply_patch_tool,
} = params;
let overrides = ConfigOverrides {
model,
@@ -378,6 +379,7 @@ fn derive_config_from_params(
codex_linux_sandbox_exe,
base_instructions,
include_plan_tool,
include_apply_patch_tool,
disable_response_storage: None,
show_raw_agent_reasoning: None,
};


@@ -160,6 +160,7 @@ impl CodexToolCallParam {
codex_linux_sandbox_exe,
base_instructions,
include_plan_tool,
include_apply_patch_tool: None,
disable_response_storage: None,
show_raw_agent_reasoning: None,
};


@@ -52,6 +52,7 @@ pub(crate) async fn handle_create_conversation(
codex_linux_sandbox_exe: None,
base_instructions,
include_plan_tool: None,
include_apply_patch_tool: None,
disable_response_storage: None,
show_raw_agent_reasoning: None,
};


@@ -90,6 +90,10 @@ pub struct NewConversationParams {
/// Whether to include the plan tool in the conversation.
#[serde(skip_serializing_if = "Option::is_none")]
pub include_plan_tool: Option<bool>,
/// Whether to include the apply patch tool in the conversation.
#[serde(skip_serializing_if = "Option::is_none")]
pub include_apply_patch_tool: Option<bool>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
@@ -241,6 +245,7 @@ mod tests {
config: None,
base_instructions: None,
include_plan_tool: None,
include_apply_patch_tool: None,
},
};
assert_eq!(


@@ -116,6 +116,7 @@ pub async fn run_main(
codex_linux_sandbox_exe,
base_instructions: None,
include_plan_tool: Some(true),
include_apply_patch_tool: None,
disable_response_storage: cli.oss.then_some(true),
show_raw_agent_reasoning: cli.oss.then_some(true),
};