[tools] Add apply_patch tool (#2303)

## Summary
We've been seeing a number of issues and reports with our synthetic
`apply_patch` tool, e.g. #802. Let's make this a real tool - in my
anecdotal testing, it's critical for GPT-OSS models, but I'd like to
make it the standard across GPT-5 and codex models as well.
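
For reference, the new tool takes a single required `input` string containing the whole patch envelope. A minimal sketch (illustrative values; assumes `serde`/`serde_json`) of the arguments a model emits and how they map onto the existing `apply_patch` exec path:

```rust
// Sketch only: mirrors the ApplyPatchToolArgs struct and the exec command
// construction added in this PR; values are illustrative.
use serde::Deserialize;

#[derive(Deserialize)]
struct ApplyPatchToolArgs {
    input: String,
}

fn main() -> Result<(), serde_json::Error> {
    // Function-call arguments as they arrive from the Responses API.
    let arguments =
        r#"{"input": "*** Begin Patch\n*** Add File: hello.txt\n+Hello world\n*** End Patch"}"#;
    let args: ApplyPatchToolArgs = serde_json::from_str(arguments)?;

    // The session then runs the patch through the normal exec/sandbox path,
    // as if the model had invoked `apply_patch <patch>` as a shell command.
    let command = vec!["apply_patch".to_string(), args.input];
    assert_eq!(command.len(), 2);
    Ok(())
}
```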

## Testing
- [x] Tested locally
- [x] Integration test
Author: Dylan
Date: 2025-08-15 11:55:53 -04:00
Committed by: GitHub
Commit: 6df8e35314 (parent 917e29803b)
14 changed files with 330 additions and 2 deletions

codex-rs/Cargo.lock generated

@@ -751,6 +751,7 @@ dependencies = [
"codex-common",
"codex-core",
"codex-ollama",
"core_test_support",
"libc",
"owo-colors",
"predicates",
@@ -760,6 +761,7 @@ dependencies = [
"tokio",
"tracing",
"tracing-subscriber",
"wiremock",
]
[[package]]


@@ -67,6 +67,7 @@ use crate::models::ReasoningItemReasoningSummary;
use crate::models::ResponseInputItem;
use crate::models::ResponseItem;
use crate::models::ShellToolCallParams;
use crate::openai_tools::ApplyPatchToolArgs;
use crate::openai_tools::ToolsConfig;
use crate::openai_tools::get_openai_tools;
use crate::parse_command::parse_command;
@@ -455,6 +456,7 @@ impl Session {
approval_policy,
sandbox_policy.clone(),
config.include_plan_tool,
config.include_apply_patch_tool,
),
tx_event: tx_event.clone(),
user_instructions,
@@ -1727,6 +1729,30 @@ async fn handle_function_call(
handle_container_exec_with_params(params, sess, turn_diff_tracker, sub_id, call_id)
.await
}
"apply_patch" => {
let args = match serde_json::from_str::<ApplyPatchToolArgs>(&arguments) {
Ok(a) => a,
Err(e) => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("failed to parse function arguments: {e}"),
success: None,
},
};
}
};
let exec_params = ExecParams {
command: vec!["apply_patch".to_string(), args.input.clone()],
cwd: sess.cwd.clone(),
timeout_ms: None,
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
handle_container_exec_with_params(exec_params, sess, turn_diff_tracker, sub_id, call_id)
.await
}
"update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
_ => {
match sess.mcp_connection_manager.parse_tool_name(&name) {


@@ -156,6 +156,11 @@ pub struct Config {
/// Include an experimental plan tool that the model can use to update its current plan and status of each step.
pub include_plan_tool: bool,
/// Include the `apply_patch` tool for models that benefit from invoking
/// file edits as a structured tool call. When unset, this falls back to the
/// model family's default preference.
pub include_apply_patch_tool: bool,
/// The value for the `originator` header included with Responses API requests.
pub internal_originator: Option<String>,
}
@@ -480,6 +485,7 @@ pub struct ConfigOverrides {
pub codex_linux_sandbox_exe: Option<PathBuf>,
pub base_instructions: Option<String>,
pub include_plan_tool: Option<bool>,
pub include_apply_patch_tool: Option<bool>,
pub disable_response_storage: Option<bool>,
pub show_raw_agent_reasoning: Option<bool>,
}
@@ -505,6 +511,7 @@ impl Config {
codex_linux_sandbox_exe,
base_instructions,
include_plan_tool,
include_apply_patch_tool,
disable_response_storage,
show_raw_agent_reasoning,
} = overrides;
@@ -581,6 +588,7 @@ impl Config {
needs_special_apply_patch_instructions: false,
supports_reasoning_summaries,
uses_local_shell_tool: false,
uses_apply_patch_tool: false,
}
});
@@ -607,6 +615,9 @@ impl Config {
Self::get_base_instructions(experimental_instructions_path, &resolved_cwd)?;
let base_instructions = base_instructions.or(file_base_instructions);
let include_apply_patch_tool_val =
include_apply_patch_tool.unwrap_or(model_family.uses_apply_patch_tool);
let config = Self {
model,
model_family,
@@ -659,6 +670,7 @@ impl Config {
experimental_resume,
include_plan_tool: include_plan_tool.unwrap_or(false),
include_apply_patch_tool: include_apply_patch_tool_val,
internal_originator: cfg.internal_originator,
};
Ok(config)
@@ -1022,6 +1034,7 @@ disable_response_storage = true
experimental_resume: None,
base_instructions: None,
include_plan_tool: false,
include_apply_patch_tool: false,
internal_originator: None,
},
o3_profile_config
@@ -1073,6 +1086,7 @@ disable_response_storage = true
experimental_resume: None,
base_instructions: None,
include_plan_tool: false,
include_apply_patch_tool: false,
internal_originator: None,
};
@@ -1139,6 +1153,7 @@ disable_response_storage = true
experimental_resume: None,
base_instructions: None,
include_plan_tool: false,
include_apply_patch_tool: false,
internal_originator: None,
};


@@ -23,6 +23,10 @@ pub struct ModelFamily {
// the model such that its description can be omitted.
// See https://platform.openai.com/docs/guides/tools-local-shell
pub uses_local_shell_tool: bool,
/// True if the model performs better when `apply_patch` is provided as
/// a tool call instead of just a bash command.
pub uses_apply_patch_tool: bool,
}
macro_rules! model_family {
@@ -36,6 +40,7 @@ macro_rules! model_family {
needs_special_apply_patch_instructions: false,
supports_reasoning_summaries: false,
uses_local_shell_tool: false,
uses_apply_patch_tool: false,
};
// apply overrides
$(
@@ -55,6 +60,7 @@ macro_rules! simple_model_family {
needs_special_apply_patch_instructions: false,
supports_reasoning_summaries: false,
uses_local_shell_tool: false,
uses_apply_patch_tool: false,
})
}};
}
@@ -88,10 +94,10 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
slug, "gpt-4.1",
needs_special_apply_patch_instructions: true,
)
} else if slug.starts_with("gpt-oss") {
model_family!(slug, "gpt-oss", uses_apply_patch_tool: true)
} else if slug.starts_with("gpt-4o") {
simple_model_family!(slug, "gpt-4o")
} else if slug.starts_with("gpt-oss") {
simple_model_family!(slug, "gpt-oss")
} else if slug.starts_with("gpt-3.5") {
simple_model_family!(slug, "gpt-3.5")
} else if slug.starts_with("gpt-5") {


@@ -43,6 +43,7 @@ pub enum ConfigShellToolType {
pub struct ToolsConfig {
pub shell_type: ConfigShellToolType,
pub plan_tool: bool,
pub apply_patch_tool: bool,
}
impl ToolsConfig {
@@ -51,6 +52,7 @@ impl ToolsConfig {
approval_policy: AskForApproval,
sandbox_policy: SandboxPolicy,
include_plan_tool: bool,
include_apply_patch_tool: bool,
) -> Self {
let mut shell_type = if model_family.uses_local_shell_tool {
ConfigShellToolType::LocalShell
@@ -66,6 +68,7 @@ impl ToolsConfig {
Self {
shell_type,
plan_tool: include_plan_tool,
apply_patch_tool: include_apply_patch_tool || model_family.uses_apply_patch_tool,
}
}
}
@@ -235,6 +238,87 @@ The shell tool is used to execute shell commands.
})
}
#[derive(Serialize, Deserialize)]
pub(crate) struct ApplyPatchToolArgs {
pub(crate) input: String,
}
fn create_apply_patch_tool() -> OpenAiTool {
// Minimal schema: one required string argument containing the patch body
let mut properties = BTreeMap::new();
properties.insert(
"input".to_string(),
JsonSchema::String {
description: Some(r#"The entire contents of the apply_patch command"#.to_string()),
},
);
OpenAiTool::Function(ResponsesApiTool {
name: "apply_patch".to_string(),
description: r#"Use this tool to edit files.
Your patch language is a stripped-down, file-oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high-level envelope:
*** Begin Patch
[ one or more file sections ]
*** End Patch
Within that envelope, you get a sequence of file operations.
You MUST include a header to specify the action you are taking.
Each operation starts with one of three headers:
*** Add File: <path> - create a new file. Every following line is a + line (the initial contents).
*** Delete File: <path> - remove an existing file. Nothing follows.
*** Update File: <path> - patch an existing file in place (optionally with a rename).
May be immediately followed by *** Move to: <new path> if you want to rename the file.
Then one or more "hunks", each introduced by @@ (optionally followed by a hunk header).
Within a hunk each line starts with:
+ for inserted text,
- for removed text, or
space ( ) for context.
At the end of a truncated hunk you can emit *** End of File.
Patch := Begin { FileOp } End
Begin := "*** Begin Patch" NEWLINE
End := "*** End Patch" NEWLINE
FileOp := AddFile | DeleteFile | UpdateFile
AddFile := "*** Add File: " path NEWLINE { "+" line NEWLINE }
DeleteFile := "*** Delete File: " path NEWLINE
UpdateFile := "*** Update File: " path NEWLINE [ MoveTo ] { Hunk }
MoveTo := "*** Move to: " newPath NEWLINE
Hunk := "@@" [ header ] NEWLINE { HunkLine } [ "*** End of File" NEWLINE ]
HunkLine := (" " | "-" | "+") text NEWLINE
A full patch can combine several operations:
*** Begin Patch
*** Add File: hello.txt
+Hello world
*** Update File: src/app.py
*** Move to: src/main.py
@@ def greet():
-print("Hi")
+print("Hello, world!")
*** Delete File: obsolete.txt
*** End Patch
It is important to remember:
- You must include a header with your intended action (Add/Delete/Update)
- You must prefix new lines with `+` even when creating a new file
"#
.to_string(),
strict: false,
parameters: JsonSchema::Object {
properties,
required: Some(vec!["input".to_string()]),
additional_properties: Some(false),
},
})
}
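
For reference, the tool above serializes to roughly the following Responses API function definition (description truncated; the shape is inferred from the schema built above, so treat it as an approximation rather than the exact wire format):

```rust
// Approximate wire shape of the apply_patch tool definition; illustrative only.
use serde_json::json;

fn main() {
    let tool = json!({
        "type": "function",
        "name": "apply_patch",
        "description": "Use this tool to edit files. ...",
        "strict": false,
        "parameters": {
            "type": "object",
            "properties": {
                "input": {
                    "type": "string",
                    "description": "The entire contents of the apply_patch command"
                }
            },
            "required": ["input"],
            "additionalProperties": false
        }
    });
    println!("{}", serde_json::to_string_pretty(&tool).unwrap());
}
```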
/// Returns JSON values that are compatible with Function Calling in the
/// Responses API:
/// https://platform.openai.com/docs/guides/function-calling?api-mode=responses
@@ -455,6 +539,10 @@ pub(crate) fn get_openai_tools(
tools.push(PLAN_TOOL.clone());
}
if config.apply_patch_tool {
tools.push(create_apply_patch_tool());
}
if let Some(mcp_tools) = mcp_tools {
for (name, tool) in mcp_tools {
match mcp_tool_to_openai_tool(name.clone(), tool.clone()) {
@@ -508,6 +596,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
true,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(&config, Some(HashMap::new()));
@@ -522,6 +611,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
true,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(&config, Some(HashMap::new()));
@@ -536,6 +626,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
false,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(
&config,
@@ -629,6 +720,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
false,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(
@@ -684,6 +776,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
false,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(
@@ -734,6 +827,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
false,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(
@@ -787,6 +881,7 @@ mod tests {
AskForApproval::Never,
SandboxPolicy::ReadOnly,
false,
model_family.uses_apply_patch_tool,
);
let tools = get_openai_tools(


@@ -47,6 +47,26 @@ pub fn load_sse_fixture(path: impl AsRef<std::path::Path>) -> String {
.collect()
}
pub fn load_sse_fixture_with_id_from_str(raw: &str, id: &str) -> String {
let replaced = raw.replace("__ID__", id);
let events: Vec<serde_json::Value> =
serde_json::from_str(&replaced).expect("parse JSON fixture");
events
.into_iter()
.map(|e| {
let kind = e
.get("type")
.and_then(|v| v.as_str())
.expect("fixture event missing type");
if e.as_object().map(|o| o.len() == 1).unwrap_or(false) {
format!("event: {kind}\n\n")
} else {
format!("event: {kind}\ndata: {e}\n\n")
}
})
.collect()
}
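
A hypothetical usage sketch for the new helper (assumes it is in scope): an event carrying only a `type` field is emitted without a `data:` line, while richer events carry the full JSON payload.

```rust
// Hypothetical usage, not part of this diff.
#[test]
fn renders_fixture_as_sse() {
    let raw = r#"[
        {"type": "response.output_item.done", "item": {"id": "__ID__"}},
        {"type": "response.completed"}
    ]"#;
    let sse = load_sse_fixture_with_id_from_str(raw, "call1");
    // First event has more than one field, so it gets a data: payload with
    // __ID__ substituted; the second is type-only and gets no data: line.
    assert!(sse.starts_with("event: response.output_item.done\ndata: "));
    assert!(sse.contains("call1"));
    assert!(sse.ends_with("event: response.completed\n\n"));
}
```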
/// Same as [`load_sse_fixture`], but replaces the placeholder `__ID__` in the
/// fixture template with the supplied identifier before parsing. This lets a
/// single JSON template be reused by multiple tests that each need a unique


@@ -44,3 +44,5 @@ assert_cmd = "2"
libc = "0.2"
predicates = "3"
tempfile = "3.13.0"
wiremock = "0.6"
core_test_support = { path = "../core/tests/common" }


@@ -146,6 +146,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
codex_linux_sandbox_exe,
base_instructions: None,
include_plan_tool: None,
include_apply_patch_tool: None,
disable_response_storage: oss.then_some(true),
show_raw_agent_reasoning: oss.then_some(true),
};


@@ -1,3 +1,5 @@
#![allow(clippy::expect_used, clippy::unwrap_used)]
use anyhow::Context;
use assert_cmd::prelude::*;
use codex_core::CODEX_APPLY_PATCH_ARG1;
@@ -37,3 +39,152 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
);
Ok(())
}
#[cfg(not(target_os = "windows"))]
#[tokio::test]
async fn test_apply_patch_tool() -> anyhow::Result<()> {
use core_test_support::load_sse_fixture_with_id_from_str;
use tempfile::TempDir;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
const SSE_TOOL_CALL_ADD: &str = r#"[
{
"type": "response.output_item.done",
"item": {
"type": "function_call",
"name": "apply_patch",
"arguments": "{\n \"input\": \"*** Begin Patch\\n*** Add File: test.md\\n+Hello world\\n*** End Patch\"\n}",
"call_id": "__ID__"
}
},
{
"type": "response.completed",
"response": {
"id": "__ID__",
"usage": {
"input_tokens": 0,
"input_tokens_details": null,
"output_tokens": 0,
"output_tokens_details": null,
"total_tokens": 0
},
"output": []
}
}
]"#;
const SSE_TOOL_CALL_UPDATE: &str = r#"[
{
"type": "response.output_item.done",
"item": {
"type": "function_call",
"name": "apply_patch",
"arguments": "{\n \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
"call_id": "__ID__"
}
},
{
"type": "response.completed",
"response": {
"id": "__ID__",
"usage": {
"input_tokens": 0,
"input_tokens_details": null,
"output_tokens": 0,
"output_tokens_details": null,
"total_tokens": 0
},
"output": []
}
}
]"#;
const SSE_TOOL_CALL_COMPLETED: &str = r#"[
{
"type": "response.completed",
"response": {
"id": "__ID__",
"usage": {
"input_tokens": 0,
"input_tokens_details": null,
"output_tokens": 0,
"output_tokens_details": null,
"total_tokens": 0
},
"output": []
}
}
]"#;
// Start a mock model server
let server = MockServer::start().await;
// First response: model calls apply_patch to create test.md
let first = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
"text/event-stream",
);
Mock::given(method("POST"))
// .and(path("/v1/responses"))
.respond_with(first)
.up_to_n_times(1)
.mount(&server)
.await;
// Second response: model calls apply_patch to update test.md
let second = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
"text/event-stream",
);
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(second)
.up_to_n_times(1)
.mount(&server)
.await;
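// Third response: after the update patch is applied, the model completes the
// turn without issuing any further tool calls, leaving the file in its final state.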
let final_completed = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
"text/event-stream",
);
Mock::given(method("POST"))
// .and(path("/v1/responses"))
.respond_with(final_completed)
.expect(1)
.mount(&server)
.await;
let tmp_cwd = TempDir::new().unwrap();
Command::cargo_bin("codex-exec")
.context("should find binary for codex-exec")?
.current_dir(tmp_cwd.path())
.env("CODEX_HOME", tmp_cwd.path())
.env("OPENAI_API_KEY", "dummy")
.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
.arg("--skip-git-repo-check")
.arg("-s")
.arg("workspace-write")
.arg("foo")
.assert()
.success();
// Verify final file contents
let final_path = tmp_cwd.path().join("test.md");
let contents = std::fs::read_to_string(&final_path)
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
assert_eq!(contents, "Final text\n");
Ok(())
}


@@ -367,6 +367,7 @@ fn derive_config_from_params(
config: cli_overrides,
base_instructions,
include_plan_tool,
include_apply_patch_tool,
} = params;
let overrides = ConfigOverrides {
model,
@@ -378,6 +379,7 @@ fn derive_config_from_params(
codex_linux_sandbox_exe,
base_instructions,
include_plan_tool,
include_apply_patch_tool,
disable_response_storage: None,
show_raw_agent_reasoning: None,
};


@@ -160,6 +160,7 @@ impl CodexToolCallParam {
codex_linux_sandbox_exe,
base_instructions,
include_plan_tool,
include_apply_patch_tool: None,
disable_response_storage: None,
show_raw_agent_reasoning: None,
};


@@ -52,6 +52,7 @@ pub(crate) async fn handle_create_conversation(
codex_linux_sandbox_exe: None,
base_instructions,
include_plan_tool: None,
include_apply_patch_tool: None,
disable_response_storage: None,
show_raw_agent_reasoning: None,
};


@@ -90,6 +90,10 @@ pub struct NewConversationParams {
/// Whether to include the plan tool in the conversation.
#[serde(skip_serializing_if = "Option::is_none")]
pub include_plan_tool: Option<bool>,
/// Whether to include the apply patch tool in the conversation.
#[serde(skip_serializing_if = "Option::is_none")]
pub include_apply_patch_tool: Option<bool>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
@@ -241,6 +245,7 @@ mod tests {
config: None,
base_instructions: None,
include_plan_tool: None,
include_apply_patch_tool: None,
},
};
assert_eq!(


@@ -116,6 +116,7 @@ pub async fn run_main(
codex_linux_sandbox_exe,
base_instructions: None,
include_plan_tool: Some(true),
include_apply_patch_tool: None,
disable_response_storage: cli.oss.then_some(true),
show_raw_agent_reasoning: cli.oss.then_some(true),
};