fix: provide tolerance for apply_patch tool (#993)

As explained in detail in the doc comment for `ParseMode::Lenient`, we
have observed that GPT-4.1 does not always generate a valid invocation
of `apply_patch`. Fortunately, the error is predictable, so we introduce
some new logic to the `codex-apply-patch` crate to recover from this
error.

Because we would like to avoid this becoming a de facto standard (it would
be incompatible with `apply_patch` provided as an actual executable, unless
we introduced the lenient behavior in the executable as well), we require
passing `ParseMode::Lenient` to `parse_patch_text()` to make it clear that
the caller is opting into supporting this special case.
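
As a rough sketch of the opt-in (only `parse_patch_text()` and
`ParseMode::Lenient` are confirmed by this change; the exact signature,
return type, and error shape are assumptions here):

```rust
use codex_apply_patch::{parse_patch_text, ParseMode};

// Hypothetical call site: the harness opts into the lenient behavior
// explicitly, so the special case never becomes a silent default.
fn try_parse_apply_patch(argument: &str) -> bool {
    match parse_patch_text(argument, ParseMode::Lenient) {
        Ok(_patch) => true, // proceed to apply the parsed hunks
        Err(err) => {
            eprintln!("apply_patch rejected the input: {err}");
            false
        }
    }
}
```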

Note that the analogous change to the TypeScript CLI was made in
https://github.com/openai/codex/pull/930. In addition to widening the
input accepted by `apply_patch`, it also introduced additional
instructions for the model, which we include in this PR.

Note that `apply-patch` does not depend on either `regex` or
`regex-lite`, so some of the checks are slightly more verbose in order to
avoid introducing either dependency.
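
For flavor, here is a hypothetical version of one such check written with
plain `str` methods; the heredoc-shaped wrapper it detects is an assumption
borrowed from the heredoc helper mentioned below, not necessarily the exact
condition the crate tests:

```rust
// Hypothetical: detect an `apply_patch <<EOF`-style wrapper by hand.
// With `regex` this could be a pattern like `^apply_patch\s*<<`.
fn looks_like_heredoc_invocation(text: &str) -> bool {
    let first_line = text.lines().next().unwrap_or("");
    match first_line.trim_start().strip_prefix("apply_patch") {
        Some(rest) => rest.trim_start().starts_with("<<"),
        None => false,
    }
}
```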

That said, this PR does not leverage the existing
`extract_heredoc_body_from_apply_patch_command()`, which depends on
`tree-sitter` and `tree-sitter-bash`
(5a5aa89914/codex-rs/apply-patch/src/lib.rs (L191-L246)),
though perhaps it should.
commit 6fcc528a43 (parent 5a5aa89914)
Author: Michael Bolin (committed via GitHub)
Date: 2025-06-03 09:06:38 -07:00

6 changed files with 281 additions and 35 deletions


@@ -38,7 +38,7 @@ pub(crate) async fn stream_chat_completions(
     // Build messages array
     let mut messages = Vec::<serde_json::Value>::new();
-    let full_instructions = prompt.get_full_instructions();
+    let full_instructions = prompt.get_full_instructions(model);
     messages.push(json!({"role": "system", "content": full_instructions}));
     for item in &prompt.input {


@@ -106,7 +106,7 @@ impl ModelClient {
             return stream_from_fixture(path).await;
         }
-        let full_instructions = prompt.get_full_instructions();
+        let full_instructions = prompt.get_full_instructions(&self.model);
         let tools_json = create_tools_json_for_responses_api(prompt, &self.model)?;
         let reasoning = create_reasoning_param_for_request(&self.model, self.effort, self.summary);
         let payload = ResponsesApiRequest {


@@ -2,6 +2,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::error::Result;
 use crate::models::ResponseItem;
+use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
 use futures::Stream;
 use serde::Serialize;
 use std::borrow::Cow;
@@ -35,14 +36,22 @@ pub struct Prompt {
 }
 impl Prompt {
-    pub(crate) fn get_full_instructions(&self) -> Cow<str> {
-        match &self.instructions {
-            Some(instructions) => {
-                let instructions = format!("{BASE_INSTRUCTIONS}\n{instructions}");
-                Cow::Owned(instructions)
-            }
-            None => Cow::Borrowed(BASE_INSTRUCTIONS),
-        }
-    }
+    pub(crate) fn get_full_instructions(&self, model: &str) -> Cow<str> {
+        [
+            Some(Cow::Borrowed(BASE_INSTRUCTIONS)),
+            self.instructions.as_ref().map(|s| Cow::Owned(s.clone())),
+            if model.starts_with("gpt-4.1") {
+                Some(Cow::Borrowed(APPLY_PATCH_TOOL_INSTRUCTIONS))
+            } else {
+                None
+            },
+        ]
+        .iter()
+        .filter_map(|s| s.as_ref())
+        .map(|cow| cow.as_ref())
+        .collect::<Vec<_>>()
+        .join("\n")
+        .into()
+    }
 }
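
A sketch of the resulting behavior (assuming `Prompt` implements `Default`
and that `BASE_INSTRUCTIONS` does not itself contain the apply_patch text;
since `get_full_instructions` is `pub(crate)`, this would live in a unit
test inside the crate):

```rust
#[test]
fn gpt_4_1_models_get_apply_patch_instructions() {
    let prompt = Prompt::default();

    // Any model whose name starts with "gpt-4.1" gets the extra guidance.
    let full = prompt.get_full_instructions("gpt-4.1-mini");
    assert!(full.contains(APPLY_PATCH_TOOL_INSTRUCTIONS));

    // Other models keep the base (plus per-prompt) instructions only.
    let full = prompt.get_full_instructions("gpt-4o");
    assert!(!full.contains(APPLY_PATCH_TOOL_INSTRUCTIONS));
}
```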