feat: Freeform apply_patch with simple shell output (#4718)

## Summary This PR is an alternative approach to #4711, but instead of changing our storage, parses out shell calls in the client and reserializes them on the fly before we send them out as part of the request. What this changes: 1. Adds additional serialization logic when the ApplyPatchToolType::Freeform is in use. 2. Adds a --custom-apply-patch flag to enable this setting on a session-by-session basis. This change is delicate, but is not meant to be permanent. It is meant to be the first step in a migration: 1. (This PR) Add in-flight serialization with config 2. Update model_family default 3. Update serialization logic to store turn outputs in a structured format, with logic to serialize based on model_family setting. 4. Remove this rewrite in-flight logic. ## Test Plan - [x] Additional unit tests added - [x] Integration tests added - [x] Tested locally
2025-10-04 19:16:36 -07:00
parent 90ef94d3b3
commit 4764fc1ee7
11 changed files with 420 additions and 15 deletions
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -9,9 +9,11 @@ use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use codex_protocol::config_types::Verbosity as VerbosityConfig;
 use codex_protocol::models::ResponseItem;
 use futures::Stream;
+use serde::Deserialize;
 use serde::Serialize;
 use serde_json::Value;
 use std::borrow::Cow;
+use std::collections::HashSet;
 use std::ops::Deref;
 use std::pin::Pin;
 use std::task::Context;
@@ -64,10 +66,114 @@ impl Prompt {
    }

    pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
-        self.input.clone()
+        let mut input = self.input.clone();
+
+        // when using the *Freeform* apply_patch tool specifically, tool outputs
+        // should be structured text, not json. Do NOT reserialize when using
+        // the Function tool - note that this differs from the check above for
+        // instructions. We declare the result as a named variable for clarity.
+        let is_freeform_apply_patch_tool_present = self.tools.iter().any(|tool| match tool {
+            ToolSpec::Freeform(f) => f.name == "apply_patch",
+            _ => false,
+        });
+        if is_freeform_apply_patch_tool_present {
+            reserialize_shell_outputs(&mut input);
+        }
+
+        input
    }
 }

+fn reserialize_shell_outputs(items: &mut [ResponseItem]) {
+    let mut shell_call_ids: HashSet<String> = HashSet::new();
+
+    items.iter_mut().for_each(|item| match item {
+        ResponseItem::LocalShellCall { call_id, id, .. } => {
+            if let Some(identifier) = call_id.clone().or_else(|| id.clone()) {
+                shell_call_ids.insert(identifier);
+            }
+        }
+        ResponseItem::CustomToolCall {
+            id: _,
+            status: _,
+            call_id,
+            name,
+            input: _,
+        } => {
+            if name == "apply_patch" {
+                shell_call_ids.insert(call_id.clone());
+            }
+        }
+        ResponseItem::CustomToolCallOutput { call_id, output } => {
+            if shell_call_ids.remove(call_id)
+                && let Some(structured) = parse_structured_shell_output(output)
+            {
+                *output = structured
+            }
+        }
+        ResponseItem::FunctionCall { name, call_id, .. }
+            if is_shell_tool_name(name) || name == "apply_patch" =>
+        {
+            shell_call_ids.insert(call_id.clone());
+        }
+        ResponseItem::FunctionCallOutput { call_id, output } => {
+            if shell_call_ids.remove(call_id)
+                && let Some(structured) = parse_structured_shell_output(&output.content)
+            {
+                output.content = structured
+            }
+        }
+        _ => {}
+    })
+}
+
+fn is_shell_tool_name(name: &str) -> bool {
+    matches!(name, "shell" | "container.exec")
+}
+
+#[derive(Deserialize)]
+struct ExecOutputJson {
+    output: String,
+    metadata: ExecOutputMetadataJson,
+}
+
+#[derive(Deserialize)]
+struct ExecOutputMetadataJson {
+    exit_code: i32,
+    duration_seconds: f32,
+}
+
+fn parse_structured_shell_output(raw: &str) -> Option<String> {
+    let parsed: ExecOutputJson = serde_json::from_str(raw).ok()?;
+    Some(build_structured_output(&parsed))
+}
+
+fn build_structured_output(parsed: &ExecOutputJson) -> String {
+    let mut sections = Vec::new();
+    sections.push(format!("Exit code: {}", parsed.metadata.exit_code));
+    sections.push(format!(
+        "Wall time: {} seconds",
+        parsed.metadata.duration_seconds
+    ));
+
+    if let Some(total_lines) = extract_total_output_lines(&parsed.output) {
+        sections.push(format!("Total output lines: {total_lines}"));
+    }
+
+    sections.push("Output:".to_string());
+    sections.push(parsed.output.clone());
+
+    sections.join("\n")
+}
+
+fn extract_total_output_lines(output: &str) -> Option<u32> {
+    let marker_start = output.find("[... omitted ")?;
+    let marker = &output[marker_start..];
+    let (_, after_of) = marker.split_once(" of ")?;
+    let (total_segment, _) = after_of.split_once(' ')?;
+    total_segment.parse::<u32>().ok()
+}
+
 #[derive(Debug)]
 pub enum ResponseEvent {
    Created,
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -750,6 +750,7 @@ pub struct ConfigToml {
    pub experimental_use_exec_command_tool: Option<bool>,
    pub experimental_use_unified_exec_tool: Option<bool>,
    pub experimental_use_rmcp_client: Option<bool>,
+    pub experimental_use_freeform_apply_patch: Option<bool>,

    pub projects: Option<HashMap<String, ProjectConfig>>,

@@ -1111,7 +1112,9 @@ impl Config {
                .or(cfg.chatgpt_base_url)
                .unwrap_or("https://chatgpt.com/backend-api/".to_string()),
            include_plan_tool: include_plan_tool.unwrap_or(false),
-            include_apply_patch_tool: include_apply_patch_tool.unwrap_or(false),
+            include_apply_patch_tool: include_apply_patch_tool
+                .or(cfg.experimental_use_freeform_apply_patch)
+                .unwrap_or(false),
            tools_web_search_request,
            use_experimental_streamable_shell_tool: cfg
                .experimental_use_exec_command_tool
--- a/codex-rs/core/src/model_family.rs
+++ b/codex-rs/core/src/model_family.rs
@@ -110,6 +110,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
            experimental_supported_tools: vec!["read_file".to_string()],
+            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
        )
    } else if slug.starts_with("gpt-5") {
        model_family!(
--- a/codex-rs/core/src/tools/handlers/apply_patch.rs
+++ b/codex-rs/core/src/tools/handlers/apply_patch.rs
@@ -106,7 +106,7 @@ pub enum ApplyPatchToolType {
 pub(crate) fn create_apply_patch_freeform_tool() -> ToolSpec {
    ToolSpec::Freeform(FreeformTool {
        name: "apply_patch".to_string(),
-        description: "Use the `apply_patch` tool to edit files".to_string(),
+        description: "Use the `apply_patch` tool to edit files. This is a FREEFORM tool, so do not wrap the patch in JSON.".to_string(),
        format: FreeformToolFormat {
            r#type: "grammar".to_string(),
            syntax: "lark".to_string(),
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -695,7 +695,7 @@ mod tests {
        });
        let (tools, _) = build_specs(&config, Some(HashMap::new())).build();

-        assert_eq_tool_names(&tools, &["unified_exec", "read_file"]);
+        assert_eq_tool_names(&tools, &["unified_exec", "apply_patch", "read_file"]);
    }

    #[test]
@@ -921,6 +921,7 @@ mod tests {
            &tools,
            &[
                "unified_exec",
+                "apply_patch",
                "read_file",
                "web_search",
                "view_image",
@@ -929,7 +930,7 @@ mod tests {
        );

        assert_eq!(
-            tools[4],
+            tools[5],
            ToolSpec::Function(ResponsesApiTool {
                name: "dash/search".to_string(),
                parameters: JsonSchema::Object {
@@ -988,6 +989,7 @@ mod tests {
            &tools,
            &[
                "unified_exec",
+                "apply_patch",
                "read_file",
                "web_search",
                "view_image",
@@ -995,7 +997,7 @@ mod tests {
            ],
        );
        assert_eq!(
-            tools[4],
+            tools[5],
            ToolSpec::Function(ResponsesApiTool {
                name: "dash/paginate".to_string(),
                parameters: JsonSchema::Object {
@@ -1019,7 +1021,7 @@ mod tests {
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
            include_plan_tool: false,
-            include_apply_patch_tool: false,
+            include_apply_patch_tool: true,
            include_web_search_request: true,
            use_streamable_shell_tool: false,
            include_view_image_tool: true,
@@ -1052,6 +1054,7 @@ mod tests {
            &tools,
            &[
                "unified_exec",
+                "apply_patch",
                "read_file",
                "web_search",
                "view_image",
@@ -1059,7 +1062,7 @@ mod tests {
            ],
        );
        assert_eq!(
-            tools[4],
+            tools[5],
            ToolSpec::Function(ResponsesApiTool {
                name: "dash/tags".to_string(),
                parameters: JsonSchema::Object {
@@ -1119,6 +1122,7 @@ mod tests {
            &tools,
            &[
                "unified_exec",
+                "apply_patch",
                "read_file",
                "web_search",
                "view_image",
@@ -1126,7 +1130,7 @@ mod tests {
            ],
        );
        assert_eq!(
-            tools[4],
+            tools[5],
            ToolSpec::Function(ResponsesApiTool {
                name: "dash/value".to_string(),
                parameters: JsonSchema::Object {
@@ -1223,6 +1227,7 @@ mod tests {
            &tools,
            &[
                "unified_exec",
+                "apply_patch",
                "read_file",
                "web_search",
                "view_image",
@@ -1231,7 +1236,7 @@ mod tests {
        );

        assert_eq!(
-            tools[4],
+            tools[5],
            ToolSpec::Function(ResponsesApiTool {
                name: "test_server/do_something_cool".to_string(),
                parameters: JsonSchema::Object {