[apply-patch] Clean up apply-patch tool definitions (#2539)

## Summary We've experienced a bit of drift in system prompting for `apply_patch`: - As pointed out in #2030 , our prettier formatting started altering prompt.md in a few ways - We introduced a separate markdown file for apply_patch instructions in #993, but currently duplicate them in the prompt.md file - We added a first-class apply_patch tool in #2303, which has yet another definition This PR starts to consolidate our logic in a few ways: - We now only use `apply_patch_tool_instructions.md](https://github.com/openai/codex/compare/dh--apply-patch-tool-definition?expand=1#diff-d4fffee5f85cb1975d3f66143a379e6c329de40c83ed5bf03ffd3829df985bea) for system instructions - We no longer include apply_patch system instructions if the tool is specified I'm leaving the definition in openai_tools.rs as duplicated text for now because we're going to be iterated on the first-class tool soon. ## Testing - [x] Added integration tests to verify prompt stability - [x] Tested locally with several different models (gpt-5, gpt-oss, o4-mini)
2025-08-21 20:07:41 -07:00
parent 9f71dcbf57
commit e4c275d615
6 changed files with 282 additions and 107 deletions
--- a/codex-rs/core/tests/prompt_caching.rs
+++ b/codex-rs/core/tests/prompt_caching.rs
@@ -25,6 +25,179 @@ fn sse_completed(id: &str) -> String {
    load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn default_system_instructions_contain_apply_patch() {
+    use pretty_assertions::assert_eq;
+
+    let server = MockServer::start().await;
+
+    let sse = sse_completed("resp");
+    let template = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse, "text/event-stream");
+
+    // Expect two POSTs to /v1/responses
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(template)
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    let cwd = TempDir::new().unwrap();
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+    config.model_provider = model_provider;
+    config.user_instructions = Some("be consistent and helpful".to_string());
+
+    let conversation_manager = ConversationManager::default();
+    let codex = conversation_manager
+        .new_conversation_with_auth(config, Some(CodexAuth::from_api_key("Test API Key")))
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello 1".into(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello 2".into(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(requests.len(), 2, "expected two POST requests");
+
+    let expected_instructions = [
+        include_str!("../prompt.md"),
+        include_str!("../../apply-patch/apply_patch_tool_instructions.md"),
+    ]
+    .join("\n");
+
+    let body0 = requests[0].body_json::<serde_json::Value>().unwrap();
+    assert_eq!(
+        body0["instructions"],
+        serde_json::json!(expected_instructions),
+    );
+    let body1 = requests[1].body_json::<serde_json::Value>().unwrap();
+    assert_eq!(
+        body1["instructions"],
+        serde_json::json!(expected_instructions),
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn prompt_tools_are_consistent_across_requests() {
+    use pretty_assertions::assert_eq;
+
+    let server = MockServer::start().await;
+
+    let sse = sse_completed("resp");
+    let template = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse, "text/event-stream");
+
+    // Expect two POSTs to /v1/responses
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(template)
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    let cwd = TempDir::new().unwrap();
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+    config.model_provider = model_provider;
+    config.user_instructions = Some("be consistent and helpful".to_string());
+    config.include_apply_patch_tool = true;
+    config.include_plan_tool = true;
+
+    let conversation_manager = ConversationManager::default();
+    let codex = conversation_manager
+        .new_conversation_with_auth(config, Some(CodexAuth::from_api_key("Test API Key")))
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello 1".into(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello 2".into(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(requests.len(), 2, "expected two POST requests");
+
+    let expected_instructions: &str = include_str!("../prompt.md");
+    // our internal implementation is responsible for keeping tools in sync
+    // with the OpenAI schema, so we just verify the tool presence here
+    let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch"];
+    fn assert_tool_names(body: &serde_json::Value, expected_names: &[&str]) {
+        assert_eq!(
+            body["tools"]
+                .as_array()
+                .unwrap()
+                .iter()
+                .map(|t| t["name"].as_str().unwrap().to_string())
+                .collect::<Vec<_>>(),
+            expected_names
+        );
+    }
+
+    let body0 = requests[0].body_json::<serde_json::Value>().unwrap();
+    assert_eq!(
+        body0["instructions"],
+        serde_json::json!(expected_instructions),
+    );
+    assert_tool_names(&body0, expected_tools_names);
+
+    let body1 = requests[1].body_json::<serde_json::Value>().unwrap();
+    assert_eq!(
+        body1["instructions"],
+        serde_json::json!(expected_instructions),
+    );
+    assert_tool_names(&body1, expected_tools_names);
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn prefixes_context_and_instructions_once_and_consistently_across_requests() {
    use pretty_assertions::assert_eq;