Filter out reasoning items from previous turns (#5857)

Reduces request size and prevents 400 errors when switching between API orgs. Based on Responses API behavior described in https://cookbook.openai.com/examples/responses_api/reasoning_items#caching
2025-10-28 11:39:34 -07:00
parent 65107d24a2
commit 1b8f2543ac
6 changed files with 176 additions and 29 deletions
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -68,6 +68,14 @@ impl ResponsesRequest {
            .clone()
    }

+    /// Collects the entries of `self.input()` whose `"type"` field equals `ty`.
+    ///
+    /// Entries with a missing or non-string `"type"` never match; matches are cloned.
+    pub fn inputs_of_type(&self, ty: &str) -> Vec<Value> {
+        let has_type = |entry: &&Value| entry.get("type").and_then(Value::as_str) == Some(ty);
+        self.input().iter().filter(has_type).cloned().collect()
+    }
+
    pub fn function_call_output(&self, call_id: &str) -> Value {
        self.call_output(call_id, "function_call_output")
    }
--- a/codex-rs/core/tests/suite/prompt_caching.rs
+++ b/codex-rs/core/tests/suite/prompt_caching.rs
@@ -18,7 +18,10 @@ use codex_core::shell::default_user_shell;
 use codex_protocol::user_input::UserInput;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
+use core_test_support::responses;
+use core_test_support::responses::mount_sse_once;
 use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use std::collections::HashMap;
 use tempfile::TempDir;
@@ -883,3 +886,68 @@ async fn send_user_turn_with_changes_sends_environment_context() {
    ]);
    assert_eq!(body2["input"], expected_input_2);
 }
+
+/// Checks that reasoning items are resent within a turn but dropped on the next turn.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn cached_prompt_filters_reasoning_items_from_previous_turns() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = responses::start_mock_server().await;
+    // First request of turn one: a reasoning item followed by a shell function call.
+    let shell_call_args = serde_json::to_string(&serde_json::json!({
+        "command": ["/bin/echo", "tool output"],
+        "timeout_ms": 1_000,
+    }))
+    .expect("serialize shell args");
+    let tool_call_body = responses::sse(vec![
+        responses::ev_response_created("resp-first"),
+        responses::ev_reasoning_item("reason-1", &["Planning shell command"], &[]),
+        responses::ev_function_call("shell-call", "shell", &shell_call_args),
+        responses::ev_completed("resp-first"),
+    ]);
+    // Second request of turn one: another reasoning item, then the assistant reply.
+    let first_reply_body = responses::sse(vec![
+        responses::ev_response_created("resp-follow-up"),
+        responses::ev_reasoning_item(
+            "reason-2",
+            &["Shell execution completed"],
+            &["stdout: tool output"],
+        ),
+        responses::ev_assistant_message("assistant-1", "First turn reply"),
+        responses::ev_completed("resp-follow-up"),
+    ]);
+    // Turn two: an assistant reply only.
+    let second_reply_body = responses::sse(vec![
+        responses::ev_response_created("resp-second"),
+        responses::ev_assistant_message("assistant-2", "Second turn reply"),
+        responses::ev_completed("resp-second"),
+    ]);
+
+    // NOTE(review): presumably mocks are consumed in mount order — confirm in the helper.
+    mount_sse_once(&server, tool_call_body).await;
+    let first_reply_mock = mount_sse_once(&server, first_reply_body).await;
+    let second_reply_mock = mount_sse_once(&server, second_reply_body).await;
+
+    let mut builder = test_codex();
+    let codex = builder.build(&server).await?;
+    codex.submit_turn("hello 1").await?;
+    codex.submit_turn("hello 2").await?;
+
+    // Within a turn, the reasoning item produced earlier in that turn is sent back.
+    let same_turn_reasoning = first_reply_mock.single_request().inputs_of_type("reasoning");
+    assert_eq!(
+        same_turn_reasoning.len(),
+        1,
+        "expected first turn follow-up to include reasoning item"
+    );
+
+    // On the next turn, reasoning items from the previous turn must be gone.
+    let prior_turn_reasoning = second_reply_mock.single_request().inputs_of_type("reasoning");
+    assert_eq!(
+        prior_turn_reasoning.len(),
+        0,
+        "expected cached prompt to filter out prior reasoning items"
+    );
+
+    Ok(())
+}