Move changing turn input functionalities to ConversationHistory (#5473)

We are doing some ad-hoc logic while dealing with conversation history. Ideally, we shouldn't mutate `vec[responseitem]` manually at all and should depend on `ConversationHistory` for those changes. Those changes are: - Adding input to the history - Removing items from the history - Correcting history I am also adding some `error` logs for cases we shouldn't ideally face. For example, we shouldn't be missing `toolcalls` or `outputs`. We shouldn't hit `ContextWindowExceeded` while performing `compact` This refactor will give us granular control over our context management.
2025-10-22 13:08:46 -07:00
parent 4cd6b01494
commit 273819aaae
6 changed files with 870 additions and 147 deletions
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -97,6 +97,10 @@ impl Match for ResponseMock {
            .lock()
            .unwrap()
            .push(ResponsesRequest(request.clone()));
+
+        // Enforce invariant checks on every request body captured by the mock.
+        // Panic on orphan tool outputs or calls to catch regressions early.
+        validate_request_body_invariants(request);
        true
    }
 }
@@ -386,3 +390,90 @@ pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) -> Res

    response_mock
 }
+
+/// Validate invariants on the request body sent to `/v1/responses`.
+///
+/// - No `function_call_output`/`custom_tool_call_output` with missing/empty `call_id`.
+/// - Every `function_call_output` must match a prior `function_call` or
+///   `local_shell_call` with the same `call_id` in the same `input`.
+/// - Every `custom_tool_call_output` must match a prior `custom_tool_call`.
+/// - Additionally, enforce symmetry: every `function_call`/`custom_tool_call`
+///   in the `input` must have a matching output entry.
+fn validate_request_body_invariants(request: &wiremock::Request) {
+    let Ok(body): Result<Value, _> = request.body_json() else {
+        return;
+    };
+    let Some(items) = body.get("input").and_then(Value::as_array) else {
+        panic!("input array not found in request");
+    };
+
+    use std::collections::HashSet;
+
+    fn get_call_id(item: &Value) -> Option<&str> {
+        item.get("call_id")
+            .and_then(Value::as_str)
+            .filter(|id| !id.is_empty())
+    }
+
+    fn gather_ids(items: &[Value], kind: &str) -> HashSet<String> {
+        items
+            .iter()
+            .filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
+            .filter_map(get_call_id)
+            .map(str::to_string)
+            .collect()
+    }
+
+    fn gather_output_ids(items: &[Value], kind: &str, missing_msg: &str) -> HashSet<String> {
+        items
+            .iter()
+            .filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
+            .map(|item| {
+                let Some(id) = get_call_id(item) else {
+                    panic!("{missing_msg}");
+                };
+                id.to_string()
+            })
+            .collect()
+    }
+
+    let function_calls = gather_ids(items, "function_call");
+    let custom_tool_calls = gather_ids(items, "custom_tool_call");
+    let local_shell_calls = gather_ids(items, "local_shell_call");
+    let function_call_outputs = gather_output_ids(
+        items,
+        "function_call_output",
+        "orphan function_call_output with empty call_id should be dropped",
+    );
+    let custom_tool_call_outputs = gather_output_ids(
+        items,
+        "custom_tool_call_output",
+        "orphan custom_tool_call_output with empty call_id should be dropped",
+    );
+
+    for cid in &function_call_outputs {
+        assert!(
+            function_calls.contains(cid) || local_shell_calls.contains(cid),
+            "function_call_output without matching call in input: {cid}",
+        );
+    }
+    for cid in &custom_tool_call_outputs {
+        assert!(
+            custom_tool_calls.contains(cid),
+            "custom_tool_call_output without matching call in input: {cid}",
+        );
+    }
+
+    for cid in &function_calls {
+        assert!(
+            function_call_outputs.contains(cid),
+            "Function call output is missing for call id: {cid}",
+        );
+    }
+    for cid in &custom_tool_calls {
+        assert!(
+            custom_tool_call_outputs.contains(cid),
+            "Custom tool call output is missing for call id: {cid}",
+        );
+    }
+}
--- a/codex-rs/core/tests/suite/tools.rs
+++ b/codex-rs/core/tests/suite/tools.rs
@@ -227,62 +227,6 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
    Ok(())
 }

-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn local_shell_missing_ids_maps_to_function_output_error() -> Result<()> {
-    skip_if_no_network!(Ok(()));
-
-    let server = start_mock_server().await;
-    let mut builder = test_codex();
-    let test = builder.build(&server).await?;
-
-    let local_shell_event = json!({
-        "type": "response.output_item.done",
-        "item": {
-            "type": "local_shell_call",
-            "status": "completed",
-            "action": {
-                "type": "exec",
-                "command": ["/bin/echo", "hi"],
-            }
-        }
-    });
-
-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_response_created("resp-1"),
-            local_shell_event,
-            ev_completed("resp-1"),
-        ]),
-    )
-    .await;
-    let second_mock = mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    )
-    .await;
-
-    submit_turn(
-        &test,
-        "check shell output",
-        AskForApproval::Never,
-        SandboxPolicy::DangerFullAccess,
-    )
-    .await?;
-
-    let item = second_mock.single_request().function_call_output("");
-    assert_eq!(item.get("call_id").and_then(Value::as_str), Some(""));
-    assert_eq!(
-        item.get("output").and_then(Value::as_str),
-        Some("LocalShellCall without call_id or id"),
-    );
-
-    Ok(())
-}
-
 async fn collect_tools(use_unified_exec: bool) -> Result<Vec<String>> {
    let server = start_mock_server().await;