Move changing turn input functionalities to ConversationHistory (#5473)
We are doing some ad-hoc logic while dealing with conversation history. Ideally, we shouldn't mutate `vec[responseitem]` manually at all and should depend on `ConversationHistory` for those changes. Those changes are: - Adding input to the history - Removing items from the history - Correcting history I am also adding some `error` logs for cases we shouldn't ideally face. For example, we shouldn't be missing `toolcalls` or `outputs`. We shouldn't hit `ContextWindowExceeded` while performing `compact` This refactor will give us granular control over our context management.
This commit is contained in:
@@ -97,6 +97,10 @@ impl Match for ResponseMock {
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(ResponsesRequest(request.clone()));
|
||||
|
||||
// Enforce invariant checks on every request body captured by the mock.
|
||||
// Panic on orphan tool outputs or calls to catch regressions early.
|
||||
validate_request_body_invariants(request);
|
||||
true
|
||||
}
|
||||
}
|
||||
@@ -386,3 +390,90 @@ pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) -> Res
|
||||
|
||||
response_mock
|
||||
}
|
||||
|
||||
/// Validate invariants on the request body sent to `/v1/responses`.
|
||||
///
|
||||
/// - No `function_call_output`/`custom_tool_call_output` with missing/empty `call_id`.
|
||||
/// - Every `function_call_output` must match a prior `function_call` or
|
||||
/// `local_shell_call` with the same `call_id` in the same `input`.
|
||||
/// - Every `custom_tool_call_output` must match a prior `custom_tool_call`.
|
||||
/// - Additionally, enforce symmetry: every `function_call`/`custom_tool_call`
|
||||
/// in the `input` must have a matching output entry.
|
||||
fn validate_request_body_invariants(request: &wiremock::Request) {
|
||||
let Ok(body): Result<Value, _> = request.body_json() else {
|
||||
return;
|
||||
};
|
||||
let Some(items) = body.get("input").and_then(Value::as_array) else {
|
||||
panic!("input array not found in request");
|
||||
};
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
fn get_call_id(item: &Value) -> Option<&str> {
|
||||
item.get("call_id")
|
||||
.and_then(Value::as_str)
|
||||
.filter(|id| !id.is_empty())
|
||||
}
|
||||
|
||||
fn gather_ids(items: &[Value], kind: &str) -> HashSet<String> {
|
||||
items
|
||||
.iter()
|
||||
.filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
|
||||
.filter_map(get_call_id)
|
||||
.map(str::to_string)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn gather_output_ids(items: &[Value], kind: &str, missing_msg: &str) -> HashSet<String> {
|
||||
items
|
||||
.iter()
|
||||
.filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
|
||||
.map(|item| {
|
||||
let Some(id) = get_call_id(item) else {
|
||||
panic!("{missing_msg}");
|
||||
};
|
||||
id.to_string()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
let function_calls = gather_ids(items, "function_call");
|
||||
let custom_tool_calls = gather_ids(items, "custom_tool_call");
|
||||
let local_shell_calls = gather_ids(items, "local_shell_call");
|
||||
let function_call_outputs = gather_output_ids(
|
||||
items,
|
||||
"function_call_output",
|
||||
"orphan function_call_output with empty call_id should be dropped",
|
||||
);
|
||||
let custom_tool_call_outputs = gather_output_ids(
|
||||
items,
|
||||
"custom_tool_call_output",
|
||||
"orphan custom_tool_call_output with empty call_id should be dropped",
|
||||
);
|
||||
|
||||
for cid in &function_call_outputs {
|
||||
assert!(
|
||||
function_calls.contains(cid) || local_shell_calls.contains(cid),
|
||||
"function_call_output without matching call in input: {cid}",
|
||||
);
|
||||
}
|
||||
for cid in &custom_tool_call_outputs {
|
||||
assert!(
|
||||
custom_tool_calls.contains(cid),
|
||||
"custom_tool_call_output without matching call in input: {cid}",
|
||||
);
|
||||
}
|
||||
|
||||
for cid in &function_calls {
|
||||
assert!(
|
||||
function_call_outputs.contains(cid),
|
||||
"Function call output is missing for call id: {cid}",
|
||||
);
|
||||
}
|
||||
for cid in &custom_tool_calls {
|
||||
assert!(
|
||||
custom_tool_call_outputs.contains(cid),
|
||||
"Custom tool call output is missing for call id: {cid}",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -227,62 +227,6 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn local_shell_missing_ids_maps_to_function_output_error() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let local_shell_event = json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "local_shell_call",
|
||||
"status": "completed",
|
||||
"action": {
|
||||
"type": "exec",
|
||||
"command": ["/bin/echo", "hi"],
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
local_shell_event,
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"check shell output",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let item = second_mock.single_request().function_call_output("");
|
||||
assert_eq!(item.get("call_id").and_then(Value::as_str), Some(""));
|
||||
assert_eq!(
|
||||
item.get("output").and_then(Value::as_str),
|
||||
Some("LocalShellCall without call_id or id"),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn collect_tools(use_unified_exec: bool) -> Result<Vec<String>> {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user