Filter out reasoning items from previous turns (#5857)

Reduces request size and prevents 400 errors when switching between API
orgs.

Based on Responses API behavior described in
https://cookbook.openai.com/examples/responses_api/reasoning_items#caching
This commit is contained in:
pakrym-oai
2025-10-28 11:39:34 -07:00
committed by GitHub
parent 65107d24a2
commit 1b8f2543ac
6 changed files with 176 additions and 29 deletions

View File

@@ -1001,12 +1001,6 @@ impl Session {
}
}
// TODO(aibrahim): get rid of this method. We shouldn't deal with Vec<ResponseItem>
// directly and should rather use ConversationHistory.
/// Locks the session state and returns the history snapshot it produces.
pub(crate) async fn history_snapshot(&self) -> Vec<ResponseItem> {
    self.state.lock().await.history_snapshot()
}
pub(crate) async fn clone_history(&self) -> ConversationHistory {
let state = self.state.lock().await;
state.clone_history()
@@ -1746,11 +1740,11 @@ pub(crate) async fn run_task(
if !pending_input.is_empty() {
review_thread_history.record_items(&pending_input);
}
review_thread_history.get_history()
review_thread_history.get_history_for_prompt()
} else {
sess.record_conversation_items(&turn_context, &pending_input)
.await;
sess.history_snapshot().await
sess.clone_history().await.get_history_for_prompt()
};
let turn_input_messages: Vec<String> = turn_input
@@ -1907,13 +1901,6 @@ fn parse_review_output_event(text: &str) -> ReviewOutputEvent {
}
}
/// Drops the items that are never sent to the model (ghost snapshots) and
/// returns the remaining items in their original order.
fn filter_model_visible_history(mut input: Vec<ResponseItem>) -> Vec<ResponseItem> {
    input.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. }));
    input
}
async fn run_turn(
sess: Arc<Session>,
turn_context: Arc<TurnContext>,
@@ -1934,7 +1921,7 @@ async fn run_turn(
.supports_parallel_tool_calls;
let parallel_tool_calls = model_supports_parallel;
let prompt = Prompt {
input: filter_model_visible_history(input),
input,
tools: router.specs(),
parallel_tool_calls,
base_instructions_override: turn_context.base_instructions.clone(),
@@ -2462,7 +2449,9 @@ mod tests {
},
)));
let actual = tokio_test::block_on(async { session.state.lock().await.history_snapshot() });
let actual = tokio_test::block_on(async {
session.state.lock().await.clone_history().get_history()
});
assert_eq!(expected, actual);
}
@@ -2473,7 +2462,9 @@ mod tests {
tokio_test::block_on(session.record_initial_history(InitialHistory::Forked(rollout_items)));
let actual = tokio_test::block_on(async { session.state.lock().await.history_snapshot() });
let actual = tokio_test::block_on(async {
session.state.lock().await.clone_history().get_history()
});
assert_eq!(expected, actual);
}
@@ -2870,7 +2861,7 @@ mod tests {
}
}
let history = sess.history_snapshot().await;
let history = sess.clone_history().await.get_history();
let found = history.iter().any(|item| match item {
ResponseItem::Message { role, content, .. } if role == "user" => {
content.iter().any(|ci| match ci {

View File

@@ -2,7 +2,6 @@ use std::sync::Arc;
use super::Session;
use super::TurnContext;
use super::filter_model_visible_history;
use super::get_last_assistant_message_from_turn;
use crate::Prompt;
use crate::client_common::ResponseEvent;
@@ -86,10 +85,9 @@ async fn run_compact_task_inner(
sess.persist_rollout_items(&[rollout_item]).await;
loop {
let turn_input = history.get_history();
let prompt_input = filter_model_visible_history(turn_input.clone());
let turn_input = history.get_history_for_prompt();
let prompt = Prompt {
input: prompt_input.clone(),
input: turn_input.clone(),
..Default::default()
};
let attempt_result = drain_to_completed(&sess, turn_context.as_ref(), &prompt).await;
@@ -111,7 +109,7 @@ async fn run_compact_task_inner(
return;
}
Err(e @ CodexErr::ContextWindowExceeded) => {
if prompt_input.len() > 1 {
if turn_input.len() > 1 {
// Trim from the beginning to preserve cache (prefix-based) and keep recent messages intact.
error!(
"Context window exceeded while compacting; removing oldest history item. Error: {e}"
@@ -150,7 +148,7 @@ async fn run_compact_task_inner(
}
}
let history_snapshot = sess.history_snapshot().await;
let history_snapshot = sess.clone_history().await.get_history();
let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
let user_messages = collect_user_messages(&history_snapshot);
let initial_context = sess.build_initial_context(turn_context.as_ref());

View File

@@ -67,6 +67,15 @@ impl ConversationHistory {
self.contents()
}
/// Returns the history prepared for sending to the model.
///
/// Starts from `get_history()` and then filters the returned items: ghost
/// snapshots are removed, and reasoning items that precede the last user
/// message are dropped.
pub(crate) fn get_history_for_prompt(&mut self) -> Vec<ResponseItem> {
    let mut prompt_items = self.get_history();
    Self::remove_ghost_snapshots(&mut prompt_items);
    Self::remove_reasoning_before_last_turn(&mut prompt_items);
    prompt_items
}
pub(crate) fn remove_first_item(&mut self) {
if !self.items.is_empty() {
// Remove the oldest item (front of the list). Items are ordered from
@@ -111,6 +120,29 @@ impl ConversationHistory {
self.items.clone()
}
/// Removes every `GhostSnapshot` entry from `items` in place, keeping the
/// remaining items in their original order.
fn remove_ghost_snapshots(items: &mut Vec<ResponseItem>) {
    let is_ghost_snapshot =
        |item: &ResponseItem| matches!(item, ResponseItem::GhostSnapshot { .. });
    items.retain(|item| !is_ghost_snapshot(item));
}
/// Drops reasoning items that precede the last user message.
///
/// The Responses API drops reasoning items before the last user message.
/// Sending them is harmless but can lead to validation errors when switching
/// between API organizations.
/// https://cookbook.openai.com/examples/responses_api/reasoning_items#caching
///
/// When `items` contains no user message, it is left untouched.
fn remove_reasoning_before_last_turn(items: &mut Vec<ResponseItem>) {
    let is_user_message =
        |item: &ResponseItem| matches!(item, ResponseItem::Message { role, .. } if role == "user");

    // The last user message marks the start of the current turn.
    let Some(turn_start) = items.iter().rposition(is_user_message) else {
        return;
    };

    // Set the current turn aside, strip reasoning from the older prefix, then
    // put the current turn back unchanged.
    let current_turn = items.split_off(turn_start);
    items.retain(|item| !matches!(item, ResponseItem::Reasoning { .. }));
    items.extend(current_turn);
}
fn ensure_call_outputs_present(&mut self) {
// Collect synthetic outputs to insert immediately after their calls.
// Store the insertion position (index of call) alongside the item so
@@ -498,6 +530,7 @@ fn is_api_message(message: &ResponseItem) -> bool {
#[cfg(test)]
mod tests {
use super::*;
use codex_git_tooling::GhostCommit;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::LocalShellAction;
@@ -515,6 +548,15 @@ mod tests {
}
}
/// Test helper: builds a reasoning item that carries only the given `id`,
/// with an empty summary and no content.
fn reasoning(id: &str) -> ResponseItem {
    let id = id.to_owned();
    ResponseItem::Reasoning {
        id,
        summary: vec![],
        content: None,
        encrypted_content: None,
    }
}
fn create_history_with_items(items: Vec<ResponseItem>) -> ConversationHistory {
let mut h = ConversationHistory::new();
h.record_items(items.iter());
@@ -571,6 +613,50 @@ mod tests {
);
}
#[test]
fn get_history_drops_reasoning_before_last_user_message() {
    // Two turns: "first" belongs to the older turn, while "second" and
    // "third" come after the latest user message.
    let recorded = vec![
        user_msg("initial"),
        reasoning("first"),
        assistant_msg("ack"),
        user_msg("latest"),
        reasoning("second"),
        assistant_msg("ack"),
        reasoning("third"),
    ];
    let mut history = ConversationHistory::new();
    history.record_items(recorded.iter());

    // Only the reasoning item from the older turn is missing from the prompt.
    let filtered = history.get_history_for_prompt();
    let expected = vec![
        user_msg("initial"),
        assistant_msg("ack"),
        user_msg("latest"),
        reasoning("second"),
        assistant_msg("ack"),
        reasoning("third"),
    ];
    assert_eq!(filtered, expected);

    // The stored history still holds all three reasoning items.
    let stored = history.contents();
    let reasoning_count = stored
        .iter()
        .filter(|item| matches!(item, ResponseItem::Reasoning { .. }))
        .count();
    assert_eq!(reasoning_count, 3);
}
#[test]
fn get_history_for_prompt_drops_ghost_commits() {
    // A history holding only a ghost snapshot yields an empty prompt.
    let ghost_commit = GhostCommit::new("ghost-1".to_string(), None, Vec::new(), Vec::new());
    let snapshot_only = vec![ResponseItem::GhostSnapshot { ghost_commit }];
    let mut history = create_history_with_items(snapshot_only);

    let filtered = history.get_history_for_prompt();

    assert_eq!(filtered, vec![]);
}
#[test]
fn remove_first_item_removes_matching_output_for_function_call() {
let items = vec![

View File

@@ -34,10 +34,6 @@ impl SessionState {
self.history.record_items(items)
}
/// Returns the conversation history as a list of response items by
/// delegating to `get_history()` on the stored history.
// NOTE(review): takes `&mut self`, presumably because `get_history()` needs
// mutable access to the history — confirm against `ConversationHistory`.
pub(crate) fn history_snapshot(&mut self) -> Vec<ResponseItem> {
self.history.get_history()
}
/// Returns a clone of the stored `ConversationHistory`, so the caller gets
/// its own value to query or modify.
pub(crate) fn clone_history(&self) -> ConversationHistory {
self.history.clone()
}