Add warning on compact (#6052)

This PR introduces the ability for `core` to send `warnings` the same way it can send `errors`. It also sends a warning on compaction.

![image](https://github.com/user-attachments/assets/0947a42d-b720-420d-b7fd-115f8a65a46a)
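
At a glance, the emit path this enables (a minimal sketch; `send_warning`, `Session`, and `TurnContext` are illustrative stand-ins for the surrounding `core` types — only `EventMsg::Warning`, `WarningEvent`, and the `send_event` call are taken from the diff below):

```rust
use crate::protocol::EventMsg;
use crate::protocol::WarningEvent;

// Illustrative helper: wrap a message in the new `Warning` variant and push it
// through the same channel `core` already uses for `Error` events.
async fn send_warning(sess: &Session, turn_context: &TurnContext, message: impl Into<String>) {
    let warning = EventMsg::Warning(WarningEvent {
        message: message.into(),
    });
    sess.send_event(turn_context, warning).await;
}
```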
Commit c8ebb2a0dc by Ahmed Ibrahim, committed via GitHub on 2025-10-31 13:27:33 -07:00 (parent 88e083a9d0).
12 changed files with 118 additions and 1 deletion

View File

@@ -13,6 +13,7 @@ use crate::protocol::ErrorEvent;
use crate::protocol::EventMsg;
use crate::protocol::TaskStartedEvent;
use crate::protocol::TurnContextItem;
use crate::protocol::WarningEvent;
use crate::truncate::truncate_middle;
use crate::util::backoff;
use askama::Template;
@@ -168,6 +169,11 @@ async fn run_compact_task_inner(
message: "Compact task completed".to_string(),
});
sess.send_event(&turn_context, event).await;
let warning = EventMsg::Warning(WarningEvent {
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.".to_string(),
});
sess.send_event(&turn_context, warning).await;
}
pub fn content_items_to_text(content: &[ContentItem]) -> Option<String> {

View File

@@ -46,6 +46,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
| EventMsg::UndoCompleted(_)
| EventMsg::TurnAborted(_) => true,
EventMsg::Error(_)
| EventMsg::Warning(_)
| EventMsg::TaskStarted(_)
| EventMsg::TaskComplete(_)
| EventMsg::AgentMessageDelta(_)

View File

@@ -8,6 +8,7 @@ use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::RolloutItem;
use codex_core::protocol::RolloutLine;
use codex_core::protocol::WarningEvent;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::skip_if_no_network;
@@ -45,6 +46,7 @@ const CONTEXT_LIMIT_MESSAGE: &str =
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
const DUMMY_CALL_ID: &str = "call-multi-auto";
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
pub(super) const COMPACT_WARNING_MESSAGE: &str = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.";
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn summarize_context_three_requests_and_instructions() {
@@ -118,6 +120,11 @@ async fn summarize_context_three_requests_and_instructions() {
// 2) Summarize second hit should include the summarization prompt.
codex.submit(Op::Compact).await.unwrap();
let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
panic!("expected warning event after compact");
};
assert_eq!(message, COMPACT_WARNING_MESSAGE);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// 3) Next user input third hit; history should include only the summary.
@@ -288,6 +295,11 @@ async fn manual_compact_uses_custom_prompt() {
.conversation;
codex.submit(Op::Compact).await.expect("trigger compact");
let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
panic!("expected warning event after compact");
};
assert_eq!(message, COMPACT_WARNING_MESSAGE);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.expect("collect requests");
@@ -742,7 +754,6 @@ async fn manual_compact_retries_after_context_window_error() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex.submit(Op::Compact).await.unwrap();
let EventMsg::BackgroundEvent(event) =
wait_for_event(&codex, |ev| matches!(ev, EventMsg::BackgroundEvent(_))).await
else {
@@ -753,6 +764,11 @@ async fn manual_compact_retries_after_context_window_error() {
"background event should mention trimmed item count: {}",
event.message
);
let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
panic!("expected warning event after compact retry");
};
assert_eq!(message, COMPACT_WARNING_MESSAGE);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = request_log.requests();

View File

@@ -7,6 +7,7 @@
//! request payload that Codex would send to the model and assert that the
//! model-visible history matches the expected sequence of messages.
use super::compact::COMPACT_WARNING_MESSAGE;
use super::compact::FIRST_REPLY;
use super::compact::SUMMARY_TEXT;
use codex_core::CodexAuth;
@@ -20,6 +21,7 @@ use codex_core::config::Config;
use codex_core::config::OPENAI_DEFAULT_MODEL;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::WarningEvent;
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
@@ -813,6 +815,11 @@ async fn compact_conversation(conversation: &Arc<CodexConversation>) {
.submit(Op::Compact)
.await
.expect("compact conversation");
let warning_event = wait_for_event(conversation, |ev| matches!(ev, EventMsg::Warning(_))).await;
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
panic!("expected warning event after compact");
};
assert_eq!(message, COMPACT_WARNING_MESSAGE);
wait_for_event(conversation, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
}

View File

@@ -73,6 +73,7 @@ For complete documentation of the `Op` and `EventMsg` variants, refer to [protoc
- `EventMsg::ExecApprovalRequest` Request approval from user to execute a command
- `EventMsg::TaskComplete` A task completed successfully
- `EventMsg::Error` A task stopped with an error
- `EventMsg::Warning` A non-fatal warning that the client should surface to the user
- `EventMsg::TurnComplete` Contains a `response_id` bookmark for last `response_id` executed by the task. This can be used to continue the task at a later point in time, perhaps with additional user input.
The `response_id` returned from each task matches the OpenAI `response_id` stored in the API's `/responses` endpoint. It can be stored and used in future `Sessions` to resume threads of work.
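
For the `EventMsg::Warning` entry above, a minimal consumer sketch (the handler shape is illustrative; only the `EventMsg` variants and their fields come from the protocol):

```rust
use codex_core::protocol::EventMsg;
use codex_core::protocol::WarningEvent;

// Illustrative handling: warnings are surfaced to the user but, unlike errors,
// do not mean the task stopped.
fn handle_event(msg: EventMsg) {
    match msg {
        EventMsg::Error(err) => eprintln!("ERROR: {}", err.message),
        EventMsg::Warning(WarningEvent { message }) => eprintln!("warning: {message}"),
        EventMsg::TaskComplete(_) => println!("task complete"),
        _ => {}
    }
}
```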

View File

@@ -21,6 +21,7 @@ use codex_core::protocol::StreamErrorEvent;
use codex_core::protocol::TaskCompleteEvent;
use codex_core::protocol::TurnAbortReason;
use codex_core::protocol::TurnDiffEvent;
use codex_core::protocol::WarningEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_protocol::num_format::format_with_separators;
use owo_colors::OwoColorize;
@@ -54,6 +55,7 @@ pub(crate) struct EventProcessorWithHumanOutput {
red: Style,
green: Style,
cyan: Style,
yellow: Style,
/// Whether to include `AgentReasoning` events in the output.
show_agent_reasoning: bool,
@@ -81,6 +83,7 @@ impl EventProcessorWithHumanOutput {
red: Style::new().red(),
green: Style::new().green(),
cyan: Style::new().cyan(),
yellow: Style::new().yellow(),
show_agent_reasoning: !config.hide_agent_reasoning,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
last_message_path,
@@ -97,6 +100,7 @@ impl EventProcessorWithHumanOutput {
red: Style::new(),
green: Style::new(),
cyan: Style::new(),
yellow: Style::new(),
show_agent_reasoning: !config.hide_agent_reasoning,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
last_message_path,
@@ -161,6 +165,13 @@ impl EventProcessor for EventProcessorWithHumanOutput {
let prefix = "ERROR:".style(self.red);
ts_msg!(self, "{prefix} {message}");
}
EventMsg::Warning(WarningEvent { message }) => {
ts_msg!(
self,
"{} {message}",
"warning:".style(self.yellow).style(self.bold)
);
}
EventMsg::DeprecationNotice(DeprecationNoticeEvent { summary, details }) => {
ts_msg!(
self,

View File

@@ -8,6 +8,7 @@ use crate::event_processor::handle_last_message;
use crate::exec_events::AgentMessageItem;
use crate::exec_events::CommandExecutionItem;
use crate::exec_events::CommandExecutionStatus;
use crate::exec_events::ErrorItem;
use crate::exec_events::FileChangeItem;
use crate::exec_events::FileUpdateChange;
use crate::exec_events::ItemCompletedEvent;
@@ -129,6 +130,15 @@ impl EventProcessorWithJsonOutput {
self.last_critical_error = Some(error.clone());
vec![ThreadEvent::Error(error)]
}
EventMsg::Warning(ev) => {
let item = ThreadItem {
id: self.get_next_item_id(),
details: ThreadItemDetails::Error(ErrorItem {
message: ev.message.clone(),
}),
};
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
}
EventMsg::StreamError(ev) => vec![ThreadEvent::Error(ThreadErrorEvent {
message: ev.message.clone(),
})],

View File

@@ -12,11 +12,13 @@ use codex_core::protocol::McpToolCallEndEvent;
use codex_core::protocol::PatchApplyBeginEvent;
use codex_core::protocol::PatchApplyEndEvent;
use codex_core::protocol::SessionConfiguredEvent;
use codex_core::protocol::WarningEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_exec::event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
use codex_exec::exec_events::AgentMessageItem;
use codex_exec::exec_events::CommandExecutionItem;
use codex_exec::exec_events::CommandExecutionStatus;
use codex_exec::exec_events::ErrorItem;
use codex_exec::exec_events::ItemCompletedEvent;
use codex_exec::exec_events::ItemStartedEvent;
use codex_exec::exec_events::ItemUpdatedEvent;
@@ -540,6 +542,28 @@ fn error_event_produces_error() {
);
}
#[test]
fn warning_event_produces_error_item() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let out = ep.collect_thread_events(&event(
"e1",
EventMsg::Warning(WarningEvent {
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.".to_string(),
}),
));
assert_eq!(
out,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::Error(ErrorItem {
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.".to_string(),
}),
},
})]
);
}
#[test]
fn stream_error_event_produces_error() {
let mut ep = EventProcessorWithJsonOutput::new(None);

View File

@@ -204,6 +204,9 @@ async fn run_codex_tool_session_inner(
outgoing.send_response(request_id.clone(), result).await;
break;
}
EventMsg::Warning(_) => {
continue;
}
EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
call_id,
reason,

View File

@@ -438,6 +438,10 @@ pub enum EventMsg {
/// Error while executing a submission
Error(ErrorEvent),
/// Warning issued while processing a submission. Unlike `Error`, this
/// indicates the task continued but the user should still be notified.
Warning(WarningEvent),
/// Agent has started a task
TaskStarted(TaskStartedEvent),
@@ -672,6 +676,11 @@ pub struct ErrorEvent {
pub message: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct WarningEvent {
pub message: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct TaskCompleteEvent {
pub last_agent_message: Option<String>,
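
The new `WarningEvent` above is a single-field struct; a quick sketch of its serialized shape (assumes `serde_json` is available; the enum-level tagging on `EventMsg` is defined elsewhere and not shown in this hunk):

```rust
use codex_core::protocol::WarningEvent;

fn main() {
    let ev = WarningEvent {
        message: "compaction happened".to_string(),
    };
    // Derived `Serialize` on a one-field struct yields a single-key object.
    let json = serde_json::to_string(&ev).expect("serialize warning");
    assert_eq!(json, r#"{"message":"compaction happened"}"#);
}
```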

View File

@@ -42,6 +42,7 @@ use codex_core::protocol::UndoCompletedEvent;
use codex_core::protocol::UndoStartedEvent;
use codex_core::protocol::UserMessageEvent;
use codex_core::protocol::ViewImageToolCallEvent;
use codex_core::protocol::WarningEvent;
use codex_core::protocol::WebSearchBeginEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_protocol::ConversationId;
@@ -519,6 +520,11 @@ impl ChatWidget {
self.maybe_send_next_queued_input();
}
fn on_warning(&mut self, message: String) {
self.add_to_history(history_cell::new_warning_event(message));
self.request_redraw();
}
/// Handle a turn aborted due to user interrupt (Esc).
/// When there are queued user messages, restore them into the composer
/// separated by newlines rather than autosubmitting the next one.
@@ -1477,6 +1483,7 @@ impl ChatWidget {
self.set_token_info(ev.info);
self.on_rate_limit_snapshot(ev.rate_limits);
}
EventMsg::Warning(WarningEvent { message }) => self.on_warning(message),
EventMsg::Error(ErrorEvent { message }) => self.on_error(message),
EventMsg::TurnAborted(ev) => match ev.reason {
TurnAbortReason::Interrupted => {

View File

@@ -37,6 +37,7 @@ use codex_core::protocol::TaskStartedEvent;
use codex_core::protocol::UndoCompletedEvent;
use codex_core::protocol::UndoStartedEvent;
use codex_core::protocol::ViewImageToolCallEvent;
use codex_core::protocol::WarningEvent;
use codex_protocol::ConversationId;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::plan_tool::PlanItemArg;
@@ -56,6 +57,8 @@ use tempfile::tempdir;
use tokio::sync::mpsc::error::TryRecvError;
use tokio::sync::mpsc::unbounded_channel;
const TEST_WARNING_MESSAGE: &str = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.";
fn test_config() -> Config {
// Use base defaults to avoid depending on host state.
Config::load_from_base_config_with_overrides(
@@ -2445,6 +2448,25 @@ fn stream_error_updates_status_indicator() {
assert_eq!(status.header(), msg);
}
#[test]
fn warning_event_adds_warning_history_cell() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual();
chat.handle_codex_event(Event {
id: "sub-1".into(),
msg: EventMsg::Warning(WarningEvent {
message: TEST_WARNING_MESSAGE.to_string(),
}),
});
let cells = drain_insert_history(&mut rx);
assert_eq!(cells.len(), 1, "expected one warning history cell");
let rendered = lines_to_single_string(&cells[0]);
assert!(
rendered.contains(TEST_WARNING_MESSAGE),
"warning cell missing content: {rendered}"
);
}
#[test]
fn multiple_agent_messages_in_single_turn_emit_multiple_headers() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual();