Add warning on compact (#6052)
This PR introduces the ability for `core` to send `warnings` as it can send `errors. It also sends a warning on compaction. <img width="811" height="187" alt="image" src="https://github.com/user-attachments/assets/0947a42d-b720-420d-b7fd-115f8a65a46a" />
This commit is contained in:
@@ -13,6 +13,7 @@ use crate::protocol::ErrorEvent;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::TaskStartedEvent;
|
||||
use crate::protocol::TurnContextItem;
|
||||
use crate::protocol::WarningEvent;
|
||||
use crate::truncate::truncate_middle;
|
||||
use crate::util::backoff;
|
||||
use askama::Template;
|
||||
@@ -168,6 +169,11 @@ async fn run_compact_task_inner(
|
||||
message: "Compact task completed".to_string(),
|
||||
});
|
||||
sess.send_event(&turn_context, event).await;
|
||||
|
||||
let warning = EventMsg::Warning(WarningEvent {
|
||||
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.".to_string(),
|
||||
});
|
||||
sess.send_event(&turn_context, warning).await;
|
||||
}
|
||||
|
||||
pub fn content_items_to_text(content: &[ContentItem]) -> Option<String> {
|
||||
|
||||
@@ -46,6 +46,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
|
||||
| EventMsg::UndoCompleted(_)
|
||||
| EventMsg::TurnAborted(_) => true,
|
||||
EventMsg::Error(_)
|
||||
| EventMsg::Warning(_)
|
||||
| EventMsg::TaskStarted(_)
|
||||
| EventMsg::TaskComplete(_)
|
||||
| EventMsg::AgentMessageDelta(_)
|
||||
|
||||
@@ -8,6 +8,7 @@ use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::RolloutItem;
|
||||
use codex_core::protocol::RolloutLine;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::skip_if_no_network;
|
||||
@@ -45,6 +46,7 @@ const CONTEXT_LIMIT_MESSAGE: &str =
|
||||
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
|
||||
const DUMMY_CALL_ID: &str = "call-multi-auto";
|
||||
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
|
||||
pub(super) const COMPACT_WARNING_MESSAGE: &str = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.";
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn summarize_context_three_requests_and_instructions() {
|
||||
@@ -118,6 +120,11 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
|
||||
// 2) Summarize – second hit should include the summarization prompt.
|
||||
codex.submit(Op::Compact).await.unwrap();
|
||||
let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
|
||||
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
|
||||
panic!("expected warning event after compact");
|
||||
};
|
||||
assert_eq!(message, COMPACT_WARNING_MESSAGE);
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// 3) Next user input – third hit; history should include only the summary.
|
||||
@@ -288,6 +295,11 @@ async fn manual_compact_uses_custom_prompt() {
|
||||
.conversation;
|
||||
|
||||
codex.submit(Op::Compact).await.expect("trigger compact");
|
||||
let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
|
||||
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
|
||||
panic!("expected warning event after compact");
|
||||
};
|
||||
assert_eq!(message, COMPACT_WARNING_MESSAGE);
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("collect requests");
|
||||
@@ -742,7 +754,6 @@ async fn manual_compact_retries_after_context_window_error() {
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex.submit(Op::Compact).await.unwrap();
|
||||
|
||||
let EventMsg::BackgroundEvent(event) =
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::BackgroundEvent(_))).await
|
||||
else {
|
||||
@@ -753,6 +764,11 @@ async fn manual_compact_retries_after_context_window_error() {
|
||||
"background event should mention trimmed item count: {}",
|
||||
event.message
|
||||
);
|
||||
let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
|
||||
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
|
||||
panic!("expected warning event after compact retry");
|
||||
};
|
||||
assert_eq!(message, COMPACT_WARNING_MESSAGE);
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = request_log.requests();
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
//! request payload that Codex would send to the model and assert that the
|
||||
//! model-visible history matches the expected sequence of messages.
|
||||
|
||||
use super::compact::COMPACT_WARNING_MESSAGE;
|
||||
use super::compact::FIRST_REPLY;
|
||||
use super::compact::SUMMARY_TEXT;
|
||||
use codex_core::CodexAuth;
|
||||
@@ -20,6 +21,7 @@ use codex_core::config::Config;
|
||||
use codex_core::config::OPENAI_DEFAULT_MODEL;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
@@ -813,6 +815,11 @@ async fn compact_conversation(conversation: &Arc<CodexConversation>) {
|
||||
.submit(Op::Compact)
|
||||
.await
|
||||
.expect("compact conversation");
|
||||
let warning_event = wait_for_event(conversation, |ev| matches!(ev, EventMsg::Warning(_))).await;
|
||||
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
|
||||
panic!("expected warning event after compact");
|
||||
};
|
||||
assert_eq!(message, COMPACT_WARNING_MESSAGE);
|
||||
wait_for_event(conversation, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
}
|
||||
|
||||
|
||||
@@ -73,6 +73,7 @@ For complete documentation of the `Op` and `EventMsg` variants, refer to [protoc
|
||||
- `EventMsg::ExecApprovalRequest` – Request approval from user to execute a command
|
||||
- `EventMsg::TaskComplete` – A task completed successfully
|
||||
- `EventMsg::Error` – A task stopped with an error
|
||||
- `EventMsg::Warning` – A non-fatal warning that the client should surface to the user
|
||||
- `EventMsg::TurnComplete` – Contains a `response_id` bookmark for last `response_id` executed by the task. This can be used to continue the task at a later point in time, perhaps with additional user input.
|
||||
|
||||
The `response_id` returned from each task matches the OpenAI `response_id` stored in the API's `/responses` endpoint. It can be stored and used in future `Sessions` to resume threads of work.
|
||||
|
||||
@@ -21,6 +21,7 @@ use codex_core::protocol::StreamErrorEvent;
|
||||
use codex_core::protocol::TaskCompleteEvent;
|
||||
use codex_core::protocol::TurnAbortReason;
|
||||
use codex_core::protocol::TurnDiffEvent;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_core::protocol::WebSearchEndEvent;
|
||||
use codex_protocol::num_format::format_with_separators;
|
||||
use owo_colors::OwoColorize;
|
||||
@@ -54,6 +55,7 @@ pub(crate) struct EventProcessorWithHumanOutput {
|
||||
red: Style,
|
||||
green: Style,
|
||||
cyan: Style,
|
||||
yellow: Style,
|
||||
|
||||
/// Whether to include `AgentReasoning` events in the output.
|
||||
show_agent_reasoning: bool,
|
||||
@@ -81,6 +83,7 @@ impl EventProcessorWithHumanOutput {
|
||||
red: Style::new().red(),
|
||||
green: Style::new().green(),
|
||||
cyan: Style::new().cyan(),
|
||||
yellow: Style::new().yellow(),
|
||||
show_agent_reasoning: !config.hide_agent_reasoning,
|
||||
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
|
||||
last_message_path,
|
||||
@@ -97,6 +100,7 @@ impl EventProcessorWithHumanOutput {
|
||||
red: Style::new(),
|
||||
green: Style::new(),
|
||||
cyan: Style::new(),
|
||||
yellow: Style::new(),
|
||||
show_agent_reasoning: !config.hide_agent_reasoning,
|
||||
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
|
||||
last_message_path,
|
||||
@@ -161,6 +165,13 @@ impl EventProcessor for EventProcessorWithHumanOutput {
|
||||
let prefix = "ERROR:".style(self.red);
|
||||
ts_msg!(self, "{prefix} {message}");
|
||||
}
|
||||
EventMsg::Warning(WarningEvent { message }) => {
|
||||
ts_msg!(
|
||||
self,
|
||||
"{} {message}",
|
||||
"warning:".style(self.yellow).style(self.bold)
|
||||
);
|
||||
}
|
||||
EventMsg::DeprecationNotice(DeprecationNoticeEvent { summary, details }) => {
|
||||
ts_msg!(
|
||||
self,
|
||||
|
||||
@@ -8,6 +8,7 @@ use crate::event_processor::handle_last_message;
|
||||
use crate::exec_events::AgentMessageItem;
|
||||
use crate::exec_events::CommandExecutionItem;
|
||||
use crate::exec_events::CommandExecutionStatus;
|
||||
use crate::exec_events::ErrorItem;
|
||||
use crate::exec_events::FileChangeItem;
|
||||
use crate::exec_events::FileUpdateChange;
|
||||
use crate::exec_events::ItemCompletedEvent;
|
||||
@@ -129,6 +130,15 @@ impl EventProcessorWithJsonOutput {
|
||||
self.last_critical_error = Some(error.clone());
|
||||
vec![ThreadEvent::Error(error)]
|
||||
}
|
||||
EventMsg::Warning(ev) => {
|
||||
let item = ThreadItem {
|
||||
id: self.get_next_item_id(),
|
||||
details: ThreadItemDetails::Error(ErrorItem {
|
||||
message: ev.message.clone(),
|
||||
}),
|
||||
};
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
|
||||
}
|
||||
EventMsg::StreamError(ev) => vec![ThreadEvent::Error(ThreadErrorEvent {
|
||||
message: ev.message.clone(),
|
||||
})],
|
||||
|
||||
@@ -12,11 +12,13 @@ use codex_core::protocol::McpToolCallEndEvent;
|
||||
use codex_core::protocol::PatchApplyBeginEvent;
|
||||
use codex_core::protocol::PatchApplyEndEvent;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_core::protocol::WebSearchEndEvent;
|
||||
use codex_exec::event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
|
||||
use codex_exec::exec_events::AgentMessageItem;
|
||||
use codex_exec::exec_events::CommandExecutionItem;
|
||||
use codex_exec::exec_events::CommandExecutionStatus;
|
||||
use codex_exec::exec_events::ErrorItem;
|
||||
use codex_exec::exec_events::ItemCompletedEvent;
|
||||
use codex_exec::exec_events::ItemStartedEvent;
|
||||
use codex_exec::exec_events::ItemUpdatedEvent;
|
||||
@@ -540,6 +542,28 @@ fn error_event_produces_error() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn warning_event_produces_error_item() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let out = ep.collect_thread_events(&event(
|
||||
"e1",
|
||||
EventMsg::Warning(WarningEvent {
|
||||
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.".to_string(),
|
||||
}),
|
||||
));
|
||||
assert_eq!(
|
||||
out,
|
||||
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
|
||||
item: ThreadItem {
|
||||
id: "item_0".to_string(),
|
||||
details: ThreadItemDetails::Error(ErrorItem {
|
||||
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.".to_string(),
|
||||
}),
|
||||
},
|
||||
})]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stream_error_event_produces_error() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
|
||||
@@ -204,6 +204,9 @@ async fn run_codex_tool_session_inner(
|
||||
outgoing.send_response(request_id.clone(), result).await;
|
||||
break;
|
||||
}
|
||||
EventMsg::Warning(_) => {
|
||||
continue;
|
||||
}
|
||||
EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
|
||||
call_id,
|
||||
reason,
|
||||
|
||||
@@ -438,6 +438,10 @@ pub enum EventMsg {
|
||||
/// Error while executing a submission
|
||||
Error(ErrorEvent),
|
||||
|
||||
/// Warning issued while processing a submission. Unlike `Error`, this
|
||||
/// indicates the task continued but the user should still be notified.
|
||||
Warning(WarningEvent),
|
||||
|
||||
/// Agent has started a task
|
||||
TaskStarted(TaskStartedEvent),
|
||||
|
||||
@@ -672,6 +676,11 @@ pub struct ErrorEvent {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct WarningEvent {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct TaskCompleteEvent {
|
||||
pub last_agent_message: Option<String>,
|
||||
|
||||
@@ -42,6 +42,7 @@ use codex_core::protocol::UndoCompletedEvent;
|
||||
use codex_core::protocol::UndoStartedEvent;
|
||||
use codex_core::protocol::UserMessageEvent;
|
||||
use codex_core::protocol::ViewImageToolCallEvent;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_core::protocol::WebSearchBeginEvent;
|
||||
use codex_core::protocol::WebSearchEndEvent;
|
||||
use codex_protocol::ConversationId;
|
||||
@@ -519,6 +520,11 @@ impl ChatWidget {
|
||||
self.maybe_send_next_queued_input();
|
||||
}
|
||||
|
||||
fn on_warning(&mut self, message: String) {
|
||||
self.add_to_history(history_cell::new_warning_event(message));
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
/// Handle a turn aborted due to user interrupt (Esc).
|
||||
/// When there are queued user messages, restore them into the composer
|
||||
/// separated by newlines rather than auto‑submitting the next one.
|
||||
@@ -1477,6 +1483,7 @@ impl ChatWidget {
|
||||
self.set_token_info(ev.info);
|
||||
self.on_rate_limit_snapshot(ev.rate_limits);
|
||||
}
|
||||
EventMsg::Warning(WarningEvent { message }) => self.on_warning(message),
|
||||
EventMsg::Error(ErrorEvent { message }) => self.on_error(message),
|
||||
EventMsg::TurnAborted(ev) => match ev.reason {
|
||||
TurnAbortReason::Interrupted => {
|
||||
|
||||
@@ -37,6 +37,7 @@ use codex_core::protocol::TaskStartedEvent;
|
||||
use codex_core::protocol::UndoCompletedEvent;
|
||||
use codex_core::protocol::UndoStartedEvent;
|
||||
use codex_core::protocol::ViewImageToolCallEvent;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use codex_protocol::plan_tool::PlanItemArg;
|
||||
@@ -56,6 +57,8 @@ use tempfile::tempdir;
|
||||
use tokio::sync::mpsc::error::TryRecvError;
|
||||
use tokio::sync::mpsc::unbounded_channel;
|
||||
|
||||
const TEST_WARNING_MESSAGE: &str = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.";
|
||||
|
||||
fn test_config() -> Config {
|
||||
// Use base defaults to avoid depending on host state.
|
||||
Config::load_from_base_config_with_overrides(
|
||||
@@ -2445,6 +2448,25 @@ fn stream_error_updates_status_indicator() {
|
||||
assert_eq!(status.header(), msg);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn warning_event_adds_warning_history_cell() {
|
||||
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual();
|
||||
chat.handle_codex_event(Event {
|
||||
id: "sub-1".into(),
|
||||
msg: EventMsg::Warning(WarningEvent {
|
||||
message: TEST_WARNING_MESSAGE.to_string(),
|
||||
}),
|
||||
});
|
||||
|
||||
let cells = drain_insert_history(&mut rx);
|
||||
assert_eq!(cells.len(), 1, "expected one warning history cell");
|
||||
let rendered = lines_to_single_string(&cells[0]);
|
||||
assert!(
|
||||
rendered.contains(TEST_WARNING_MESSAGE),
|
||||
"warning cell missing content: {rendered}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_agent_messages_in_single_turn_emit_multiple_headers() {
|
||||
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual();
|
||||
|
||||
Reference in New Issue
Block a user