Add warning on compact (#6052)

This PR introduces the ability for `core` to send `warnings` the same way it can send `errors`. It also sends a warning on compaction.

![image](https://github.com/user-attachments/assets/0947a42d-b720-420d-b7fd-115f8a65a46a)
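
At a glance, the emit path this enables (a minimal sketch; `send_warning`, `Session`, and `TurnContext` are illustrative stand-ins for the surrounding `core` types — only `EventMsg::Warning`, `WarningEvent`, and the `send_event` call are taken from the diff below):

```rust
use crate::protocol::EventMsg;
use crate::protocol::WarningEvent;

// Illustrative helper: wrap a message in the new `Warning` variant and push it
// through the same channel `core` already uses for `Error` events.
async fn send_warning(sess: &Session, turn_context: &TurnContext, message: impl Into<String>) {
    let warning = EventMsg::Warning(WarningEvent {
        message: message.into(),
    });
    sess.send_event(turn_context, warning).await;
}
```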
Commit c8ebb2a0dc by Ahmed Ibrahim, committed via GitHub on 2025-10-31 13:27:33 -07:00 (parent 88e083a9d0).
12 changed files with 118 additions and 1 deletion

View File

@@ -13,6 +13,7 @@ use crate::protocol::ErrorEvent;
use crate::protocol::EventMsg;
use crate::protocol::TaskStartedEvent;
use crate::protocol::TurnContextItem;
use crate::protocol::WarningEvent;
use crate::truncate::truncate_middle;
use crate::util::backoff;
use askama::Template;
@@ -168,6 +169,11 @@ async fn run_compact_task_inner(
message: "Compact task completed".to_string(),
});
sess.send_event(&turn_context, event).await;
let warning = EventMsg::Warning(WarningEvent {
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.".to_string(),
});
sess.send_event(&turn_context, warning).await;
}
pub fn content_items_to_text(content: &[ContentItem]) -> Option<String> {

View File

@@ -46,6 +46,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
| EventMsg::UndoCompleted(_)
| EventMsg::TurnAborted(_) => true,
EventMsg::Error(_)
| EventMsg::Warning(_)
| EventMsg::TaskStarted(_)
| EventMsg::TaskComplete(_)
| EventMsg::AgentMessageDelta(_)

View File

@@ -8,6 +8,7 @@ use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::RolloutItem;
use codex_core::protocol::RolloutLine;
use codex_core::protocol::WarningEvent;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::skip_if_no_network;
@@ -45,6 +46,7 @@ const CONTEXT_LIMIT_MESSAGE: &str =
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
const DUMMY_CALL_ID: &str = "call-multi-auto";
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
pub(super) const COMPACT_WARNING_MESSAGE: &str = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.";
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn summarize_context_three_requests_and_instructions() {
@@ -118,6 +120,11 @@ async fn summarize_context_three_requests_and_instructions() {
// 2) Summarize second hit should include the summarization prompt.
codex.submit(Op::Compact).await.unwrap();
let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
panic!("expected warning event after compact");
};
assert_eq!(message, COMPACT_WARNING_MESSAGE);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// 3) Next user input third hit; history should include only the summary.
@@ -288,6 +295,11 @@ async fn manual_compact_uses_custom_prompt() {
.conversation;
codex.submit(Op::Compact).await.expect("trigger compact");
let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
panic!("expected warning event after compact");
};
assert_eq!(message, COMPACT_WARNING_MESSAGE);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.expect("collect requests");
@@ -742,7 +754,6 @@ async fn manual_compact_retries_after_context_window_error() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex.submit(Op::Compact).await.unwrap();
let EventMsg::BackgroundEvent(event) =
wait_for_event(&codex, |ev| matches!(ev, EventMsg::BackgroundEvent(_))).await
else {
@@ -753,6 +764,11 @@ async fn manual_compact_retries_after_context_window_error() {
"background event should mention trimmed item count: {}",
event.message
);
let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
panic!("expected warning event after compact retry");
};
assert_eq!(message, COMPACT_WARNING_MESSAGE);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = request_log.requests();

View File

@@ -7,6 +7,7 @@
//! request payload that Codex would send to the model and assert that the
//! model-visible history matches the expected sequence of messages.
use super::compact::COMPACT_WARNING_MESSAGE;
use super::compact::FIRST_REPLY;
use super::compact::SUMMARY_TEXT;
use codex_core::CodexAuth;
@@ -20,6 +21,7 @@ use codex_core::config::Config;
use codex_core::config::OPENAI_DEFAULT_MODEL;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::WarningEvent;
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
@@ -813,6 +815,11 @@ async fn compact_conversation(conversation: &Arc<CodexConversation>) {
.submit(Op::Compact)
.await
.expect("compact conversation");
let warning_event = wait_for_event(conversation, |ev| matches!(ev, EventMsg::Warning(_))).await;
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
panic!("expected warning event after compact");
};
assert_eq!(message, COMPACT_WARNING_MESSAGE);
wait_for_event(conversation, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
}

View File

@@ -73,6 +73,7 @@ For complete documentation of the `Op` and `EventMsg` variants, refer to [protoc
- `EventMsg::ExecApprovalRequest` Request approval from user to execute a command
- `EventMsg::TaskComplete` A task completed successfully
- `EventMsg::Error` A task stopped with an error
- `EventMsg::Warning` A non-fatal warning that the client should surface to the user
- `EventMsg::TurnComplete` Contains a `response_id` bookmark for last `response_id` executed by the task. This can be used to continue the task at a later point in time, perhaps with additional user input.
The `response_id` returned from each task matches the OpenAI `response_id` stored in the API's `/responses` endpoint. It can be stored and used in future `Sessions` to resume threads of work.
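
For the `EventMsg::Warning` entry above, a minimal consumer sketch (the handler shape is illustrative; only the `EventMsg` variants and their fields come from the protocol):

```rust
use codex_core::protocol::EventMsg;
use codex_core::protocol::WarningEvent;

// Illustrative handling: warnings are surfaced to the user but, unlike errors,
// do not mean the task stopped.
fn handle_event(msg: EventMsg) {
    match msg {
        EventMsg::Error(err) => eprintln!("ERROR: {}", err.message),
        EventMsg::Warning(WarningEvent { message }) => eprintln!("warning: {message}"),
        EventMsg::TaskComplete(_) => println!("task complete"),
        _ => {}
    }
}
```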

View File

@@ -21,6 +21,7 @@ use codex_core::protocol::StreamErrorEvent;
use codex_core::protocol::TaskCompleteEvent;
use codex_core::protocol::TurnAbortReason;
use codex_core::protocol::TurnDiffEvent;
use codex_core::protocol::WarningEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_protocol::num_format::format_with_separators;
use owo_colors::OwoColorize;
@@ -54,6 +55,7 @@ pub(crate) struct EventProcessorWithHumanOutput {
red: Style,
green: Style,
cyan: Style,
yellow: Style,
/// Whether to include `AgentReasoning` events in the output.
show_agent_reasoning: bool,
@@ -81,6 +83,7 @@ impl EventProcessorWithHumanOutput {
red: Style::new().red(),
green: Style::new().green(),
cyan: Style::new().cyan(),
yellow: Style::new().yellow(),
show_agent_reasoning: !config.hide_agent_reasoning,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
last_message_path,
@@ -97,6 +100,7 @@ impl EventProcessorWithHumanOutput {
red: Style::new(),
green: Style::new(),
cyan: Style::new(),
yellow: Style::new(),
show_agent_reasoning: !config.hide_agent_reasoning,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
last_message_path,
@@ -161,6 +165,13 @@ impl EventProcessor for EventProcessorWithHumanOutput {
let prefix = "ERROR:".style(self.red);
ts_msg!(self, "{prefix} {message}");
}
EventMsg::Warning(WarningEvent { message }) => {
ts_msg!(
self,
"{} {message}",
"warning:".style(self.yellow).style(self.bold)
);
}
EventMsg::DeprecationNotice(DeprecationNoticeEvent { summary, details }) => {
ts_msg!(
self,

View File

@@ -8,6 +8,7 @@ use crate::event_processor::handle_last_message;
use crate::exec_events::AgentMessageItem;
use crate::exec_events::CommandExecutionItem;
use crate::exec_events::CommandExecutionStatus;
use crate::exec_events::ErrorItem;
use crate::exec_events::FileChangeItem;
use crate::exec_events::FileUpdateChange;
use crate::exec_events::ItemCompletedEvent;
@@ -129,6 +130,15 @@ impl EventProcessorWithJsonOutput {
self.last_critical_error = Some(error.clone());
vec![ThreadEvent::Error(error)]
}
EventMsg::Warning(ev) => {
let item = ThreadItem {
id: self.get_next_item_id(),
details: ThreadItemDetails::Error(ErrorItem {
message: ev.message.clone(),
}),
};
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
}
EventMsg::StreamError(ev) => vec![ThreadEvent::Error(ThreadErrorEvent {
message: ev.message.clone(),
})],

View File

@@ -12,11 +12,13 @@ use codex_core::protocol::McpToolCallEndEvent;
use codex_core::protocol::PatchApplyBeginEvent;
use codex_core::protocol::PatchApplyEndEvent;
use codex_core::protocol::SessionConfiguredEvent;
use codex_core::protocol::WarningEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_exec::event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
use codex_exec::exec_events::AgentMessageItem;
use codex_exec::exec_events::CommandExecutionItem;
use codex_exec::exec_events::CommandExecutionStatus;
use codex_exec::exec_events::ErrorItem;
use codex_exec::exec_events::ItemCompletedEvent;
use codex_exec::exec_events::ItemStartedEvent;
use codex_exec::exec_events::ItemUpdatedEvent;
@@ -540,6 +542,28 @@ fn error_event_produces_error() {
);
}
#[test]
fn warning_event_produces_error_item() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let out = ep.collect_thread_events(&event(
"e1",
EventMsg::Warning(WarningEvent {
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.".to_string(),
}),
));
assert_eq!(
out,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::Error(ErrorItem {
message: "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.".to_string(),
}),
},
})]
);
}
#[test]
fn stream_error_event_produces_error() {
let mut ep = EventProcessorWithJsonOutput::new(None);

View File

@@ -204,6 +204,9 @@ async fn run_codex_tool_session_inner(
outgoing.send_response(request_id.clone(), result).await;
break;
}
EventMsg::Warning(_) => {
continue;
}
EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
call_id,
reason,

View File

@@ -438,6 +438,10 @@ pub enum EventMsg {
/// Error while executing a submission
Error(ErrorEvent),
/// Warning issued while processing a submission. Unlike `Error`, this
/// indicates the task continued but the user should still be notified.
Warning(WarningEvent),
/// Agent has started a task
TaskStarted(TaskStartedEvent),
@@ -672,6 +676,11 @@ pub struct ErrorEvent {
pub message: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct WarningEvent {
pub message: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct TaskCompleteEvent {
pub last_agent_message: Option<String>,
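
The new `WarningEvent` above is a single-field struct; a quick sketch of its serialized shape (assumes `serde_json` is available; the enum-level tagging on `EventMsg` is defined elsewhere and not shown in this hunk):

```rust
use codex_core::protocol::WarningEvent;

fn main() {
    let ev = WarningEvent {
        message: "compaction happened".to_string(),
    };
    // Derived `Serialize` on a one-field struct yields a single-key object.
    let json = serde_json::to_string(&ev).expect("serialize warning");
    assert_eq!(json, r#"{"message":"compaction happened"}"#);
}
```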

View File

@@ -42,6 +42,7 @@ use codex_core::protocol::UndoCompletedEvent;
use codex_core::protocol::UndoStartedEvent;
use codex_core::protocol::UserMessageEvent;
use codex_core::protocol::ViewImageToolCallEvent;
use codex_core::protocol::WarningEvent;
use codex_core::protocol::WebSearchBeginEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_protocol::ConversationId;
@@ -519,6 +520,11 @@ impl ChatWidget {
self.maybe_send_next_queued_input();
}
fn on_warning(&mut self, message: String) {
self.add_to_history(history_cell::new_warning_event(message));
self.request_redraw();
}
/// Handle a turn aborted due to user interrupt (Esc).
/// When there are queued user messages, restore them into the composer
/// separated by newlines rather than autosubmitting the next one.
@@ -1477,6 +1483,7 @@ impl ChatWidget {
self.set_token_info(ev.info);
self.on_rate_limit_snapshot(ev.rate_limits);
}
EventMsg::Warning(WarningEvent { message }) => self.on_warning(message),
EventMsg::Error(ErrorEvent { message }) => self.on_error(message),
EventMsg::TurnAborted(ev) => match ev.reason {
TurnAbortReason::Interrupted => {

View File

@@ -37,6 +37,7 @@ use codex_core::protocol::TaskStartedEvent;
use codex_core::protocol::UndoCompletedEvent;
use codex_core::protocol::UndoStartedEvent;
use codex_core::protocol::ViewImageToolCallEvent;
use codex_core::protocol::WarningEvent;
use codex_protocol::ConversationId;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::plan_tool::PlanItemArg;
@@ -56,6 +57,8 @@ use tempfile::tempdir;
use tokio::sync::mpsc::error::TryRecvError;
use tokio::sync::mpsc::unbounded_channel;
const TEST_WARNING_MESSAGE: &str = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.";
fn test_config() -> Config {
// Use base defaults to avoid depending on host state.
Config::load_from_base_config_with_overrides(
@@ -2445,6 +2448,25 @@ fn stream_error_updates_status_indicator() {
assert_eq!(status.header(), msg);
}
#[test]
fn warning_event_adds_warning_history_cell() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual();
chat.handle_codex_event(Event {
id: "sub-1".into(),
msg: EventMsg::Warning(WarningEvent {
message: TEST_WARNING_MESSAGE.to_string(),
}),
});
let cells = drain_insert_history(&mut rx);
assert_eq!(cells.len(), 1, "expected one warning history cell");
let rendered = lines_to_single_string(&cells[0]);
assert!(
rendered.contains(TEST_WARNING_MESSAGE),
"warning cell missing content: {rendered}"
);
}
#[test]
fn multiple_agent_messages_in_single_turn_emit_multiple_headers() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual();