Stream model responses (#1810)

Stream the model's thoughts and responses instead of waiting for the whole
thing to come through. Very rough right now, but I'm making the risk call to push through.
easong-openai authored 2025-08-04 21:23:22 -07:00, committed by GitHub
parent 063083af15
commit 906d449760
17 changed files with 1616 additions and 234 deletions


@@ -42,8 +42,10 @@ use crate::exec_command::strip_bash_lc_and_escape;
use crate::history_cell::CommandOutput;
use crate::history_cell::HistoryCell;
use crate::history_cell::PatchEventType;
use crate::live_wrap::RowBuilder;
use crate::user_approval_widget::ApprovalRequest;
use codex_file_search::FileMatch;
use ratatui::style::Stylize;
struct RunningCommand {
command: Vec<String>,
@@ -64,6 +66,10 @@ pub(crate) struct ChatWidget<'a> {
// at once into scrollback so the history contains a single message.
answer_buffer: String,
running_commands: HashMap<String, RunningCommand>,
live_builder: RowBuilder,
current_stream: Option<StreamKind>,
stream_header_emitted: bool,
live_max_rows: u16,
}
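
The widget leans on `RowBuilder` from `crate::live_wrap` for all of the wrapping and draining below. Its API is not shown in this diff; a minimal sketch of the contract implied by the call sites (an inference, not the real implementation) looks like:

// A minimal sketch of the RowBuilder contract as inferred from the call
// sites in this diff; the real type lives in crate::live_wrap and may differ.
pub struct Row {
    pub text: String,
}

pub struct RowBuilder {
    width: u16,
    rows: Vec<Row>,   // completed wrapped rows, oldest first
    partial: String,  // the line currently being filled
}

impl RowBuilder {
    pub fn new(width: u16) -> Self {
        Self { width, rows: Vec::new(), partial: String::new() }
    }

    pub fn width(&self) -> u16 {
        self.width
    }

    /// Append a streamed fragment, hard-wrapping into rows at `width`.
    pub fn push_fragment(&mut self, fragment: &str) {
        for ch in fragment.chars() {
            if ch == '\n' || self.partial.chars().count() == self.width as usize {
                let text = std::mem::take(&mut self.partial);
                self.rows.push(Row { text });
            }
            if ch != '\n' {
                self.partial.push(ch);
            }
        }
    }

    /// Promote any partial line into a completed row.
    pub fn end_line(&mut self) {
        if !self.partial.is_empty() {
            let text = std::mem::take(&mut self.partial);
            self.rows.push(Row { text });
        }
    }

    /// Drain completed rows that overflow the `keep` most recent ones.
    pub fn drain_commit_ready(&mut self, keep: usize) -> Vec<Row> {
        let overflow = self.rows.len().saturating_sub(keep);
        self.rows.drain(..overflow).collect()
    }

    /// Drain every remaining completed row.
    pub fn drain_rows(&mut self) -> Vec<Row> {
        self.rows.drain(..).collect()
    }

    /// Completed rows plus the partial line, for the live overlay.
    pub fn display_rows(&self) -> Vec<Row> {
        let mut out: Vec<Row> = self
            .rows
            .iter()
            .map(|r| Row { text: r.text.clone() })
            .collect();
        if !self.partial.is_empty() {
            out.push(Row { text: self.partial.clone() });
        }
        out
    }
}

The key invariant is that `drain_commit_ready(keep)` only ever surrenders rows that have scrolled past the live window, so committed history never changes retroactively.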
struct UserMessage {
@@ -71,6 +77,12 @@ struct UserMessage {
image_paths: Vec<PathBuf>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StreamKind {
Answer,
Reasoning,
}
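
The stream kind selects the header line for committed blocks. The same match appears twice below (in `stream_push_and_maybe_commit` and in `finalize_stream`); a shared helper along these lines (a sketch, not part of this commit) would remove the duplication:

use ratatui::style::Stylize;
use ratatui::text::Line;

// Hypothetical helper; this commit inlines the equivalent match twice.
fn stream_header(kind: StreamKind) -> Line<'static> {
    match kind {
        StreamKind::Reasoning => Line::from("thinking".magenta().italic()),
        StreamKind::Answer => Line::from("codex".magenta().bold()),
    }
}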
impl From<String> for UserMessage {
fn from(text: String) -> Self {
Self {
@@ -151,6 +163,10 @@ impl ChatWidget<'_> {
reasoning_buffer: String::new(),
answer_buffer: String::new(),
running_commands: HashMap::new(),
live_builder: RowBuilder::new(80),
current_stream: None,
stream_header_emitted: false,
live_max_rows: 3,
}
}
@@ -234,58 +250,45 @@ impl ChatWidget<'_> {
self.request_redraw();
}
EventMsg::AgentMessage(AgentMessageEvent { message }) => {
// Final assistant answer. Prefer the fully provided message
// from the event; if it is empty fall back to any accumulated
// delta buffer (some providers may only stream deltas and send
// an empty final message).
let full = if message.is_empty() {
std::mem::take(&mut self.answer_buffer)
} else {
self.answer_buffer.clear();
message
};
if !full.is_empty() {
self.add_to_history(HistoryCell::new_agent_message(&self.config, full));
}
EventMsg::AgentMessage(AgentMessageEvent { message: _ }) => {
// Final assistant answer: commit all remaining rows and close with
// a blank line. The final text is ignored here; we rely on the
// streamed deltas already in the builder.
self.finalize_stream(StreamKind::Answer);
self.request_redraw();
}
EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => {
// Buffer only; do not emit partial lines. This avoids cases
// where long responses appear truncated if the terminal
// wrapped early. The full message is emitted on
// AgentMessage.
self.begin_stream(StreamKind::Answer);
self.answer_buffer.push_str(&delta);
self.stream_push_and_maybe_commit(&delta);
self.request_redraw();
}
EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta }) => {
// Buffer only; disable incremental reasoning streaming so we
// avoid truncated intermediate lines. Full text emitted on
// AgentReasoning.
// Stream CoT into the live pane; keep input visible and commit
// overflow rows incrementally to scrollback.
self.begin_stream(StreamKind::Reasoning);
self.reasoning_buffer.push_str(&delta);
self.stream_push_and_maybe_commit(&delta);
self.request_redraw();
}
EventMsg::AgentReasoning(AgentReasoningEvent { text }) => {
// Emit the full reasoning text once. Some providers might send the
// final event with empty text if only deltas were used.
let full = if text.is_empty() {
std::mem::take(&mut self.reasoning_buffer)
} else {
self.reasoning_buffer.clear();
text
};
if !full.is_empty() {
self.add_to_history(HistoryCell::new_agent_reasoning(&self.config, full));
}
EventMsg::AgentReasoning(AgentReasoningEvent { text: _ }) => {
// Final reasoning: commit remaining rows and close with a blank.
self.finalize_stream(StreamKind::Reasoning);
self.request_redraw();
}
EventMsg::TaskStarted => {
self.bottom_pane.clear_ctrl_c_quit_hint();
self.bottom_pane.set_task_running(true);
// Replace composer with single-line spinner while waiting.
self.bottom_pane
.update_status_text("waiting for model".to_string());
self.request_redraw();
}
EventMsg::TaskComplete(TaskCompleteEvent {
last_agent_message: _,
}) => {
self.bottom_pane.set_task_running(false);
self.bottom_pane.clear_live_ring();
self.request_redraw();
}
EventMsg::TokenCount(token_usage) => {
@@ -298,8 +301,8 @@ impl ChatWidget<'_> {
self.bottom_pane.set_task_running(false);
}
EventMsg::PlanUpdate(update) => {
// Commit plan updates directly to history (no status-line preview).
self.add_to_history(HistoryCell::new_plan_update(update));
self.request_redraw();
}
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
call_id: _,
@@ -307,8 +310,7 @@ impl ChatWidget<'_> {
cwd,
reason,
}) => {
// Print the command to the history so it is visible in the
// transcript *before* the modal asks for approval.
// Log a background summary immediately so the history is chronological.
let cmdline = strip_bash_lc_and_escape(&command);
let text = format!(
"command requires approval:\n$ {cmdline}{reason}",
@@ -344,7 +346,6 @@ impl ChatWidget<'_> {
// approval dialog) and avoids surprising the user with a modal
// prompt before they have seen *what* is being requested.
// ------------------------------------------------------------------
self.add_to_history(HistoryCell::new_patch_event(
PatchEventType::ApprovalRequest,
changes,
@@ -379,8 +380,6 @@ impl ChatWidget<'_> {
auto_approved,
changes,
}) => {
// Even when a patch is auto-approved we still display the
// summary so the user can follow along.
self.add_to_history(HistoryCell::new_patch_event(
PatchEventType::ApplyBegin { auto_approved },
changes,
@@ -393,6 +392,7 @@ impl ChatWidget<'_> {
stdout,
stderr,
}) => {
// Compute summary before moving stdout into the history cell.
let cmd = self.running_commands.remove(&call_id);
self.add_to_history(HistoryCell::new_completed_exec_command(
cmd.map(|cmd| cmd.command).unwrap_or_else(|| vec![call_id]),
@@ -442,14 +442,15 @@ impl ChatWidget<'_> {
self.app_event_tx.send(AppEvent::ExitRequest);
}
event => {
self.add_to_history(HistoryCell::new_background_event(format!("{event:?}")));
let text = format!("{event:?}");
self.add_to_history(HistoryCell::new_background_event(text.clone()));
self.update_latest_log(text);
}
}
}
/// Update the live log preview while a task is running.
pub(crate) fn update_latest_log(&mut self, line: String) {
// Forward only if we are currently showing the status indicator.
self.bottom_pane.update_status_text(line);
}
@@ -515,6 +516,97 @@ impl ChatWidget<'_> {
}
}
impl ChatWidget<'_> {
fn begin_stream(&mut self, kind: StreamKind) {
if self.current_stream != Some(kind) {
self.current_stream = Some(kind);
self.stream_header_emitted = false;
// Clear any previous live content; we're starting a new stream.
self.live_builder = RowBuilder::new(self.live_builder.width());
// Ensure the waiting status is visible (composer replaced).
self.bottom_pane
.update_status_text("waiting for model".to_string());
}
}
fn stream_push_and_maybe_commit(&mut self, delta: &str) {
self.live_builder.push_fragment(delta);
// Commit overflow rows (small batches) while keeping the last N rows visible.
let drained = self
.live_builder
.drain_commit_ready(self.live_max_rows as usize);
if !drained.is_empty() {
let mut lines: Vec<ratatui::text::Line<'static>> = Vec::new();
if !self.stream_header_emitted {
match self.current_stream {
Some(StreamKind::Reasoning) => {
lines.push(ratatui::text::Line::from("thinking".magenta().italic()));
}
Some(StreamKind::Answer) => {
lines.push(ratatui::text::Line::from("codex".magenta().bold()));
}
None => {}
}
self.stream_header_emitted = true;
}
for r in drained {
lines.push(ratatui::text::Line::from(r.text));
}
self.app_event_tx.send(AppEvent::InsertHistory(lines));
}
// Update the live ring overlay lines (text-only, newest at bottom).
let rows = self
.live_builder
.display_rows()
.into_iter()
.map(|r| ratatui::text::Line::from(r.text))
.collect::<Vec<_>>();
self.bottom_pane
.set_live_ring_rows(self.live_max_rows, rows);
}
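
To make the overflow behavior concrete, a hypothetical illustration against the `RowBuilder` sketch above (not a test from this commit):

// With a narrow 10-column builder and keep = 2, the oldest wrapped rows
// drain for scrollback while the newest stay in the live overlay.
let mut rb = RowBuilder::new(10);
rb.push_fragment("streaming deltas arrive in arbitrary chunks");
let committed = rb.drain_commit_ready(2);
assert_eq!(committed.len(), 2); // two oldest full rows go to history
// display_rows() still returns the two trailing rows plus the partial line.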
fn finalize_stream(&mut self, kind: StreamKind) {
if self.current_stream != Some(kind) {
// Nothing to do; either already finalized or not the active stream.
return;
}
// Flush any partial line as a full row, then drain all remaining rows.
self.live_builder.end_line();
let remaining = self.live_builder.drain_rows();
// TODO: Re-add markdown rendering for assistant answers and reasoning.
// When finalizing, pass the accumulated text through `markdown::append_markdown`
// to build styled `Line<'static>` entries instead of raw plain text lines.
if !remaining.is_empty() || !self.stream_header_emitted {
let mut lines: Vec<ratatui::text::Line<'static>> = Vec::new();
if !self.stream_header_emitted {
match kind {
StreamKind::Reasoning => {
lines.push(ratatui::text::Line::from("thinking".magenta().italic()));
}
StreamKind::Answer => {
lines.push(ratatui::text::Line::from("codex".magenta().bold()));
}
}
self.stream_header_emitted = true;
}
for r in remaining {
lines.push(ratatui::text::Line::from(r.text));
}
// Close the block with a blank line for readability.
lines.push(ratatui::text::Line::from(""));
self.app_event_tx.send(AppEvent::InsertHistory(lines));
}
// Clear the live overlay and reset state for the next stream.
self.live_builder = RowBuilder::new(self.live_builder.width());
self.bottom_pane.clear_live_ring();
self.current_stream = None;
self.stream_header_emitted = false;
}
}
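
On the TODO in `finalize_stream`: one possible shape for restoring markdown rendering, reusing the `stream_header` helper sketched earlier. The `append_markdown` signature and the buffer plumbing are assumptions here, not part of this commit:

// Hypothetical sketch: style the accumulated stream text at finalization.
// Assumes markdown::append_markdown(source, &mut lines, config) and that
// the full answer/reasoning text is retained through finalization, which
// finalize_stream does not currently do.
fn render_final_block(
    kind: StreamKind,
    source: &str,
    config: &codex_core::config::Config,
) -> Vec<ratatui::text::Line<'static>> {
    let mut lines = vec![stream_header(kind)];
    crate::markdown::append_markdown(source, &mut lines, config);
    lines.push(ratatui::text::Line::from(""));
    lines
}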
impl WidgetRef for &ChatWidget<'_> {
fn render_ref(&self, area: Rect, buf: &mut Buffer) {
// In the hybrid inline viewport mode we only draw the interactive