Stream model responses (#1810)

Stream the model's thoughts and responses instead of waiting for the whole
thing to come through. Very rough right now, but I'm making the risk call to push through.
easong-openai authored 2025-08-04 21:23:22 -07:00, committed by GitHub
parent 063083af15
commit 906d449760
17 changed files with 1616 additions and 234 deletions


@@ -42,8 +42,10 @@ use crate::exec_command::strip_bash_lc_and_escape;
use crate::history_cell::CommandOutput;
use crate::history_cell::HistoryCell;
use crate::history_cell::PatchEventType;
use crate::live_wrap::RowBuilder;
use crate::user_approval_widget::ApprovalRequest;
use codex_file_search::FileMatch;
use ratatui::style::Stylize;
struct RunningCommand {
command: Vec<String>,
@@ -64,6 +66,10 @@ pub(crate) struct ChatWidget<'a> {
// at once into scrollback so the history contains a single message.
answer_buffer: String,
running_commands: HashMap<String, RunningCommand>,
live_builder: RowBuilder,
current_stream: Option<StreamKind>,
stream_header_emitted: bool,
live_max_rows: u16,
}
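
The widget leans on `RowBuilder` from `crate::live_wrap` for all of the wrapping and draining below. Its API is not shown in this diff; a minimal sketch of the contract implied by the call sites (an inference, not the real implementation) looks like:

// A minimal sketch of the RowBuilder contract as inferred from the call
// sites in this diff; the real type lives in crate::live_wrap and may differ.
pub struct Row {
    pub text: String,
}

pub struct RowBuilder {
    width: u16,
    rows: Vec<Row>,   // completed wrapped rows, oldest first
    partial: String,  // the line currently being filled
}

impl RowBuilder {
    pub fn new(width: u16) -> Self {
        Self { width, rows: Vec::new(), partial: String::new() }
    }

    pub fn width(&self) -> u16 {
        self.width
    }

    /// Append a streamed fragment, hard-wrapping into rows at `width`.
    pub fn push_fragment(&mut self, fragment: &str) {
        for ch in fragment.chars() {
            if ch == '\n' || self.partial.chars().count() == self.width as usize {
                let text = std::mem::take(&mut self.partial);
                self.rows.push(Row { text });
            }
            if ch != '\n' {
                self.partial.push(ch);
            }
        }
    }

    /// Promote any partial line into a completed row.
    pub fn end_line(&mut self) {
        if !self.partial.is_empty() {
            let text = std::mem::take(&mut self.partial);
            self.rows.push(Row { text });
        }
    }

    /// Drain completed rows that overflow the `keep` most recent ones.
    pub fn drain_commit_ready(&mut self, keep: usize) -> Vec<Row> {
        let overflow = self.rows.len().saturating_sub(keep);
        self.rows.drain(..overflow).collect()
    }

    /// Drain every remaining completed row.
    pub fn drain_rows(&mut self) -> Vec<Row> {
        self.rows.drain(..).collect()
    }

    /// Completed rows plus the partial line, for the live overlay.
    pub fn display_rows(&self) -> Vec<Row> {
        let mut out: Vec<Row> = self
            .rows
            .iter()
            .map(|r| Row { text: r.text.clone() })
            .collect();
        if !self.partial.is_empty() {
            out.push(Row { text: self.partial.clone() });
        }
        out
    }
}

The key invariant is that `drain_commit_ready(keep)` only ever surrenders rows that have scrolled past the live window, so committed history never changes retroactively.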
struct UserMessage {
@@ -71,6 +77,12 @@ struct UserMessage {
image_paths: Vec<PathBuf>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StreamKind {
Answer,
Reasoning,
}
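
The stream kind selects the header line for committed blocks. The same match appears twice below (in `stream_push_and_maybe_commit` and in `finalize_stream`); a shared helper along these lines (a sketch, not part of this commit) would remove the duplication:

use ratatui::style::Stylize;
use ratatui::text::Line;

// Hypothetical helper; this commit inlines the equivalent match twice.
fn stream_header(kind: StreamKind) -> Line<'static> {
    match kind {
        StreamKind::Reasoning => Line::from("thinking".magenta().italic()),
        StreamKind::Answer => Line::from("codex".magenta().bold()),
    }
}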
impl From<String> for UserMessage {
fn from(text: String) -> Self {
Self {
@@ -151,6 +163,10 @@ impl ChatWidget<'_> {
reasoning_buffer: String::new(),
answer_buffer: String::new(),
running_commands: HashMap::new(),
live_builder: RowBuilder::new(80),
current_stream: None,
stream_header_emitted: false,
live_max_rows: 3,
}
}
@@ -234,58 +250,45 @@ impl ChatWidget<'_> {
self.request_redraw();
}
EventMsg::AgentMessage(AgentMessageEvent { message }) => {
// Final assistant answer. Prefer the fully provided message
// from the event; if it is empty fall back to any accumulated
// delta buffer (some providers may only stream deltas and send
// an empty final message).
let full = if message.is_empty() {
std::mem::take(&mut self.answer_buffer)
} else {
self.answer_buffer.clear();
message
};
if !full.is_empty() {
self.add_to_history(HistoryCell::new_agent_message(&self.config, full));
}
EventMsg::AgentMessage(AgentMessageEvent { message: _ }) => {
// Final assistant answer: commit all remaining rows and close with
// a blank line. The final text is ignored here; we rely on the
// streamed deltas already in the builder.
self.finalize_stream(StreamKind::Answer);
self.request_redraw();
}
EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => {
// Buffer only; do not emit partial lines. This avoids cases
// where long responses appear truncated if the terminal
// wrapped early. The full message is emitted on
// AgentMessage.
self.begin_stream(StreamKind::Answer);
self.answer_buffer.push_str(&delta);
self.stream_push_and_maybe_commit(&delta);
self.request_redraw();
}
EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta }) => {
// Buffer only; disable incremental reasoning streaming so we
// avoid truncated intermediate lines. Full text emitted on
// AgentReasoning.
// Stream CoT into the live pane; keep input visible and commit
// overflow rows incrementally to scrollback.
self.begin_stream(StreamKind::Reasoning);
self.reasoning_buffer.push_str(&delta);
self.stream_push_and_maybe_commit(&delta);
self.request_redraw();
}
EventMsg::AgentReasoning(AgentReasoningEvent { text }) => {
// Emit the full reasoning text once. Some providers might send the
// final event with empty text if only deltas were used.
let full = if text.is_empty() {
std::mem::take(&mut self.reasoning_buffer)
} else {
self.reasoning_buffer.clear();
text
};
if !full.is_empty() {
self.add_to_history(HistoryCell::new_agent_reasoning(&self.config, full));
}
EventMsg::AgentReasoning(AgentReasoningEvent { text: _ }) => {
// Final reasoning: commit remaining rows and close with a blank.
self.finalize_stream(StreamKind::Reasoning);
self.request_redraw();
}
EventMsg::TaskStarted => {
self.bottom_pane.clear_ctrl_c_quit_hint();
self.bottom_pane.set_task_running(true);
// Replace composer with single-line spinner while waiting.
self.bottom_pane
.update_status_text("waiting for model".to_string());
self.request_redraw();
}
EventMsg::TaskComplete(TaskCompleteEvent {
last_agent_message: _,
}) => {
self.bottom_pane.set_task_running(false);
self.bottom_pane.clear_live_ring();
self.request_redraw();
}
EventMsg::TokenCount(token_usage) => {
@@ -298,8 +301,8 @@ impl ChatWidget<'_> {
self.bottom_pane.set_task_running(false);
}
EventMsg::PlanUpdate(update) => {
// Commit plan updates directly to history (no status-line preview).
self.add_to_history(HistoryCell::new_plan_update(update));
self.request_redraw();
}
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
call_id: _,
@@ -307,8 +310,7 @@ impl ChatWidget<'_> {
cwd,
reason,
}) => {
// Print the command to the history so it is visible in the
// transcript *before* the modal asks for approval.
// Log a background summary immediately so the history is chronological.
let cmdline = strip_bash_lc_and_escape(&command);
let text = format!(
"command requires approval:\n$ {cmdline}{reason}",
@@ -344,7 +346,6 @@ impl ChatWidget<'_> {
// approval dialog) and avoids surprising the user with a modal
// prompt before they have seen *what* is being requested.
// ------------------------------------------------------------------
self.add_to_history(HistoryCell::new_patch_event(
PatchEventType::ApprovalRequest,
changes,
@@ -379,8 +380,6 @@ impl ChatWidget<'_> {
auto_approved,
changes,
}) => {
// Even when a patch is auto-approved we still display the
// summary so the user can follow along.
self.add_to_history(HistoryCell::new_patch_event(
PatchEventType::ApplyBegin { auto_approved },
changes,
@@ -393,6 +392,7 @@ impl ChatWidget<'_> {
stdout,
stderr,
}) => {
// Compute summary before moving stdout into the history cell.
let cmd = self.running_commands.remove(&call_id);
self.add_to_history(HistoryCell::new_completed_exec_command(
cmd.map(|cmd| cmd.command).unwrap_or_else(|| vec![call_id]),
@@ -442,14 +442,15 @@ impl ChatWidget<'_> {
self.app_event_tx.send(AppEvent::ExitRequest);
}
event => {
self.add_to_history(HistoryCell::new_background_event(format!("{event:?}")));
let text = format!("{event:?}");
self.add_to_history(HistoryCell::new_background_event(text.clone()));
self.update_latest_log(text);
}
}
}
/// Update the live log preview while a task is running.
pub(crate) fn update_latest_log(&mut self, line: String) {
// Forward only if we are currently showing the status indicator.
self.bottom_pane.update_status_text(line);
}
@@ -515,6 +516,97 @@ impl ChatWidget<'_> {
}
}
impl ChatWidget<'_> {
fn begin_stream(&mut self, kind: StreamKind) {
if self.current_stream != Some(kind) {
self.current_stream = Some(kind);
self.stream_header_emitted = false;
// Clear any previous live content; we're starting a new stream.
self.live_builder = RowBuilder::new(self.live_builder.width());
// Ensure the waiting status is visible (composer replaced).
self.bottom_pane
.update_status_text("waiting for model".to_string());
}
}
fn stream_push_and_maybe_commit(&mut self, delta: &str) {
self.live_builder.push_fragment(delta);
// Commit overflow rows (small batches) while keeping the last N rows visible.
let drained = self
.live_builder
.drain_commit_ready(self.live_max_rows as usize);
if !drained.is_empty() {
let mut lines: Vec<ratatui::text::Line<'static>> = Vec::new();
if !self.stream_header_emitted {
match self.current_stream {
Some(StreamKind::Reasoning) => {
lines.push(ratatui::text::Line::from("thinking".magenta().italic()));
}
Some(StreamKind::Answer) => {
lines.push(ratatui::text::Line::from("codex".magenta().bold()));
}
None => {}
}
self.stream_header_emitted = true;
}
for r in drained {
lines.push(ratatui::text::Line::from(r.text));
}
self.app_event_tx.send(AppEvent::InsertHistory(lines));
}
// Update the live ring overlay lines (text-only, newest at bottom).
let rows = self
.live_builder
.display_rows()
.into_iter()
.map(|r| ratatui::text::Line::from(r.text))
.collect::<Vec<_>>();
self.bottom_pane
.set_live_ring_rows(self.live_max_rows, rows);
}
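
To make the overflow behavior concrete, a hypothetical illustration against the `RowBuilder` sketch above (not a test from this commit):

// With a narrow 10-column builder and keep = 2, the oldest wrapped rows
// drain for scrollback while the newest stay in the live overlay.
let mut rb = RowBuilder::new(10);
rb.push_fragment("streaming deltas arrive in arbitrary chunks");
let committed = rb.drain_commit_ready(2);
assert_eq!(committed.len(), 2); // two oldest full rows go to history
// display_rows() still returns the two trailing rows plus the partial line.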
fn finalize_stream(&mut self, kind: StreamKind) {
if self.current_stream != Some(kind) {
// Nothing to do; either already finalized or not the active stream.
return;
}
// Flush any partial line as a full row, then drain all remaining rows.
self.live_builder.end_line();
let remaining = self.live_builder.drain_rows();
// TODO: Re-add markdown rendering for assistant answers and reasoning.
// When finalizing, pass the accumulated text through `markdown::append_markdown`
// to build styled `Line<'static>` entries instead of raw plain text lines.
if !remaining.is_empty() || !self.stream_header_emitted {
let mut lines: Vec<ratatui::text::Line<'static>> = Vec::new();
if !self.stream_header_emitted {
match kind {
StreamKind::Reasoning => {
lines.push(ratatui::text::Line::from("thinking".magenta().italic()));
}
StreamKind::Answer => {
lines.push(ratatui::text::Line::from("codex".magenta().bold()));
}
}
self.stream_header_emitted = true;
}
for r in remaining {
lines.push(ratatui::text::Line::from(r.text));
}
// Close the block with a blank line for readability.
lines.push(ratatui::text::Line::from(""));
self.app_event_tx.send(AppEvent::InsertHistory(lines));
}
// Clear the live overlay and reset state for the next stream.
self.live_builder = RowBuilder::new(self.live_builder.width());
self.bottom_pane.clear_live_ring();
self.current_stream = None;
self.stream_header_emitted = false;
}
}
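
On the TODO in `finalize_stream`: one possible shape for restoring markdown rendering, reusing the `stream_header` helper sketched earlier. The `append_markdown` signature and the buffer plumbing are assumptions here, not part of this commit:

// Hypothetical sketch: style the accumulated stream text at finalization.
// Assumes markdown::append_markdown(source, &mut lines, config) and that
// the full answer/reasoning text is retained through finalization, which
// finalize_stream does not currently do.
fn render_final_block(
    kind: StreamKind,
    source: &str,
    config: &codex_core::config::Config,
) -> Vec<ratatui::text::Line<'static>> {
    let mut lines = vec![stream_header(kind)];
    crate::markdown::append_markdown(source, &mut lines, config);
    lines.push(ratatui::text::Line::from(""));
    lines
}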
impl WidgetRef for &ChatWidget<'_> {
fn render_ref(&self, area: Rect, buf: &mut Buffer) {
// In the hybrid inline viewport mode we only draw the interactive