Stream model responses (#1810)

Stream the model's thoughts and responses instead of waiting for the whole
thing to come through. Very rough right now, but I'm making the risk call to push through.
easong-openai
2025-08-04 21:23:22 -07:00
committed by GitHub
parent 063083af15
commit 906d449760
17 changed files with 1616 additions and 234 deletions


@@ -260,6 +260,11 @@ async fn process_chat_sse<S>(
 .and_then(|d| d.get("content"))
 .and_then(|c| c.as_str())
 {
+    // Emit a delta so downstream consumers can stream text live.
+    let _ = tx_event
+        .send(Ok(ResponseEvent::OutputTextDelta(content.to_string())))
+        .await;
     let item = ResponseItem::Message {
         role: "assistant".to_string(),
         content: vec![ContentItem::OutputText {
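
With this hunk, process_chat_sse emits an OutputTextDelta for each content chunk as it arrives, in addition to the aggregated message it already built. A minimal consumer-side sketch of what that enables, using a simplified stand-in for ResponseEvent (a plain enum plus a tokio mpsc channel, not the crate's real types):

use tokio::sync::mpsc;

#[derive(Debug)]
enum ResponseEvent {
    OutputTextDelta(String),
    OutputItemDone(String), // simplified; the real variant carries a full ResponseItem
}

#[tokio::main]
async fn main() {
    let (tx_event, mut rx_event) = mpsc::channel::<Result<ResponseEvent, String>>(16);

    // Stand-in for the SSE task: emit deltas, then the final aggregated item.
    tokio::spawn(async move {
        for chunk in ["Hel", "lo, ", "world"] {
            let _ = tx_event
                .send(Ok(ResponseEvent::OutputTextDelta(chunk.to_string())))
                .await;
        }
        let _ = tx_event
            .send(Ok(ResponseEvent::OutputItemDone("Hello, world".to_string())))
            .await;
    });

    while let Some(event) = rx_event.recv().await {
        match event {
            Ok(ResponseEvent::OutputTextDelta(delta)) => print!("{delta}"), // render live
            Ok(ResponseEvent::OutputItemDone(full)) => println!("\n[done] {full}"),
            Err(e) => eprintln!("stream error: {e}"),
        }
    }
}

Dropping the send result (let _ =) mirrors the diff: a closed channel just means nobody is listening anymore, which shouldn't abort the SSE task.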
@@ -439,11 +444,14 @@ where
     // will never appear in a Chat Completions stream.
     continue;
 }
-Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(_))))
-| Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta(_)))) => {
-    // Deltas are ignored here since aggregation waits for the
-    // final OutputItemDone.
-    continue;
+Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta)))) => {
+    // Forward deltas unchanged so callers can stream text
+    // live while still receiving a single aggregated
+    // OutputItemDone at the end of the turn.
+    return Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta))));
 }
+Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta(delta)))) => {
+    return Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta(delta))));
+}
 }
 }
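
The second hunk changes the aggregation adapter: instead of swallowing deltas and only yielding the final OutputItemDone, it now forwards each delta immediately while still producing the aggregate at end of turn. A rough sketch of that behavior, reusing the same simplified ResponseEvent enum and channel-based plumbing as the sketch above (names are illustrative, not the crate's API):

use tokio::sync::mpsc;

#[derive(Debug)]
enum ResponseEvent {
    OutputTextDelta(String),
    OutputItemDone(String), // simplified; the real variant carries a full ResponseItem
}

// Pass deltas through for live rendering while also accumulating them,
// so the caller still gets one aggregated item when the input ends.
async fn aggregate(
    mut rx: mpsc::Receiver<Result<ResponseEvent, String>>,
    tx: mpsc::Sender<Result<ResponseEvent, String>>,
) {
    let mut buf = String::new();
    while let Some(event) = rx.recv().await {
        match event {
            Ok(ResponseEvent::OutputTextDelta(delta)) => {
                buf.push_str(&delta);
                // Forward the delta unchanged so downstream can stream it.
                let _ = tx.send(Ok(ResponseEvent::OutputTextDelta(delta))).await;
            }
            other => {
                let _ = tx.send(other).await;
            }
        }
    }
    // Input exhausted: emit a single aggregated item built from the deltas.
    if !buf.is_empty() {
        let _ = tx.send(Ok(ResponseEvent::OutputItemDone(buf))).await;
    }
}

The real adapter does this inside poll_next rather than over channels, but the invariant is the same: every delta is observable as it arrives, and exactly one aggregated OutputItemDone still closes out the turn.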