Stream model responses (#1810)
Stream models thoughts and responses instead of waiting for the whole thing to come through. Very rough right now, but I'm making the risk call to push through.
This commit is contained in:
@@ -260,6 +260,11 @@ async fn process_chat_sse<S>(
|
||||
.and_then(|d| d.get("content"))
|
||||
.and_then(|c| c.as_str())
|
||||
{
|
||||
// Emit a delta so downstream consumers can stream text live.
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::OutputTextDelta(content.to_string())))
|
||||
.await;
|
||||
|
||||
let item = ResponseItem::Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
@@ -439,11 +444,14 @@ where
|
||||
// will never appear in a Chat Completions stream.
|
||||
continue;
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(_))))
|
||||
| Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta(_)))) => {
|
||||
// Deltas are ignored here since aggregation waits for the
|
||||
// final OutputItemDone.
|
||||
continue;
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta)))) => {
|
||||
// Forward deltas unchanged so callers can stream text
|
||||
// live while still receiving a single aggregated
|
||||
// OutputItemDone at the end of the turn.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta(delta)))) => {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta(delta))));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -123,7 +123,7 @@ impl Codex {
|
||||
let resume_path = config.experimental_resume.clone();
|
||||
info!("resume_path: {resume_path:?}");
|
||||
let (tx_sub, rx_sub) = async_channel::bounded(64);
|
||||
let (tx_event, rx_event) = async_channel::bounded(1600);
|
||||
let (tx_event, rx_event) = async_channel::unbounded();
|
||||
|
||||
let user_instructions = get_user_instructions(&config).await;
|
||||
|
||||
@@ -701,7 +701,7 @@ async fn submission_loop(
|
||||
cwd,
|
||||
resume_path,
|
||||
} => {
|
||||
info!(
|
||||
debug!(
|
||||
"Configuring session: model={model}; provider={provider:?}; resume={resume_path:?}"
|
||||
);
|
||||
if !cwd.is_absolute() {
|
||||
@@ -1374,6 +1374,11 @@ async fn try_run_turn(
|
||||
return Ok(output);
|
||||
}
|
||||
ResponseEvent::OutputTextDelta(delta) => {
|
||||
{
|
||||
let mut st = sess.state.lock().unwrap();
|
||||
st.history.append_assistant_text(&delta);
|
||||
}
|
||||
|
||||
let event = Event {
|
||||
id: sub_id.to_string(),
|
||||
msg: EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }),
|
||||
@@ -1921,7 +1926,8 @@ async fn handle_sandbox_error(
|
||||
// include additional metadata on the command to indicate whether non-zero
|
||||
// exit codes merit a retry.
|
||||
|
||||
// For now, we categorically ask the user to retry without sandbox.
|
||||
// For now, we categorically ask the user to retry without sandbox and
|
||||
// emit the raw error as a background event.
|
||||
sess.notify_background_event(&sub_id, format!("Execution failed: {error}"))
|
||||
.await;
|
||||
|
||||
|
||||
@@ -24,9 +24,52 @@ impl ConversationHistory {
|
||||
I::Item: std::ops::Deref<Target = ResponseItem>,
|
||||
{
|
||||
for item in items {
|
||||
if is_api_message(&item) {
|
||||
// Note agent-loop.ts also does filtering on some of the fields.
|
||||
self.items.push(item.clone());
|
||||
if !is_api_message(&item) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Merge adjacent assistant messages into a single history entry.
|
||||
// This prevents duplicates when a partial assistant message was
|
||||
// streamed into history earlier in the turn and the final full
|
||||
// message is recorded at turn end.
|
||||
match (&*item, self.items.last_mut()) {
|
||||
(
|
||||
ResponseItem::Message {
|
||||
role: new_role,
|
||||
content: new_content,
|
||||
..
|
||||
},
|
||||
Some(ResponseItem::Message {
|
||||
role: last_role,
|
||||
content: last_content,
|
||||
..
|
||||
}),
|
||||
) if new_role == "assistant" && last_role == "assistant" => {
|
||||
append_text_content(last_content, new_content);
|
||||
}
|
||||
_ => {
|
||||
self.items.push(item.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Append a text `delta` to the latest assistant message, creating a new
|
||||
/// assistant entry if none exists yet (e.g. first delta for this turn).
|
||||
pub(crate) fn append_assistant_text(&mut self, delta: &str) {
|
||||
match self.items.last_mut() {
|
||||
Some(ResponseItem::Message { role, content, .. }) if role == "assistant" => {
|
||||
append_text_delta(content, delta);
|
||||
}
|
||||
_ => {
|
||||
// Start a new assistant message with the delta.
|
||||
self.items.push(ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![crate::models::ContentItem::OutputText {
|
||||
text: delta.to_string(),
|
||||
}],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -72,3 +115,140 @@ fn is_api_message(message: &ResponseItem) -> bool {
|
||||
ResponseItem::Other => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper to append the textual content from `src` into `dst` in place.
|
||||
fn append_text_content(
|
||||
dst: &mut Vec<crate::models::ContentItem>,
|
||||
src: &Vec<crate::models::ContentItem>,
|
||||
) {
|
||||
for c in src {
|
||||
if let crate::models::ContentItem::OutputText { text } = c {
|
||||
append_text_delta(dst, text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Append a single text delta to the last OutputText item in `content`, or
|
||||
/// push a new OutputText item if none exists.
|
||||
fn append_text_delta(content: &mut Vec<crate::models::ContentItem>, delta: &str) {
|
||||
if let Some(crate::models::ContentItem::OutputText { text }) = content
|
||||
.iter_mut()
|
||||
.rev()
|
||||
.find(|c| matches!(c, crate::models::ContentItem::OutputText { .. }))
|
||||
{
|
||||
text.push_str(delta);
|
||||
} else {
|
||||
content.push(crate::models::ContentItem::OutputText {
|
||||
text: delta.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::models::ContentItem;
|
||||
|
||||
fn assistant_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
fn user_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merges_adjacent_assistant_messages() {
|
||||
let mut h = ConversationHistory::default();
|
||||
let a1 = assistant_msg("Hello");
|
||||
let a2 = assistant_msg(", world!");
|
||||
h.record_items([&a1, &a2]);
|
||||
|
||||
let items = h.contents();
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "Hello, world!".to_string()
|
||||
}]
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn append_assistant_text_creates_and_appends() {
|
||||
let mut h = ConversationHistory::default();
|
||||
h.append_assistant_text("Hello");
|
||||
h.append_assistant_text(", world");
|
||||
|
||||
// Now record a final full assistant message and verify it merges.
|
||||
let final_msg = assistant_msg("!");
|
||||
h.record_items([&final_msg]);
|
||||
|
||||
let items = h.contents();
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "Hello, world!".to_string()
|
||||
}]
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filters_non_api_messages() {
|
||||
let mut h = ConversationHistory::default();
|
||||
// System message is not an API message; Other is ignored.
|
||||
let system = ResponseItem::Message {
|
||||
id: None,
|
||||
role: "system".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "ignored".to_string(),
|
||||
}],
|
||||
};
|
||||
h.record_items([&system, &ResponseItem::Other]);
|
||||
|
||||
// User and assistant should be retained.
|
||||
let u = user_msg("hi");
|
||||
let a = assistant_msg("hello");
|
||||
h.record_items([&u, &a]);
|
||||
|
||||
let items = h.contents();
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "hi".to_string()
|
||||
}]
|
||||
},
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "hello".to_string()
|
||||
}]
|
||||
}
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ use serde::ser::Serializer;
|
||||
|
||||
use crate::protocol::InputItem;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ResponseInputItem {
|
||||
Message {
|
||||
@@ -26,7 +26,7 @@ pub enum ResponseInputItem {
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ContentItem {
|
||||
InputText { text: String },
|
||||
@@ -34,7 +34,7 @@ pub enum ContentItem {
|
||||
OutputText { text: String },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ResponseItem {
|
||||
Message {
|
||||
@@ -107,7 +107,7 @@ impl From<ResponseInputItem> for ResponseItem {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum LocalShellStatus {
|
||||
Completed,
|
||||
@@ -115,13 +115,13 @@ pub enum LocalShellStatus {
|
||||
Incomplete,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum LocalShellAction {
|
||||
Exec(LocalShellExecAction),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct LocalShellExecAction {
|
||||
pub command: Vec<String>,
|
||||
pub timeout_ms: Option<u64>,
|
||||
@@ -130,7 +130,7 @@ pub struct LocalShellExecAction {
|
||||
pub user: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ReasoningItemReasoningSummary {
|
||||
SummaryText { text: String },
|
||||
@@ -185,10 +185,9 @@ pub struct ShellToolCallParams {
|
||||
pub timeout_ms: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct FunctionCallOutputPayload {
|
||||
pub content: String,
|
||||
#[expect(dead_code)]
|
||||
pub success: Option<bool>,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user