use std::time::Duration;

use bytes::Bytes;
use eventsource_stream::Eventsource;
use futures::Stream;
use futures::StreamExt;
use futures::TryStreamExt;
use reqwest::StatusCode;
use serde_json::json;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use tokio::sync::mpsc;
use tokio::time::timeout;
use tracing::debug;
use tracing::trace;

use crate::ModelProviderInfo;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::client_common::ResponseStream;
use crate::error::CodexErr;
use crate::error::Result;
use crate::model_family::ModelFamily;
use crate::models::ContentItem;
use crate::models::ReasoningItemContent;
use crate::models::ResponseItem;
use crate::openai_tools::create_tools_json_for_chat_completions_api;
use crate::util::backoff;

/// Implementation for the classic Chat Completions API.
pub(crate) async fn stream_chat_completions(
    prompt: &Prompt,
    model_family: &ModelFamily,
    client: &reqwest::Client,
    provider: &ModelProviderInfo,
) -> Result<ResponseStream> {
    // Build messages array
    let mut messages = Vec::<serde_json::Value>::new();

    let full_instructions = prompt.get_full_instructions(model_family);
    messages.push(json!({"role": "system", "content": full_instructions}));

    let input = prompt.get_formatted_input();

    for item in &input {
        match item {
            ResponseItem::Message { role, content, .. } => {
                let mut text = String::new();
                for c in content {
                    match c {
                        ContentItem::InputText { text: t }
                        | ContentItem::OutputText { text: t } => {
                            text.push_str(t);
                        }
                        _ => {}
                    }
                }
                messages.push(json!({"role": role, "content": text}));
            }
            ResponseItem::FunctionCall {
                name,
                arguments,
                call_id,
                ..
            } => {
                messages.push(json!({
                    "role": "assistant",
                    "content": null,
                    "tool_calls": [{
                        "id": call_id,
                        "type": "function",
                        "function": {
                            "name": name,
                            "arguments": arguments,
                        }
                    }]
                }));
            }
            ResponseItem::LocalShellCall {
                id,
                call_id: _,
                status,
                action,
            } => {
                // Confirm with API team.
                messages.push(json!({
                    "role": "assistant",
                    "content": null,
                    "tool_calls": [{
                        "id": id.clone().unwrap_or_else(|| "".to_string()),
                        "type": "local_shell_call",
                        "status": status,
                        "action": action,
                    }]
                }));
            }
            ResponseItem::FunctionCallOutput { call_id, output } => {
                messages.push(json!({
                    "role": "tool",
                    "tool_call_id": call_id,
                    "content": output.content,
                }));
            }
            ResponseItem::Reasoning { .. } | ResponseItem::Other => {
                // Omit these items from the conversation history.
                continue;
            }
        }
    }

    let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;

    let payload = json!({
        "model": model_family.slug,
        "messages": messages,
        "stream": true,
        "tools": tools_json,
    });
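
    // For orientation, the serialized request body looks roughly like the sketch
    // below. The model slug, tool name, and tool schema are illustrative only;
    // the real `tools` entries come from `create_tools_json_for_chat_completions_api`.
    //
    //   {
    //     "model": "gpt-4.1",
    //     "messages": [
    //       {"role": "system", "content": "..."},
    //       {"role": "user", "content": "..."}
    //     ],
    //     "stream": true,
    //     "tools": [
    //       {"type": "function", "function": {"name": "shell", "parameters": {"...": "..."}}}
    //     ]
    //   }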

    debug!(
        "POST to {}: {}",
        provider.get_full_url(&None),
        serde_json::to_string_pretty(&payload).unwrap_or_default()
    );

    let mut attempt = 0;
    let max_retries = provider.request_max_retries();
    loop {
        attempt += 1;

        let req_builder = provider.create_request_builder(client, &None).await?;

        let res = req_builder
            .header(reqwest::header::ACCEPT, "text/event-stream")
            .json(&payload)
            .send()
            .await;

        match res {
            Ok(resp) if resp.status().is_success() => {
                let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
                let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
                tokio::spawn(process_chat_sse(
                    stream,
                    tx_event,
                    provider.stream_idle_timeout(),
                ));
                return Ok(ResponseStream { rx_event });
            }
            Ok(res) => {
                let status = res.status();
                if !(status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()) {
                    let body = (res.text().await).unwrap_or_default();
                    return Err(CodexErr::UnexpectedStatus(status, body));
                }

                if attempt > max_retries {
                    return Err(CodexErr::RetryLimit(status));
                }

                let retry_after_secs = res
                    .headers()
                    .get(reqwest::header::RETRY_AFTER)
                    .and_then(|v| v.to_str().ok())
                    .and_then(|s| s.parse::<u64>().ok());
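
                // e.g. a numeric `Retry-After: 2` header translates to a 2_000 ms
                // pause below; a missing or unparsable header falls back to the
                // exponential `backoff(attempt)` schedule.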
                let delay = retry_after_secs
                    .map(|s| Duration::from_millis(s * 1_000))
                    .unwrap_or_else(|| backoff(attempt));
                tokio::time::sleep(delay).await;
            }
            Err(e) => {
                if attempt > max_retries {
                    return Err(e.into());
                }
                let delay = backoff(attempt);
                tokio::time::sleep(delay).await;
            }
        }
    }
}

/// Lightweight SSE processor for the Chat Completions streaming format. The
/// output is mapped onto Codex's internal [`ResponseEvent`] so that the rest
/// of the pipeline can stay agnostic of the underlying wire format.
async fn process_chat_sse<S>(
    stream: S,
    tx_event: mpsc::Sender<Result<ResponseEvent>>,
    idle_timeout: Duration,
) where
    S: Stream<Item = Result<Bytes>> + Unpin,
{
    let mut stream = stream.eventsource();

    // State to accumulate a function call across streaming chunks.
    // OpenAI may split the `arguments` string over multiple `delta` events
    // until the chunk whose `finish_reason` is `tool_calls` is emitted. We
    // keep collecting the pieces here and forward a single
    // `ResponseItem::FunctionCall` once the call is complete.
    #[derive(Default)]
    struct FunctionCallState {
        name: Option<String>,
        arguments: String,
        call_id: Option<String>,
        active: bool,
    }
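
    // Illustrative only (the `shell` tool and its arguments are hypothetical):
    // the pieces of a single call typically arrive split across chunks like the
    // two below, with the last one carrying `"finish_reason": "tool_calls"`.
    //
    //   {"choices":[{"delta":{"tool_calls":[{"id":"call_1","type":"function",
    //       "function":{"name":"shell","arguments":"{\"command\":"}}]}}]}
    //   {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"[\"ls\"]}"}}]},
    //       "finish_reason":"tool_calls"}]}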

    let mut fn_call_state = FunctionCallState::default();
    let mut assistant_text = String::new();
    let mut reasoning_text = String::new();

    loop {
        let sse = match timeout(idle_timeout, stream.next()).await {
            Ok(Some(Ok(ev))) => ev,
            Ok(Some(Err(e))) => {
                let _ = tx_event.send(Err(CodexErr::Stream(e.to_string()))).await;
                return;
            }
            Ok(None) => {
                // Stream closed gracefully – emit Completed with dummy id.
                let _ = tx_event
                    .send(Ok(ResponseEvent::Completed {
                        response_id: String::new(),
                        token_usage: None,
                    }))
                    .await;
                return;
            }
            Err(_) => {
                let _ = tx_event
                    .send(Err(CodexErr::Stream("idle timeout waiting for SSE".into())))
                    .await;
                return;
            }
        };

        // OpenAI Chat streaming sends a literal string "[DONE]" when finished.
        if sse.data.trim() == "[DONE]" {
            // Emit any finalized items before closing so downstream consumers receive
            // terminal events for both assistant content and raw reasoning.
            if !assistant_text.is_empty() {
                let item = ResponseItem::Message {
                    role: "assistant".to_string(),
                    content: vec![ContentItem::OutputText {
                        text: std::mem::take(&mut assistant_text),
                    }],
                    id: None,
                };
                let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
            }

            if !reasoning_text.is_empty() {
                let item = ResponseItem::Reasoning {
                    id: String::new(),
                    summary: Vec::new(),
                    content: Some(vec![ReasoningItemContent::ReasoningText {
                        text: std::mem::take(&mut reasoning_text),
                    }]),
                    encrypted_content: None,
                };
                let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
            }

            let _ = tx_event
                .send(Ok(ResponseEvent::Completed {
                    response_id: String::new(),
                    token_usage: None,
                }))
                .await;
            return;
        }

        // Parse JSON chunk
        let chunk: serde_json::Value = match serde_json::from_str(&sse.data) {
            Ok(v) => v,
            Err(_) => continue,
        };
        trace!("chat_completions received SSE chunk: {chunk:?}");

        let choice_opt = chunk.get("choices").and_then(|c| c.get(0));

        if let Some(choice) = choice_opt {
            // Handle assistant content tokens as streaming deltas.
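            // A typical content chunk looks roughly like this (abridged):
            //   {"choices":[{"delta":{"content":"Hel"},"finish_reason":null}]}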
            if let Some(content) = choice
                .get("delta")
                .and_then(|d| d.get("content"))
                .and_then(|c| c.as_str())
            {
                if !content.is_empty() {
                    assistant_text.push_str(content);
                    let _ = tx_event
                        .send(Ok(ResponseEvent::OutputTextDelta(content.to_string())))
                        .await;
                }
            }

            // Forward any reasoning/thinking deltas if present.
            // Some providers stream `reasoning` as a plain string while others
            // nest the text under an object (e.g. `{ "reasoning": { "text": "…" } }`).
            if let Some(reasoning_val) = choice.get("delta").and_then(|d| d.get("reasoning")) {
                let mut maybe_text = reasoning_val.as_str().map(|s| s.to_string());

                if maybe_text.is_none() && reasoning_val.is_object() {
                    if let Some(s) = reasoning_val
                        .get("text")
                        .and_then(|t| t.as_str())
                        .filter(|s| !s.is_empty())
                    {
                        maybe_text = Some(s.to_string());
                    } else if let Some(s) = reasoning_val
                        .get("content")
                        .and_then(|t| t.as_str())
                        .filter(|s| !s.is_empty())
                    {
                        maybe_text = Some(s.to_string());
                    }
                }

                if let Some(reasoning) = maybe_text {
                    let _ = tx_event
                        .send(Ok(ResponseEvent::ReasoningContentDelta(reasoning)))
                        .await;
                }
            }

            // Handle streaming function / tool calls.
            if let Some(tool_calls) = choice
                .get("delta")
                .and_then(|d| d.get("tool_calls"))
                .and_then(|tc| tc.as_array())
            {
                if let Some(tool_call) = tool_calls.first() {
                    // Mark that we have an active function call in progress.
                    fn_call_state.active = true;

                    // Extract call_id if present.
                    if let Some(id) = tool_call.get("id").and_then(|v| v.as_str()) {
                        fn_call_state.call_id.get_or_insert_with(|| id.to_string());
                    }

                    // Extract function details if present.
                    if let Some(function) = tool_call.get("function") {
                        if let Some(name) = function.get("name").and_then(|n| n.as_str()) {
                            fn_call_state.name.get_or_insert_with(|| name.to_string());
                        }

                        if let Some(args_fragment) =
                            function.get("arguments").and_then(|a| a.as_str())
                        {
                            fn_call_state.arguments.push_str(args_fragment);
                        }
                    }
                }
            }

            // Emit end-of-turn when finish_reason signals completion.
            if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
                match finish_reason {
                    "tool_calls" if fn_call_state.active => {
                        // First, flush the terminal raw reasoning so UIs can finalize
                        // the reasoning stream before any exec/tool events begin.
                        if !reasoning_text.is_empty() {
                            let item = ResponseItem::Reasoning {
                                id: String::new(),
                                summary: Vec::new(),
                                content: Some(vec![ReasoningItemContent::ReasoningText {
                                    text: std::mem::take(&mut reasoning_text),
                                }]),
                                encrypted_content: None,
                            };
                            let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
                        }

                        // Then emit the FunctionCall response item.
                        let item = ResponseItem::FunctionCall {
                            id: None,
                            name: fn_call_state.name.clone().unwrap_or_else(|| "".to_string()),
                            arguments: fn_call_state.arguments.clone(),
                            call_id: fn_call_state.call_id.clone().unwrap_or_else(String::new),
                        };

                        let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
                    }
                    "stop" => {
                        // Regular turn without tool-call. Emit the final assistant message
                        // as a single OutputItemDone so non-delta consumers see the result.
                        if !assistant_text.is_empty() {
                            let item = ResponseItem::Message {
                                role: "assistant".to_string(),
                                content: vec![ContentItem::OutputText {
                                    text: std::mem::take(&mut assistant_text),
                                }],
                                id: None,
                            };
                            let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
                        }
                        // Also emit a terminal Reasoning item so UIs can finalize raw reasoning.
                        if !reasoning_text.is_empty() {
                            let item = ResponseItem::Reasoning {
                                id: String::new(),
                                summary: Vec::new(),
                                content: Some(vec![ReasoningItemContent::ReasoningText {
                                    text: std::mem::take(&mut reasoning_text),
                                }]),
                                encrypted_content: None,
                            };
                            let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
                        }
                    }
                    _ => {}
                }

                // Emit Completed regardless of reason so the agent can advance.
                let _ = tx_event
                    .send(Ok(ResponseEvent::Completed {
                        response_id: String::new(),
                        token_usage: None,
                    }))
                    .await;

                // Prepare for potential next turn (should not happen in same stream).
                // fn_call_state = FunctionCallState::default();

                return; // End processing for this SSE stream.
            }
        }
    }
}

/// Optional client-side aggregation helper
///
/// Stream adapter that merges the incremental `OutputItemDone` chunks coming from
/// [`process_chat_sse`] into a *running* assistant message, **suppressing the
/// per-token deltas**. The stream stays silent while the model is thinking
/// and only emits two events per turn:
///
/// 1. `ResponseEvent::OutputItemDone` with the *complete* assistant message
///    (fully concatenated).
/// 2. The original `ResponseEvent::Completed` right after it.
///
/// This mirrors the behaviour the TypeScript CLI exposes to its higher layers.
///
/// The adapter is intentionally *lossless*: callers who do **not** opt in via
/// [`AggregateStreamExt::aggregate()`] keep receiving the original unmodified
/// events.
#[derive(Copy, Clone, Eq, PartialEq)]
enum AggregateMode {
    AggregatedOnly,
    Streaming,
}

pub(crate) struct AggregatedChatStream<S> {
    inner: S,
    cumulative: String,
    cumulative_reasoning: String,
    pending: std::collections::VecDeque<ResponseEvent>,
    mode: AggregateMode,
}
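
// Illustrative usage sketch: assuming the `AggregateStreamExt` extension trait
// referenced in the doc comment above provides `aggregate()`, a caller that only
// wants the final assistant message per turn writes something like
// `let mut events = response_stream.aggregate();` and polls that, while callers
// that skip `aggregate()` keep receiving the raw per-token events.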
|
|
|
|
|
|
|
|
|
|
|
|
impl<S> Stream for AggregatedChatStream<S>
|
|
|
|
|
|
where
|
|
|
|
|
|
S: Stream<Item = Result<ResponseEvent>> + Unpin,
|
|
|
|
|
|
{
|
|
|
|
|
|
type Item = Result<ResponseEvent>;
|
|
|
|
|
|
|
|
|
|
|
|
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
|
|
|
|
|
let this = self.get_mut();
|
|
|
|
|
|
|
2025-08-05 01:56:13 -07:00
|
|
|
|
// First, flush any buffered events from the previous call.
|
|
|
|
|
|
if let Some(ev) = this.pending.pop_front() {
|
2025-05-08 21:46:06 -07:00
|
|
|
|
return Poll::Ready(Some(Ok(ev)));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
loop {
|
|
|
|
|
|
match Pin::new(&mut this.inner).poll_next(cx) {
|
|
|
|
|
|
Poll::Pending => return Poll::Pending,
|
|
|
|
|
|
Poll::Ready(None) => return Poll::Ready(None),
|
|
|
|
|
|
Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
|
|
|
|
|
|
Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item)))) => {
|
                    // If this is an incremental assistant message chunk, accumulate but
                    // do NOT emit yet. Forward any other item (e.g. FunctionCall) right
                    // away so downstream consumers see it.

                    let is_assistant_delta = matches!(
                        &item,
                        crate::models::ResponseItem::Message { role, .. } if role == "assistant"
                    );

                    if is_assistant_delta {
                        // Only use the final assistant message if we have not
                        // seen any deltas; otherwise, deltas already built the
                        // cumulative text and this would duplicate it.
                        if this.cumulative.is_empty() {
                            if let crate::models::ResponseItem::Message { content, .. } = &item {
                                if let Some(text) = content.iter().find_map(|c| match c {
                                    crate::models::ContentItem::OutputText { text } => Some(text),
                                    _ => None,
                                }) {
                                    this.cumulative.push_str(text);
                                }
                            }
                        }

                        // Swallow assistant message here; emit on Completed.
                        continue;
                    }

                    // Not an assistant message – forward immediately.
                    return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
                }
                Poll::Ready(Some(Ok(ResponseEvent::Completed {
                    response_id,
                    token_usage,
                }))) => {
                    // Build any aggregated items in the correct order: Reasoning first, then Message.
                    let mut emitted_any = false;

                    if !this.cumulative_reasoning.is_empty()
                        && matches!(this.mode, AggregateMode::AggregatedOnly)
                    {
                        let aggregated_reasoning = crate::models::ResponseItem::Reasoning {
                            id: String::new(),
                            summary: Vec::new(),
                            content: Some(vec![
                                crate::models::ReasoningItemContent::ReasoningText {
                                    text: std::mem::take(&mut this.cumulative_reasoning),
                                },
                            ]),
                            encrypted_content: None,
                        };
                        this.pending
                            .push_back(ResponseEvent::OutputItemDone(aggregated_reasoning));
                        emitted_any = true;
                    }

                    if !this.cumulative.is_empty() {
                        let aggregated_message = crate::models::ResponseItem::Message {
                            id: None,
                            role: "assistant".to_string(),
                            content: vec![crate::models::ContentItem::OutputText {
                                text: std::mem::take(&mut this.cumulative),
                            }],
                        };
                        this.pending
                            .push_back(ResponseEvent::OutputItemDone(aggregated_message));
                        emitted_any = true;
                    }

                    // Always emit Completed last when anything was aggregated.
                    if emitted_any {
                        this.pending.push_back(ResponseEvent::Completed {
                            response_id: response_id.clone(),
                            token_usage: token_usage.clone(),
                        });
                        // Return the first pending event now.
                        if let Some(ev) = this.pending.pop_front() {
                            return Poll::Ready(Some(Ok(ev)));
                        }
                    }

                    // Nothing aggregated – forward Completed directly.
                    return Poll::Ready(Some(Ok(ResponseEvent::Completed {
                        response_id,
                        token_usage,
                    })));
                }
                Poll::Ready(Some(Ok(ResponseEvent::Created))) => {
                    // These events are exclusive to the Responses API and
                    // will never appear in a Chat Completions stream.
                    continue;
                }
                Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta)))) => {
                    // Always accumulate deltas so we can emit a final OutputItemDone at Completed.
                    this.cumulative.push_str(&delta);
                    if matches!(this.mode, AggregateMode::Streaming) {
                        // In streaming mode, also forward the delta immediately.
                        return Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta))));
                    } else {
                        continue;
                    }
                }
                Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta(delta)))) => {
                    // Always accumulate reasoning deltas so we can emit a final Reasoning item at Completed.
                    this.cumulative_reasoning.push_str(&delta);
                    if matches!(this.mode, AggregateMode::Streaming) {
                        // In streaming mode, also forward the delta immediately.
                        return Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta(delta))));
                    } else {
                        continue;
                    }
                }
                Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta(_)))) => {
                    // Reasoning summary deltas are not part of the aggregated output; drop them.
                    continue;
                }
            }
        }
    }
}

/// Extension trait that activates aggregation on any stream of [`ResponseEvent`].
pub(crate) trait AggregateStreamExt: Stream<Item = Result<ResponseEvent>> + Sized {
    /// Returns a new stream that emits **only** the final assistant message
    /// per turn instead of every incremental delta. The produced
    /// `ResponseEvent` sequence for a typical text turn looks like:
    ///
    /// ```ignore
    /// OutputItemDone(<full message>)
    /// Completed
    /// ```
    ///
    /// No other `OutputItemDone` events will be seen by the caller.
    ///
    /// Usage:
    ///
    /// ```ignore
    /// let mut agg_stream = client.stream(&prompt).await?.aggregate();
    /// while let Some(event) = agg_stream.next().await {
    ///     // event now contains cumulative text
    /// }
    /// ```
    fn aggregate(self) -> AggregatedChatStream<Self> {
        AggregatedChatStream::new(self, AggregateMode::AggregatedOnly)
    }
}

impl<T> AggregateStreamExt for T where T: Stream<Item = Result<ResponseEvent>> + Sized {}

impl<S> AggregatedChatStream<S> {
    fn new(inner: S, mode: AggregateMode) -> Self {
        AggregatedChatStream {
            inner,
            cumulative: String::new(),
            cumulative_reasoning: String::new(),
            pending: std::collections::VecDeque::new(),
            mode,
        }
    }

    /// Like [`AggregateStreamExt::aggregate()`], but every incremental delta is
    /// forwarded as it arrives while the cumulative assistant message is still
    /// emitted ahead of the final `Completed` event.
    pub(crate) fn streaming_mode(inner: S) -> Self {
        Self::new(inner, AggregateMode::Streaming)
    }
}
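
// ---------------------------------------------------------------------------
// A minimal test sketch of the aggregation behaviour documented above: in
// aggregated mode the text deltas collapse into a single `OutputItemDone`
// followed by `Completed`, while streaming mode forwards every delta
// unchanged. It assumes the `token_usage` field of `ResponseEvent::Completed`
// is an `Option` (so `None` is a valid placeholder) and that `tokio` with the
// test macros is available as a dev-dependency; adjust the constructors if
// the concrete types differ.
// ---------------------------------------------------------------------------
#[cfg(test)]
mod aggregation_tests {
    use super::*;
    use futures::StreamExt;
    use futures::stream;

    fn text_delta(text: &str) -> Result<ResponseEvent> {
        Ok(ResponseEvent::OutputTextDelta(text.to_string()))
    }

    fn completed() -> Result<ResponseEvent> {
        Ok(ResponseEvent::Completed {
            // Placeholder values: any id works here, and `None` assumes the
            // token usage field is optional.
            response_id: "resp-1".to_string(),
            token_usage: None,
        })
    }

    #[tokio::test]
    async fn aggregate_collapses_deltas_into_one_message() {
        let events: Vec<Result<ResponseEvent>> =
            vec![text_delta("Hel"), text_delta("lo"), completed()];
        let mut aggregated = stream::iter(events).aggregate();

        // The only `OutputItemDone` is the cumulative assistant message.
        match aggregated.next().await {
            Some(Ok(ResponseEvent::OutputItemDone(crate::models::ResponseItem::Message {
                content,
                ..
            }))) => {
                let text = content.iter().find_map(|c| match c {
                    crate::models::ContentItem::OutputText { text } => Some(text.clone()),
                    _ => None,
                });
                assert_eq!(text.as_deref(), Some("Hello"));
            }
            _ => panic!("expected the aggregated assistant message first"),
        }

        // `Completed` is re-emitted last and the stream then ends.
        assert!(matches!(
            aggregated.next().await,
            Some(Ok(ResponseEvent::Completed { .. }))
        ));
        assert!(aggregated.next().await.is_none());
    }

    #[tokio::test]
    async fn streaming_mode_forwards_deltas_and_final_message() {
        let events: Vec<Result<ResponseEvent>> = vec![text_delta("Hi"), completed()];
        let mut streamed = AggregatedChatStream::streaming_mode(stream::iter(events));

        // The delta passes through untouched...
        assert!(matches!(
            streamed.next().await,
            Some(Ok(ResponseEvent::OutputTextDelta(d))) if d == "Hi"
        ));
        // ...and the cumulative message plus `Completed` still follow.
        assert!(matches!(
            streamed.next().await,
            Some(Ok(ResponseEvent::OutputItemDone(
                crate::models::ResponseItem::Message { .. }
            )))
        ));
        assert!(matches!(
            streamed.next().await,
            Some(Ok(ResponseEvent::Completed { .. }))
        ));
    }
}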