feat: add ZDR support to Rust implementation (#642)

This adds support for the `--disable-response-storage` flag across our
Rust CLIs for customers who have opted into Zero Data Retention (ZDR).
The analogous changes to the TypeScript CLI were:

* https://github.com/openai/codex/pull/481
* https://github.com/openai/codex/pull/543

For a client using ZDR, `previous_response_id` will never be available,
so the `input` field of an API request must include the full transcript
of the conversation thus far. As such, this PR changes the type of
`Prompt.input` from `Vec<ResponseInputItem>` to `Vec<ResponseItem>`.
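
As a rough sketch (variable names here are illustrative, and the `Prompt`
fields are the ones shown in the diff below), a ZDR turn builds its request
like this:

```rust
// Sketch only: under ZDR there is no previous_response_id, so the entire
// transcript is resent as `input` on every turn.
let prompt = Prompt {
    input: full_transcript,   // Vec<ResponseItem>: prior transcript + net-new items
    prev_id: None,            // previous_response_id is never usable under ZDR
    instructions,             // only populated on the first turn
    ..Default::default()
};
```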

Practically speaking, `ResponseItem` was already effectively a "superset"
of `ResponseInputItem`. The main difference for us is that `ResponseItem`
includes the `FunctionCall` variant, which we have to include as part of
the conversation history in the ZDR case.
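
A condensed view of the relationship between the two enums (payload fields
elided; the real definitions live in `models.rs` in the diff below):

```rust
// Variants shared by both types map straight across via the new
// `From<ResponseInputItem> for ResponseItem` impl added in this PR.
enum ResponseInputItem {
    Message { /* role, content */ },
    FunctionCallOutput { /* call_id, output */ },
}

enum ResponseItem {
    Message { /* role, content */ },
    FunctionCall { /* call_id, name, arguments, ... */ }, // needed in ZDR history
    FunctionCallOutput { /* call_id, output */ },
    Other, // catch-all; defensively kept out of serialized requests
    // ...other variants (e.g. reasoning items) omitted from this sketch
}
```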

Another key change in this PR is modifying `try_run_turn()` so that it
returns the `Vec<ResponseItem>` for the turn in addition to the
`Vec<ResponseInputItem>` it already produced (the two are now paired in a
new `ProcessedResponseItem` struct). This is because the caller of
`run_turn()` needs to record the `Vec<ResponseItem>` when ZDR is enabled.
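
A sketch of how the caller consumes the new return type (this mirrors the
updated loop in `run_task()` in the diff below):

```rust
// Each ProcessedResponseItem pairs a streamed item with the optional response
// it produced (e.g. a function_call paired with its function_call_output).
let (items, responses): (Vec<ResponseItem>, Vec<Option<ResponseInputItem>>) =
    turn_output.into_iter().map(|p| (p.item, p.response)).unzip();

// Everything goes into the ZDR transcript (when enabled)...
if let Some(transcript) = zdr_transcript.as_mut() {
    transcript.record_items(items);
}

// ...while only the actual responses are fed back into the next turn.
let pending_response_input: Vec<ResponseInputItem> =
    responses.into_iter().flatten().collect();
```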

To that end, this PR introduces `ZdrTranscript` (and adds
`zdr_transcript: Option<ZdrTranscript>` to `struct State` in `codex.rs`)
to take responsibility for maintaining the conversation transcript in
the ZDR case.
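
A minimal usage sketch of `ZdrTranscript` (the full implementation is in the
new `zdr_transcript.rs` below; `net_new_items` is just an illustrative name):

```rust
let mut transcript = ZdrTranscript::new();

// Each turn sends everything recorded so far plus the net-new items...
let mut turn_input = transcript.contents();
turn_input.extend(net_new_items.clone());

// ...and records the net-new items (and, later, the model's output items) so
// the next turn sees them too. System/reasoning items are filtered out inside
// record_items().
transcript.record_items(net_new_items);
```
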
Author: Michael Bolin (committed by GitHub)
Date: 2025-04-25 12:08:18 -07:00
Parent: dc7b83666a
Commit: b323d10ea7
18 changed files with 206 additions and 33 deletions

View File

@@ -28,15 +28,20 @@ use crate::flags::CODEX_RS_SSE_FIXTURE;
use crate::flags::OPENAI_API_BASE;
use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
use crate::models::ResponseInputItem;
use crate::models::ResponseItem;
use crate::util::backoff;
/// API request payload for a single model turn.
#[derive(Default, Debug, Clone)]
pub struct Prompt {
pub input: Vec<ResponseInputItem>,
/// Conversation context input items.
pub input: Vec<ResponseItem>,
/// Optional previous response ID (when storage is enabled).
pub prev_id: Option<String>,
/// Optional initial instructions (only sent on first turn).
pub instructions: Option<String>,
/// Whether to store response on server side (disable_response_storage = !store).
pub store: bool,
}
#[derive(Debug)]
@@ -50,13 +55,18 @@ struct Payload<'a> {
model: &'a str,
#[serde(skip_serializing_if = "Option::is_none")]
instructions: Option<&'a String>,
input: &'a Vec<ResponseInputItem>,
// TODO(mbolin): ResponseItem::Other should not be serialized. Currently,
// we code defensively to avoid this case, but perhaps we should use a
// separate enum for serialization.
input: &'a Vec<ResponseItem>,
tools: &'a [Tool],
tool_choice: &'static str,
parallel_tool_calls: bool,
reasoning: Option<Reasoning>,
#[serde(skip_serializing_if = "Option::is_none")]
previous_response_id: Option<String>,
/// true when using the Responses API.
store: bool,
stream: bool,
}
@@ -151,6 +161,7 @@ impl ModelClient {
generate_summary: None,
}),
previous_response_id: prompt.prev_id.clone(),
store: prompt.store,
stream: true,
};

View File

@@ -55,6 +55,7 @@ use crate::safety::assess_command_safety;
use crate::safety::assess_patch_safety;
use crate::safety::SafetyCheck;
use crate::util::backoff;
use crate::zdr_transcript::ZdrTranscript;
/// The high-level interface to the Codex system.
/// It operates as a queue pair where you send submissions and receive events.
@@ -214,6 +215,7 @@ struct State {
previous_response_id: Option<String>,
pending_approvals: HashMap<String, oneshot::Sender<ReviewDecision>>,
pending_input: Vec<ResponseInputItem>,
zdr_transcript: Option<ZdrTranscript>,
}
impl Session {
@@ -399,6 +401,7 @@ impl State {
Self {
approved_commands: self.approved_commands.clone(),
previous_response_id: self.previous_response_id.clone(),
zdr_transcript: self.zdr_transcript.clone(),
..Default::default()
}
}
@@ -489,6 +492,7 @@ async fn submission_loop(
instructions,
approval_policy,
sandbox_policy,
disable_response_storage,
} => {
let model = model.unwrap_or_else(|| OPENAI_DEFAULT_MODEL.to_string());
info!(model, "Configuring session");
@@ -500,7 +504,14 @@ async fn submission_loop(
sess.abort();
sess.state.lock().unwrap().partial_clone()
}
None => State::default(),
None => State {
zdr_transcript: if disable_response_storage {
Some(ZdrTranscript::new())
} else {
None
},
..Default::default()
},
};
// update session
@@ -587,18 +598,54 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
return;
}
let mut turn_input = vec![ResponseInputItem::from(input)];
let mut pending_response_input: Vec<ResponseInputItem> = vec![ResponseInputItem::from(input)];
loop {
let pending_input = sess.get_pending_input();
turn_input.splice(0..0, pending_input);
let mut net_new_turn_input = pending_response_input
.drain(..)
.map(ResponseItem::from)
.collect::<Vec<_>>();
// Note that pending_input would be something like a message the user
// submitted through the UI while the model was running. Though the UI
// may support this, the model might not.
let pending_input = sess.get_pending_input().into_iter().map(ResponseItem::from);
net_new_turn_input.extend(pending_input);
let turn_input: Vec<ResponseItem> =
if let Some(transcript) = sess.state.lock().unwrap().zdr_transcript.as_mut() {
// If we are using ZDR, we need to send the transcript with every turn.
let mut full_transcript = transcript.contents();
full_transcript.extend(net_new_turn_input.clone());
transcript.record_items(net_new_turn_input);
full_transcript
} else {
net_new_turn_input
};
match run_turn(&sess, sub_id.clone(), turn_input).await {
Ok(turn_output) => {
if turn_output.is_empty() {
let (items, responses): (Vec<_>, Vec<_>) = turn_output
.into_iter()
.map(|p| (p.item, p.response))
.unzip();
let responses = responses
.into_iter()
.flatten()
.collect::<Vec<ResponseInputItem>>();
// Only attempt to take the lock if there is something to record.
if !items.is_empty() {
if let Some(transcript) = sess.state.lock().unwrap().zdr_transcript.as_mut() {
transcript.record_items(items);
}
}
if responses.is_empty() {
debug!("Turn completed");
break;
}
turn_input = turn_output;
pending_response_input = responses;
}
Err(e) => {
info!("Turn error: {e:#}");
@@ -624,21 +671,31 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
async fn run_turn(
sess: &Session,
sub_id: String,
input: Vec<ResponseInputItem>,
) -> CodexResult<Vec<ResponseInputItem>> {
let prev_id = {
input: Vec<ResponseItem>,
) -> CodexResult<Vec<ProcessedResponseItem>> {
// Decide whether to use server-side storage (previous_response_id) or disable it
let (prev_id, store, is_first_turn) = {
let state = sess.state.lock().unwrap();
state.previous_response_id.clone()
let is_first_turn = state.previous_response_id.is_none();
if state.zdr_transcript.is_some() {
// When using ZDR, the Responses API may send previous_response_id
// back, but trying to use it results in a 400.
(None, true, is_first_turn)
} else {
(state.previous_response_id.clone(), false, is_first_turn)
}
};
let instructions = match prev_id {
Some(_) => None,
None => sess.instructions.clone(),
let instructions = if is_first_turn {
sess.instructions.clone()
} else {
None
};
let prompt = Prompt {
input,
prev_id,
instructions,
store,
};
let mut retries = 0;
@@ -676,11 +733,20 @@ async fn run_turn(
}
}
/// When the model is prompted, it returns a stream of events. Some of these
/// events map to a `ResponseItem`. A `ResponseItem` may need to be
/// "handled" such that it produces a `ResponseInputItem` to send back to the
/// model on the next turn.
struct ProcessedResponseItem {
item: ResponseItem,
response: Option<ResponseInputItem>,
}
async fn try_run_turn(
sess: &Session,
sub_id: &str,
prompt: &Prompt,
) -> CodexResult<Vec<ResponseInputItem>> {
) -> CodexResult<Vec<ProcessedResponseItem>> {
let mut stream = sess.client.clone().stream(prompt).await?;
// Buffer all the incoming messages from the stream first, then execute them.
@@ -694,9 +760,8 @@ async fn try_run_turn(
for event in input {
match event {
ResponseEvent::OutputItemDone(item) => {
if let Some(item) = handle_response_item(sess, sub_id, item).await? {
output.push(item);
}
let response = handle_response_item(sess, sub_id, item.clone()).await?;
output.push(ProcessedResponseItem { item, response });
}
ResponseEvent::Completed { response_id } => {
let mut state = sess.state.lock().unwrap();

View File

@@ -21,6 +21,7 @@ use tracing::debug;
pub async fn init_codex(
approval_policy: AskForApproval,
sandbox_policy: SandboxPolicy,
disable_response_storage: bool,
model_override: Option<String>,
) -> anyhow::Result<(CodexWrapper, Event, Arc<Notify>)> {
let ctrl_c = notify_on_sigint();
@@ -33,6 +34,7 @@ pub async fn init_codex(
instructions: config.instructions,
approval_policy,
sandbox_policy,
disable_response_storage,
})
.await?;

View File

@@ -19,6 +19,7 @@ mod models;
pub mod protocol;
mod safety;
pub mod util;
mod zdr_transcript;
pub use codex::Codex;

View File

@@ -56,6 +56,17 @@ pub enum ResponseItem {
Other,
}
impl From<ResponseInputItem> for ResponseItem {
fn from(item: ResponseInputItem) -> Self {
match item {
ResponseInputItem::Message { role, content } => Self::Message { role, content },
ResponseInputItem::FunctionCallOutput { call_id, output } => {
Self::FunctionCallOutput { call_id, output }
}
}
}
}
impl From<Vec<InputItem>> for ResponseInputItem {
fn from(items: Vec<InputItem>) -> Self {
Self::Message {

View File

@@ -33,6 +33,9 @@ pub enum Op {
approval_policy: AskForApproval,
/// How to sandbox commands executed in the system
sandbox_policy: SandboxPolicy,
/// Disable server-side response storage (send full context each request)
#[serde(default)]
disable_response_storage: bool,
},
/// Abort current task.

View File

@@ -0,0 +1,46 @@
use crate::models::ResponseItem;
/// Transcript that needs to be maintained for ZDR clients for which
/// previous_response_id is not available, so we must include the transcript
/// with every API call. This must include each `function_call` and its
/// corresponding `function_call_output`.
#[derive(Debug, Clone)]
pub(crate) struct ZdrTranscript {
/// The oldest items are at the beginning of the vector.
items: Vec<ResponseItem>,
}
impl ZdrTranscript {
pub(crate) fn new() -> Self {
Self { items: Vec::new() }
}
/// Returns a clone of the contents in the transcript.
pub(crate) fn contents(&self) -> Vec<ResponseItem> {
self.items.clone()
}
/// `items` is ordered from oldest to newest.
pub(crate) fn record_items<I>(&mut self, items: I)
where
I: IntoIterator<Item = ResponseItem>,
{
for item in items {
if is_api_message(&item) {
// Note agent-loop.ts also does filtering on some of the fields.
self.items.push(item.clone());
}
}
}
}
/// Anything that is not a system message or "reasoning" message is considered
/// an API message.
fn is_api_message(message: &ResponseItem) -> bool {
match message {
ResponseItem::Message { role, .. } => role.as_str() != "system",
ResponseItem::FunctionCall { .. } => true,
ResponseItem::FunctionCallOutput { .. } => true,
_ => false,
}
}

View File

@@ -55,6 +55,7 @@ async fn spawn_codex() -> Codex {
instructions: None,
approval_policy: AskForApproval::OnFailure,
sandbox_policy: SandboxPolicy::NetworkAndFileWriteRestricted,
disable_response_storage: false,
},
})
.await

View File

@@ -95,6 +95,7 @@ async fn keeps_previous_response_id_between_tasks() {
instructions: None,
approval_policy: AskForApproval::OnFailure,
sandbox_policy: SandboxPolicy::NetworkAndFileWriteRestricted,
disable_response_storage: false,
},
})
.await

View File

@@ -78,6 +78,7 @@ async fn retries_on_early_close() {
instructions: None,
approval_policy: AskForApproval::OnFailure,
sandbox_policy: SandboxPolicy::NetworkAndFileWriteRestricted,
disable_response_storage: false,
},
})
.await

View File

@@ -16,6 +16,10 @@ pub struct Cli {
#[arg(long = "skip-git-repo-check", default_value_t = false)]
pub skip_git_repo_check: bool,
/// Disable server-side response storage (sends the full conversation context with every request)
#[arg(long = "disable-response-storage", default_value_t = false)]
pub disable_response_storage: bool,
/// Initial instructions for the agent.
pub prompt: Option<String>,
}

View File

@@ -31,9 +31,10 @@ pub async fn run_main(cli: Cli) -> anyhow::Result<()> {
.try_init();
let Cli {
skip_git_repo_check,
model,
images,
model,
skip_git_repo_check,
disable_response_storage,
prompt,
..
} = cli;
@@ -50,8 +51,13 @@ pub async fn run_main(cli: Cli) -> anyhow::Result<()> {
// likely come from a new --execution-policy arg.
let approval_policy = AskForApproval::Never;
let sandbox_policy = SandboxPolicy::NetworkAndFileWriteRestricted;
let (codex_wrapper, event, ctrl_c) =
codex_wrapper::init_codex(approval_policy, sandbox_policy, model).await?;
let (codex_wrapper, event, ctrl_c) = codex_wrapper::init_codex(
approval_policy,
sandbox_policy,
disable_response_storage,
model,
)
.await?;
let codex = Arc::new(codex_wrapper);
info!("Codex initialized with event: {event:?}");

View File

@@ -50,6 +50,10 @@ pub struct Cli {
#[arg(long, action = ArgAction::SetTrue, default_value_t = false)]
pub allow_no_git_exec: bool,
/// Disable server-side response storage (sends the full conversation context with every request)
#[arg(long = "disable-response-storage", default_value_t = false)]
pub disable_response_storage: bool,
/// Record submissions into file as JSON
#[arg(short = 'S', long)]
pub record_submissions: Option<String>,

View File

@@ -97,6 +97,7 @@ async fn codex_main(mut cli: Cli, cfg: Config, ctrl_c: Arc<Notify>) -> anyhow::R
instructions: cfg.instructions,
approval_policy: cli.approval_policy.into(),
sandbox_policy: cli.sandbox_policy.into(),
disable_response_storage: cli.disable_response_storage,
},
};

View File

@@ -40,6 +40,7 @@ impl App<'_> {
show_git_warning: bool,
initial_images: Vec<std::path::PathBuf>,
model: Option<String>,
disable_response_storage: bool,
) -> Self {
let (app_event_tx, app_event_rx) = channel();
let scroll_event_helper = ScrollEventHelper::new(app_event_tx.clone());
@@ -85,6 +86,7 @@ impl App<'_> {
initial_prompt.clone(),
initial_images,
model,
disable_response_storage,
);
let app_state = if show_git_warning {

View File

@@ -52,6 +52,7 @@ impl ChatWidget<'_> {
initial_prompt: Option<String>,
initial_images: Vec<std::path::PathBuf>,
model: Option<String>,
disable_response_storage: bool,
) -> Self {
let (codex_op_tx, mut codex_op_rx) = unbounded_channel::<Op>();
@@ -63,15 +64,22 @@ impl ChatWidget<'_> {
let app_event_tx_clone = app_event_tx.clone();
// Create the Codex asynchronously so the UI loads as quickly as possible.
tokio::spawn(async move {
let (codex, session_event, _ctrl_c) =
match init_codex(approval_policy, sandbox_policy, model).await {
Ok(vals) => vals,
Err(e) => {
// TODO(mbolin): This error needs to be surfaced to the user.
tracing::error!("failed to initialize codex: {e}");
return;
}
};
// Initialize session; storage enabled by default
let (codex, session_event, _ctrl_c) = match init_codex(
approval_policy,
sandbox_policy,
disable_response_storage,
model,
)
.await
{
Ok(vals) => vals,
Err(e) => {
// TODO(mbolin): This error needs to be surfaced to the user.
tracing::error!("failed to initialize codex: {e}");
return;
}
};
// Forward the captured `SessionInitialized` event that was consumed
// inside `init_codex()` so it can be rendered in the UI.

View File

@@ -31,6 +31,10 @@ pub struct Cli {
#[arg(long = "skip-git-repo-check", default_value_t = false)]
pub skip_git_repo_check: bool,
/// Disable server-side response storage (sends the full conversation context with every request)
#[arg(long = "disable-response-storage", default_value_t = false)]
pub disable_response_storage: bool,
/// Convenience alias for low-friction sandboxed automatic execution (-a on-failure, -s network-and-file-write-restricted)
#[arg(long = "full-auto", default_value_t = true)]
pub full_auto: bool,

View File

@@ -107,6 +107,7 @@ fn run_ratatui_app(
approval_policy,
sandbox_policy: sandbox,
model,
disable_response_storage,
..
} = cli;
@@ -120,6 +121,7 @@ fn run_ratatui_app(
show_git_warning,
images,
model,
disable_response_storage,
);
// Bridge log receiver into the AppEvent channel so latest log lines update the UI.